browserwright 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browserwright/__init__.py +33 -0
- browserwright/__main__.py +6 -0
- browserwright/_executor/__init__.py +47 -0
- browserwright/_executor/__main__.py +9 -0
- browserwright/_executor/client.py +127 -0
- browserwright/_executor/process.py +652 -0
- browserwright/_executor/protocol.py +152 -0
- browserwright/api.py +66 -0
- browserwright/cdp.py +285 -0
- browserwright/cli.py +741 -0
- browserwright/daemon/__init__.py +8 -0
- browserwright/daemon/_ipc.py +444 -0
- browserwright/daemon/active_tab.py +183 -0
- browserwright/daemon/auth.py +395 -0
- browserwright/daemon/backends/__init__.py +59 -0
- browserwright/daemon/backends/base.py +120 -0
- browserwright/daemon/backends/cloud.py +222 -0
- browserwright/daemon/backends/env.py +119 -0
- browserwright/daemon/backends/extension.py +185 -0
- browserwright/daemon/backends/rdp.py +214 -0
- browserwright/daemon/cli.py +1437 -0
- browserwright/daemon/config.py +380 -0
- browserwright/daemon/doctor.py +179 -0
- browserwright/daemon/errors.py +34 -0
- browserwright/daemon/launch_chrome.py +353 -0
- browserwright/daemon/observability.py +181 -0
- browserwright/daemon/platforms.py +234 -0
- browserwright/daemon/resolver.py +72 -0
- browserwright/daemon/server/__init__.py +6 -0
- browserwright/daemon/server/daemon.py +229 -0
- browserwright/daemon/server/executor_registry.py +434 -0
- browserwright/daemon/server/extension_upstream.py +677 -0
- browserwright/daemon/server/facade.py +375 -0
- browserwright/daemon/server/facade_extension.py +969 -0
- browserwright/daemon/server/listener.py +1058 -0
- browserwright/daemon/server/proxy.py +1991 -0
- browserwright/daemon/server/relay.py +783 -0
- browserwright/daemon/server/state.py +432 -0
- browserwright/daemon/server/upstream.py +266 -0
- browserwright/daemon/userscripts.py +150 -0
- browserwright/discovery.py +213 -0
- browserwright/errors.py +177 -0
- browserwright/health.py +169 -0
- browserwright/install.py +628 -0
- browserwright/memory/__init__.py +15 -0
- browserwright/memory/_md.py +120 -0
- browserwright/memory/_yaml.py +217 -0
- browserwright/memory/global_mem.py +201 -0
- browserwright/memory/repl_mem.py +28 -0
- browserwright/memory/session_decisions.py +53 -0
- browserwright/memory/site_mem.py +381 -0
- browserwright/mode_b_client.py +590 -0
- browserwright/multitask.py +131 -0
- browserwright/output_schema.py +99 -0
- browserwright/primitives/__init__.py +67 -0
- browserwright/primitives/discovery_api.py +79 -0
- browserwright/primitives/http.py +42 -0
- browserwright/primitives/inspect.py +876 -0
- browserwright/primitives/interact.py +518 -0
- browserwright/primitives/page.py +556 -0
- browserwright/primitives/site.py +143 -0
- browserwright/release_install.py +466 -0
- browserwright/repl/__init__.py +6 -0
- browserwright/repl/_namespace.py +106 -0
- browserwright/repl/_smart_goto.py +236 -0
- browserwright/repl/inline.py +180 -0
- browserwright/repl/playwright_handle.py +449 -0
- browserwright/repl/snapshot.py +150 -0
- browserwright/session.py +229 -0
- browserwright/session_create.py +252 -0
- browserwright/session_ctx.py +24 -0
- browserwright/session_registry.py +133 -0
- browserwright/session_runtime.py +133 -0
- browserwright/site_skills_starter/github.com/SKILL.md +14 -0
- browserwright/site_skills_starter/github.com/memory.md +29 -0
- browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
- browserwright/site_skills_starter/google.com/SKILL.md +16 -0
- browserwright/site_skills_starter/google.com/memory.md +27 -0
- browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
- browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
- browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
- browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
- browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
- browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
- browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
- browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
- browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
- browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
- browserwright/skill_doc.py +140 -0
- browserwright/skill_runtime.md +194 -0
- browserwright/subscriptions.py +213 -0
- browserwright/task_runner.py +125 -0
- browserwright/version.py +117 -0
- browserwright-0.6.2.dist-info/METADATA +12 -0
- browserwright-0.6.2.dist-info/RECORD +98 -0
- browserwright-0.6.2.dist-info/WHEEL +5 -0
- browserwright-0.6.2.dist-info/entry_points.txt +3 -0
- browserwright-0.6.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""Length-framed JSON request/response for the executor data plane (Fork 2).
|
|
2
|
+
|
|
3
|
+
A deliberately simple wire format of OUR design — it does NOT pretend to be CDP
|
|
4
|
+
(unlike the mode_b tunnel). Each message is a 4-byte big-endian unsigned length
|
|
5
|
+
prefix followed by that many bytes of UTF-8 JSON. The thin heredoc client sends
|
|
6
|
+
exactly one :class:`ExecuteRequest`; the executor replies with exactly one
|
|
7
|
+
:class:`ExecuteResponse`.
|
|
8
|
+
|
|
9
|
+
PR3 completes the response: ``console`` / ``return_value`` / ``warnings`` /
|
|
10
|
+
``screenshots`` / ``truncated`` / ``error`` (with a traceback for generic
|
|
11
|
+
exceptions, mirroring the in-process path), and the ``timeout_ms`` field is now
|
|
12
|
+
ENFORCED executor-side (a wedged call returns a timeout error without blocking
|
|
13
|
+
the serial queue forever).
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import socket
|
|
19
|
+
import struct
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
# Per-call timeout used when a request does not carry a usable one (in ms).
# Playwriter defaults to 10000ms, but real page operations (cold navigation
# plus network settle) can legitimately run longer, hence the more generous
# value. It stays deliberately far below any realistic idle-reap threshold
# (`Config.idle_close_after`, default None = never) so that a slow but
# legitimate call is never reclaimed as idle while it is still in flight.
DEFAULT_TIMEOUT_MS = 90_000

# Upper bound on the rendered text block (console + return value), in line
# with playwriter's ~10000-character truncation. Whole-line aware truncation
# is handled by `snapshot._truncate_lines`; this cap exists so that a runaway
# print loop cannot push megabytes back to the agent.
MAX_TEXT_CHARS = 10_000

# Frame header: a single 4-byte big-endian unsigned integer (the body length).
_LEN = struct.Struct(">I")
# Largest body accepted in one frame: 256 MiB. Chosen generously because
# screenshots were expected to land in-frame in PR3.
# NOTE(review): ExecuteResponse documents screenshots as path-based, so this
# much headroom may no longer be needed — confirm before lowering.
_MAX_FRAME = 256 << 20
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class ExecuteRequest:
    """A code blob the thin client ships to the executor.

    Attributes are serialised to / parsed from a plain JSON object with the
    keys ``code`` and ``timeout_ms``.
    """

    # Source text to run on the executor side.
    code: str
    # Per-call timeout in milliseconds.
    timeout_ms: int = DEFAULT_TIMEOUT_MS

    def to_dict(self) -> dict[str, Any]:
        """Return the JSON-serialisable wire form of this request."""
        return {"code": self.code, "timeout_ms": self.timeout_ms}

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "ExecuteRequest":
        """Build a request from a decoded wire object.

        Args:
            d: Decoded JSON object received from the client.

        Returns:
            The parsed request. A missing, non-integer, boolean or
            non-positive ``timeout_ms`` is replaced by ``DEFAULT_TIMEOUT_MS``.

        Raises:
            ValueError: If ``code`` is missing or not a string.
        """
        code = d.get("code")
        if not isinstance(code, str):
            raise ValueError("ExecuteRequest.code must be a string")
        timeout = d.get("timeout_ms", DEFAULT_TIMEOUT_MS)
        # ``bool`` is a subclass of ``int``: without the explicit exclusion a
        # JSON ``true`` would be accepted as a 1 ms timeout.
        usable = (
            isinstance(timeout, int)
            and not isinstance(timeout, bool)
            and timeout > 0
        )
        if not usable:
            timeout = DEFAULT_TIMEOUT_MS
        return cls(code=code, timeout_ms=timeout)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class ExecuteResponse:
    """The single reply the executor sends for one :class:`ExecuteRequest`.

    Modelled on playwriter's response object (PR3 full shape):

    - ``console``: stdout/stderr captured during the run.
    - ``return_value``: ``repr`` of the trailing bare expression when the last
      statement was an expression, otherwise None (playwriter's ``[return
      value]`` block).
    - ``error``: ``errors.serialize(exc)``, or None on success. Generic
      exceptions carry a ``traceback`` key so a shipped heredoc shows the
      same traceback the in-process path writes.
    - ``exit_code``: the process exit code the heredoc wants, so the thin
      client can propagate it.
    - ``warnings``: human-facing notices (e.g. a popup that became a tab)
      that the client renders as ``[WARNING] …`` lines. Few producers exist
      yet, but the field and plumbing are in place.
    - ``screenshots``: ``{"path": str, ...}`` blocks for images the heredoc
      captured. They are path-based (executor and client share a
      filesystem), so the possibly large bytes never ride the wire.
    - ``truncated``: True when the text block was capped at
      ``MAX_TEXT_CHARS``.
    """

    console: str = ""
    return_value: str | None = None
    error: dict[str, Any] | None = None
    exit_code: int = 0
    warnings: list[str] = field(default_factory=list)
    screenshots: list[dict[str, Any]] = field(default_factory=list)
    truncated: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Return the JSON-serialisable wire form (values are not copied)."""
        wire_keys = (
            "console",
            "return_value",
            "error",
            "exit_code",
            "warnings",
            "screenshots",
            "truncated",
        )
        return {key: getattr(self, key) for key in wire_keys}

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "ExecuteResponse":
        """Build a response from a decoded wire object.

        Missing or falsy entries fall back to the field defaults;
        ``return_value`` and ``error`` are passed through unchanged.
        """
        raw_console = d.get("console")
        raw_exit = d.get("exit_code")
        raw_warnings = d.get("warnings")
        raw_shots = d.get("screenshots")
        # The dict literal is evaluated top to bottom, so conversions happen
        # in field order.
        parsed = {
            "console": str(raw_console) if raw_console else "",
            "return_value": d.get("return_value"),
            "error": d.get("error"),
            "exit_code": int(raw_exit) if raw_exit else 0,
            "warnings": list(raw_warnings) if raw_warnings else [],
            "screenshots": list(raw_shots) if raw_shots else [],
            "truncated": bool(d.get("truncated")),
        }
        return cls(**parsed)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# ---- length-framed transport ----------------------------------------------
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def send_message(sock: socket.socket, payload: dict[str, Any]) -> None:
    """Send one length-framed JSON message over a blocking socket.

    The frame is a 4-byte big-endian length followed by the UTF-8 encoded
    JSON body, written with a single ``sendall`` call.

    Args:
        sock: Connected blocking socket.
        payload: JSON-serialisable message.

    Raises:
        ValueError: If the encoded body exceeds ``_MAX_FRAME``. The receiving
            side rejects such frames, so failing before any byte is written
            keeps the stream in a well-defined state.
    """
    body = json.dumps(payload).encode("utf-8")
    size = len(body)
    if size > _MAX_FRAME:
        raise ValueError(f"executor frame length out of range: {size}")
    sock.sendall(_LEN.pack(size) + body)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _recv_exact(sock: socket.socket, n: int) -> bytes:
    """Return exactly ``n`` bytes read from ``sock``.

    Raises:
        ConnectionError: If the peer closes before ``n`` bytes have arrived.
    """
    received = bytearray()
    while len(received) < n:
        # Never ask for more than is still missing from this message.
        piece = sock.recv(n - len(received))
        if not piece:
            raise ConnectionError("executor socket closed mid-message")
        received += piece
    return bytes(received)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def recv_message(sock: socket.socket) -> dict[str, Any]:
    """Read one length-framed JSON message from a blocking socket.

    Args:
        sock: Connected blocking socket.

    Returns:
        The decoded JSON object.

    Raises:
        ConnectionError: If the peer closes before a message starts, or part
            way through the header or body.
        ValueError: On a corrupt frame: a length of zero or above
            ``_MAX_FRAME``, a body that is not valid UTF-8 JSON, or a body
            whose top-level value is not a JSON object.
    """
    header = b""
    while len(header) < _LEN.size:
        chunk = sock.recv(_LEN.size - len(header))
        if not chunk:
            # Distinguish a clean close between messages from a truncated
            # header so the caller's diagnostics are accurate.
            if header:
                raise ConnectionError("executor socket closed mid-message")
            raise ConnectionError("executor socket closed before a message")
        header += chunk
    (length,) = _LEN.unpack(header)
    if length <= 0 or length > _MAX_FRAME:
        raise ValueError(f"executor frame length out of range: {length}")
    body = _recv_exact(sock, length)
    message = json.loads(body.decode("utf-8"))
    # Callers treat the result as a mapping; reject any other JSON value here
    # as a corrupt frame rather than failing later with an AttributeError.
    if not isinstance(message, dict):
        raise ValueError(
            f"executor frame is not a JSON object: {type(message).__name__}"
        )
    return message
|
browserwright/api.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Canonical primitive surface for ``from browserwright import *``.
|
|
2
|
+
|
|
3
|
+
The inline / repl / task entry points all assemble their exec globals from
|
|
4
|
+
this module. Keeping the list in one place means an agent who imports
|
|
5
|
+
``browserwright`` directly from a saved task gets the same names the REPL
|
|
6
|
+
gave them.
|
|
7
|
+
|
|
8
|
+
Phase C PR3 (terminal state): the legacy CDP browser-driving primitives
|
|
9
|
+
(``open``/``goto_url``/``click_at_xy``/``js``/``cdp``/``capture_screenshot``/
|
|
10
|
+
``snapshot``/… — the whole page/tab interaction surface) are GONE from the
|
|
11
|
+
agent surface. The agent now drives the browser with **real Playwright** via
|
|
12
|
+
the injected ``page`` / ``context`` (bound to the session's current tab,
|
|
13
|
+
reused across heredocs) and observes with ``snapshot()`` (a first-party AI
|
|
14
|
+
aria snapshot whose ``[ref=eN]`` refs feed ``page.locator("aria-ref=eN")``).
|
|
15
|
+
Those three names are injected per-heredoc by ``repl/_namespace.build_globals``,
|
|
16
|
+
NOT exported here.
|
|
17
|
+
|
|
18
|
+
What remains in EXPORTS is the set of NON-browser-driving helpers that do not
|
|
19
|
+
overlap Playwright: ``http_get`` (no-browser escape hatch), the memory verbs,
|
|
20
|
+
and the site-skill / task layer. The implementation modules under
|
|
21
|
+
``primitives/`` still define the old functions (``current_page``, ``list_tabs``,
|
|
22
|
+
the daemon-driving glue, …); they are kept as INTERNAL functions the Phase C
|
|
23
|
+
binding glue (``repl/playwright_handle.py``) and the memory/site helpers rely
|
|
24
|
+
on — they are simply no longer part of the agent-callable surface.
|
|
25
|
+
"""
|
|
26
|
+
from .errors import (
|
|
27
|
+
AuthWall,
|
|
28
|
+
BrowserwrightError,
|
|
29
|
+
Captcha,
|
|
30
|
+
CDPError,
|
|
31
|
+
DaemonUnavailable,
|
|
32
|
+
ElementNotFound,
|
|
33
|
+
NeedsUserConfirm,
|
|
34
|
+
NetworkError,
|
|
35
|
+
PageLoadFailed,
|
|
36
|
+
)
|
|
37
|
+
from .multitask import run_tasks_concurrent
|
|
38
|
+
from .primitives import (
|
|
39
|
+
bootstrap_site,
|
|
40
|
+
http_get,
|
|
41
|
+
list_site_skills,
|
|
42
|
+
load_site_skill,
|
|
43
|
+
memory_read,
|
|
44
|
+
remember,
|
|
45
|
+
remember_global,
|
|
46
|
+
remember_preference,
|
|
47
|
+
run_task,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# Names that make up the agent-callable surface, grouped by purpose.
EXPORTS = [
    # http (escape hatch — no browser; does not overlap Playwright)
    "http_get",
    # memory + site
    "bootstrap_site",
    "remember",
    "remember_global",
    "remember_preference",
    "memory_read",
    # task / fan-out (site-skills run on the Playwright surface — see
    # task_runner.run_task, which injects page/context into the task module)
    "list_site_skills",
    "load_site_skill",
    "run_task",
    "run_tasks_concurrent",
    # errors
    "BrowserwrightError",
    "PageLoadFailed",
    "ElementNotFound",
    "AuthWall",
    "Captcha",
    "NetworkError",
    "DaemonUnavailable",
    "CDPError",
    "NeedsUserConfirm",
]
|
|
65
|
+
|
|
66
|
+
__all__ = EXPORTS
|
browserwright/cdp.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
"""Synchronous CDP client over a single browser-level WebSocket.
|
|
2
|
+
|
|
3
|
+
Design:
|
|
4
|
+
- One root ws connection per Skill process.
|
|
5
|
+
- sessionId multiplex: ``send(method, session=...)`` for per-target ops,
|
|
6
|
+
no session for ``Target.*`` etc.
|
|
7
|
+
- Auto-attach to a tab on demand via ``attach(targetId)``; the resulting
|
|
8
|
+
session id is cached so subsequent calls reuse it.
|
|
9
|
+
- Events for the attached session are stashed in a per-session ring buffer
|
|
10
|
+
and exposed via ``drain_events()``.
|
|
11
|
+
|
|
12
|
+
We intentionally do not depend on cdp-use here. The whole client is < 200
|
|
13
|
+
lines of plain websockets — easier to reason about, easier to unit test,
|
|
14
|
+
and we never need typed wrappers (spec §3 "raw CDP strings over typed
|
|
15
|
+
wrappers").
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import json
|
|
20
|
+
import threading
|
|
21
|
+
import time
|
|
22
|
+
from collections import deque
|
|
23
|
+
from typing import Any, Optional
|
|
24
|
+
|
|
25
|
+
from websockets.exceptions import ConnectionClosed
|
|
26
|
+
from websockets.sync.client import connect as ws_connect
|
|
27
|
+
|
|
28
|
+
from .errors import CDPError
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
_EVENT_RING_LIMIT = 1024
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class _UnixSocketAdapter:
    """Proxy for an ``AF_UNIX`` socket that swallows TCP-level socket options.

    websockets unconditionally calls
    ``sock.setsockopt(socket.IPPROTO_TCP, TCP_NODELAY, True)`` on a socket
    supplied by the caller. ``AF_UNIX`` does not support that and raises
    ``OSError: [Errno 102]``, so options at the ``IPPROTO_TCP`` level are
    dropped here. Every other attribute and call is forwarded unchanged to
    the wrapped socket.
    """

    __slots__ = ("_s",)

    def __init__(self, s):
        self._s = s

    def setsockopt(self, level, optname, value):
        """Forward the option unless it targets the (absent) TCP layer."""
        import socket as _sock

        if level != _sock.IPPROTO_TCP:
            return self._s.setsockopt(level, optname, value)
        # Unix sockets have no TCP layer: ignore silently.
        return None

    def __getattr__(self, name):
        # Only reached for names not defined on the adapter itself.
        wrapped = self._s
        return getattr(wrapped, name)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _open_unix_websocket(ws_unix_url: str, *, connect_timeout: float):
    """Open a ws connection over a unix socket.

    ``ws_unix_url`` has the form
    ``ws+unix:///path/to/sock?client=skill-repl``. websockets supports this
    via ``sock=`` + ``server_hostname=`` overrides, but the AF_UNIX socket is
    wrapped in ``_UnixSocketAdapter`` to absorb the unconditional
    ``TCP_NODELAY`` set the library performs.

    Args:
        ws_unix_url: ``ws+unix://`` URL; its path is the socket file and its
            query string is forwarded on the upgrade request.
        connect_timeout: Seconds allowed for the socket connect and, again,
            for the WebSocket opening handshake.

    Returns:
        The connection object returned by ``ws_connect``.

    Raises:
        OSError: If the unix socket cannot be connected. Any error raised by
            the handshake propagates unchanged.
    """
    import socket as _sock
    from urllib.parse import urlparse, urlunparse

    parsed = urlparse(ws_unix_url)
    raw = _sock.socket(_sock.AF_UNIX, _sock.SOCK_STREAM)
    try:
        raw.settimeout(connect_timeout)
        raw.connect(parsed.path)
        # Build a synthetic ws:// URL for the upgrade handshake; websockets
        # parses this for the HTTP path + Host header.
        upgrade_url = urlunparse(
            ("ws", "browserwright", "/", "", parsed.query, "")
        )
        return ws_connect(
            upgrade_url,
            sock=_UnixSocketAdapter(raw),
            server_hostname="browserwright",
            open_timeout=connect_timeout,
            max_size=64 * 1024 * 1024,
            proxy=None,
            compression=None,  # daemon disables permessage-deflate (§6.3)
        )
    except BaseException:
        # Do not leave the descriptor open when the connect or the handshake
        # fails; closing an already-closed socket is a no-op.
        raw.close()
        raise
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _rpc_error_fix(method: str, err: object) -> str:
    """Return a recovery hint for a JSON-RPC error received over the wire.

    Code ``-32601`` ("method not found") almost always means the running
    daemon is older than the installed code, so the restart guidance (which
    names the method) is surfaced instead of a bare error envelope. Every
    other error yields an empty string.
    """
    is_method_not_found = isinstance(err, dict) and err.get("code") == -32601
    if not is_method_not_found:
        return ""
    from .mode_b_client import ModeBClient  # lazy: avoid import cycle

    return ModeBClient.explain_rpc_error(method, err)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class CDPSession:
    """Reader-singleton CDP transport.

    All sends are synchronous: send → block on response with matching id →
    return result. Events arrive on the same socket; the reader thread
    routes them by sessionId into per-session deques.

    ``_lock`` (shared with the ``_inflight_cv`` condition) guards the request
    id counter and the in-flight reply table. Event deques are appended to by
    the reader thread without that lock, so consumers only use operations on
    them that are atomic on their own.
    """

    def __init__(self, ws_url: str, connect_timeout: float = 8.0):
        """Connect to ``ws_url`` and start the background reader thread.

        Args:
            ws_url: ``ws://`` / ``wss://`` endpoint, or a ``ws+unix://`` URL
                for a unix-socket endpoint.
            connect_timeout: Seconds allowed for opening the connection.
        """
        self.ws_url = ws_url
        # ``proxy=None`` is critical: CDP endpoints are loopback (or a
        # daemon-provided URL the user controls). websockets.sync defaults to
        # ``proxy=True`` which means "respect $ALL_PROXY/$HTTP_PROXY" — agents
        # commonly run inside shells that point those at a SOCKS proxy for
        # their normal browsing, and routing CDP through one would fail in
        # confusing ways. browserwright-daemon-implementer flagged this.
        if ws_url.startswith("ws+unix://"):
            # Mode B: connect to the daemon's unix socket, then upgrade as
            # if it were a ws:// localhost endpoint. We hand websockets a
            # pre-connected socket via ``sock=`` and a stand-in HTTP URL.
            self._ws = _open_unix_websocket(ws_url, connect_timeout=connect_timeout)
        else:
            # ``compression=None`` matches the Mode B daemon contract (which
            # disables permessage-deflate) and is also fine for direct CDP:
            # Chrome's browser-level ws doesn't benefit from deflate on
            # localhost. ``proxy=None`` keeps $ALL_PROXY out of loopback.
            self._ws = ws_connect(
                ws_url,
                open_timeout=connect_timeout,
                max_size=64 * 1024 * 1024,
                proxy=None,
                compression=None,
            )
        self._lock = threading.Lock()
        self._next_id = 1
        # Request id -> reply message; an empty dict marks "still waiting".
        self._inflight: dict[int, dict] = {}
        self._inflight_cv = threading.Condition(self._lock)
        # Session id -> event ring buffer; ``None`` holds browser-level events.
        self._events: dict[Optional[str], deque] = {None: deque(maxlen=_EVENT_RING_LIMIT)}
        self._closed = False
        self._closed_reason: Optional[str] = None
        # Track which target each session is bound to. Attaching to the same
        # target twice in the same process is a programmer error (§D.2.10).
        self._sessions: dict[str, str] = {}  # targetId -> sessionId
        # Start the reader last so every attribute exists before it runs.
        self._reader = threading.Thread(target=self._read_loop, name="cdp-reader", daemon=True)
        self._reader.start()

    # ---- public --------------------------------------------------------

    def send(self, method: str, *, session: Optional[str] = None, **params) -> dict:
        """Send one CDP command and block until its reply arrives.

        Args:
            method: CDP method name, e.g. ``"Page.enable"``.
            session: Optional sessionId to address a specific attached target.
            **params: Command parameters, sent as the ``params`` object.

        Returns:
            The ``result`` object of the reply (``{}`` when absent).

        Raises:
            CDPError: If the socket is closed, no reply arrives within 30
                seconds, or the reply carries an ``error`` member.
        """
        if self._closed:
            raise CDPError(method=method, params=params,
                           cdp_message=f"ws closed: {self._closed_reason}")
        with self._lock:
            mid = self._next_id
            self._next_id += 1
            msg = {"id": mid, "method": method, "params": params}
            if session:
                msg["sessionId"] = session
            # Register before sending so the reader can never see the reply
            # for an id it does not know about.
            self._inflight[mid] = {}
        try:
            self._ws.send(json.dumps(msg))
        except ConnectionClosed as e:
            self._discard_inflight(mid)
            self._closed, self._closed_reason = True, str(e)
            raise CDPError(method=method, params=params, cdp_message=str(e)) from e
        except BaseException:
            # E.g. params that are not JSON-serialisable: nothing was sent, so
            # the placeholder must not linger in the table.
            self._discard_inflight(mid)
            raise
        # Wait for the reply.
        deadline = time.monotonic() + 30.0
        with self._inflight_cv:
            while not self._inflight[mid] and not self._closed:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    self._inflight.pop(mid, None)
                    raise CDPError(method=method, params=params,
                                   cdp_message="timeout waiting for CDP reply")
                self._inflight_cv.wait(timeout=remaining)
            entry = self._inflight.pop(mid, None)
        if self._closed and not entry:
            raise CDPError(method=method, params=params,
                           cdp_message=f"ws closed: {self._closed_reason}")
        if "error" in entry:
            err = entry["error"]
            # The error member is normally an object, but tolerate any JSON
            # value instead of failing with an AttributeError.
            if isinstance(err, dict):
                message = err.get("message", str(err))
            else:
                message = str(err)
            raise CDPError(method=method, params=params,
                           cdp_message=message,
                           fix=_rpc_error_fix(method, err))
        return entry.get("result", {})

    def attach(self, target_id: str) -> str:
        """Attach (or reuse attachment) to ``target_id`` and return sessionId."""
        if target_id in self._sessions:
            return self._sessions[target_id]
        res = self.send("Target.attachToTarget", targetId=target_id, flatten=True)
        sid = res["sessionId"]
        self._sessions[target_id] = sid
        self._events.setdefault(sid, deque(maxlen=_EVENT_RING_LIMIT))
        # Enable the usual domains so wait_for_load / drain_events have data.
        for domain in ("Page", "Runtime", "DOM", "Network"):
            try:
                self.send(f"{domain}.enable", session=sid)
            except CDPError:
                pass  # Some domains are noop in some Chrome builds.
        return sid

    def attach_readonly(self, target_id: str) -> str:
        """Daemon v0.3 H7 shared-read attach.

        Requests a session via ``flags.allowSecondaryReadOnly=True`` — daemon
        returns a sessionId that receives this target's events but rejects
        any command other than ``Target.detachFromTarget`` (`-32602`). Useful
        for tail-following another agent's session for monitoring / drift
        detection.

        Note: this opens a *second* session on the same target if some other
        client / process already owns it. If we own it ourselves, prefer
        ``attach()``.
        """
        res = self.send(
            "Target.attachToTarget",
            targetId=target_id,
            flatten=True,
            flags={"allowSecondaryReadOnly": True},
        )
        sid = res["sessionId"]
        self._events.setdefault(sid, deque(maxlen=_EVENT_RING_LIMIT))
        # We deliberately *don't* register sid in ``self._sessions`` — that
        # map tracks owning attachments, and a readonly attachment isn't one.
        return sid

    def detach(self, target_id: str) -> None:
        """Detach from ``target_id`` if attached and drop its event buffer.

        A failing ``Target.detachFromTarget`` is ignored (best effort).
        """
        sid = self._sessions.pop(target_id, None)
        if sid:
            try:
                self.send("Target.detachFromTarget", sessionId=sid)
            except CDPError:
                pass
            self._events.pop(sid, None)

    def drain_events(self, session: Optional[str] = None) -> list[dict]:
        """Remove and return the buffered events for ``session``, oldest first.

        ``session=None`` addresses events that carried no sessionId. Returns
        an empty list when nothing is buffered.
        """
        buf = self._events.get(session)
        if not buf:
            return []
        out: list[dict] = []
        # The lock serialises concurrent drainers. The reader appends without
        # it, so events are taken one atomic ``popleft`` at a time instead of
        # snapshot-then-clear, which could drop an event that arrives between
        # the two steps. The count is fixed up front so a busy producer
        # cannot keep this loop running.
        with self._lock:
            for _ in range(len(buf)):
                try:
                    out.append(buf.popleft())
                except IndexError:
                    break
        return out

    def close(self) -> None:
        """Close the socket and wake every blocked ``send``. Idempotent."""
        if self._closed:
            return
        self._closed = True
        if self._closed_reason is None:
            # Give later "ws closed: …" errors a meaningful reason.
            self._closed_reason = "closed by client"
        try:
            self._ws.close()
        except Exception:
            pass
        with self._inflight_cv:
            self._inflight_cv.notify_all()

    # ---- internals -----------------------------------------------------

    def _discard_inflight(self, mid: int) -> None:
        """Forget the reply slot of a request that will never be answered."""
        with self._lock:
            self._inflight.pop(mid, None)

    # ---- reader thread -------------------------------------------------

    def _read_loop(self) -> None:
        """Dispatch incoming frames until the socket ends, then mark closed.

        Replies (frames with an ``id``) complete the matching in-flight
        request; everything else is buffered as an event under its sessionId.
        """
        try:
            for raw in self._ws:
                try:
                    msg = json.loads(raw)
                except (TypeError, ValueError):
                    continue
                if not isinstance(msg, dict):
                    # Valid JSON but not an object: skip it like any other
                    # malformed frame instead of crashing the reader.
                    continue
                mid = msg.get("id")
                if mid is not None:
                    with self._inflight_cv:
                        # Replies for unknown ids (e.g. timed out) are dropped.
                        if mid in self._inflight:
                            self._inflight[mid] = msg
                            self._inflight_cv.notify_all()
                    continue
                # Event.
                sid = msg.get("sessionId")
                buf = self._events.get(sid)
                if buf is None:
                    buf = self._events.setdefault(sid, deque(maxlen=_EVENT_RING_LIMIT))
                buf.append({"method": msg.get("method"), "params": msg.get("params", {}), "sessionId": sid})
        except ConnectionClosed as e:
            self._closed_reason = str(e)
        except Exception as e:  # noqa: BLE001
            self._closed_reason = f"reader crash: {e!r}"
        finally:
            self._closed = True
            with self._inflight_cv:
                self._inflight_cv.notify_all()
|