browserwright 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browserwright/__init__.py +33 -0
- browserwright/__main__.py +6 -0
- browserwright/_executor/__init__.py +47 -0
- browserwright/_executor/__main__.py +9 -0
- browserwright/_executor/client.py +127 -0
- browserwright/_executor/process.py +652 -0
- browserwright/_executor/protocol.py +152 -0
- browserwright/api.py +66 -0
- browserwright/cdp.py +285 -0
- browserwright/cli.py +741 -0
- browserwright/daemon/__init__.py +8 -0
- browserwright/daemon/_ipc.py +444 -0
- browserwright/daemon/active_tab.py +183 -0
- browserwright/daemon/auth.py +395 -0
- browserwright/daemon/backends/__init__.py +59 -0
- browserwright/daemon/backends/base.py +120 -0
- browserwright/daemon/backends/cloud.py +222 -0
- browserwright/daemon/backends/env.py +119 -0
- browserwright/daemon/backends/extension.py +185 -0
- browserwright/daemon/backends/rdp.py +214 -0
- browserwright/daemon/cli.py +1437 -0
- browserwright/daemon/config.py +380 -0
- browserwright/daemon/doctor.py +179 -0
- browserwright/daemon/errors.py +34 -0
- browserwright/daemon/launch_chrome.py +353 -0
- browserwright/daemon/observability.py +181 -0
- browserwright/daemon/platforms.py +234 -0
- browserwright/daemon/resolver.py +72 -0
- browserwright/daemon/server/__init__.py +6 -0
- browserwright/daemon/server/daemon.py +229 -0
- browserwright/daemon/server/executor_registry.py +434 -0
- browserwright/daemon/server/extension_upstream.py +677 -0
- browserwright/daemon/server/facade.py +375 -0
- browserwright/daemon/server/facade_extension.py +969 -0
- browserwright/daemon/server/listener.py +1058 -0
- browserwright/daemon/server/proxy.py +1991 -0
- browserwright/daemon/server/relay.py +783 -0
- browserwright/daemon/server/state.py +432 -0
- browserwright/daemon/server/upstream.py +266 -0
- browserwright/daemon/userscripts.py +150 -0
- browserwright/discovery.py +213 -0
- browserwright/errors.py +177 -0
- browserwright/health.py +169 -0
- browserwright/install.py +628 -0
- browserwright/memory/__init__.py +15 -0
- browserwright/memory/_md.py +120 -0
- browserwright/memory/_yaml.py +217 -0
- browserwright/memory/global_mem.py +201 -0
- browserwright/memory/repl_mem.py +28 -0
- browserwright/memory/session_decisions.py +53 -0
- browserwright/memory/site_mem.py +381 -0
- browserwright/mode_b_client.py +590 -0
- browserwright/multitask.py +131 -0
- browserwright/output_schema.py +99 -0
- browserwright/primitives/__init__.py +67 -0
- browserwright/primitives/discovery_api.py +79 -0
- browserwright/primitives/http.py +42 -0
- browserwright/primitives/inspect.py +876 -0
- browserwright/primitives/interact.py +518 -0
- browserwright/primitives/page.py +556 -0
- browserwright/primitives/site.py +143 -0
- browserwright/release_install.py +466 -0
- browserwright/repl/__init__.py +6 -0
- browserwright/repl/_namespace.py +106 -0
- browserwright/repl/_smart_goto.py +236 -0
- browserwright/repl/inline.py +180 -0
- browserwright/repl/playwright_handle.py +449 -0
- browserwright/repl/snapshot.py +150 -0
- browserwright/session.py +229 -0
- browserwright/session_create.py +252 -0
- browserwright/session_ctx.py +24 -0
- browserwright/session_registry.py +133 -0
- browserwright/session_runtime.py +133 -0
- browserwright/site_skills_starter/github.com/SKILL.md +14 -0
- browserwright/site_skills_starter/github.com/memory.md +29 -0
- browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
- browserwright/site_skills_starter/google.com/SKILL.md +16 -0
- browserwright/site_skills_starter/google.com/memory.md +27 -0
- browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
- browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
- browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
- browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
- browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
- browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
- browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
- browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
- browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
- browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
- browserwright/skill_doc.py +140 -0
- browserwright/skill_runtime.md +194 -0
- browserwright/subscriptions.py +213 -0
- browserwright/task_runner.py +125 -0
- browserwright/version.py +117 -0
- browserwright-0.6.2.dist-info/METADATA +12 -0
- browserwright-0.6.2.dist-info/RECORD +98 -0
- browserwright-0.6.2.dist-info/WHEEL +5 -0
- browserwright-0.6.2.dist-info/entry_points.txt +3 -0
- browserwright-0.6.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1058 @@
|
|
|
1
|
+
"""WebSocket listener + lifecycle orchestrator.
|
|
2
|
+
|
|
3
|
+
This module wires together:
|
|
4
|
+
- `_ipc` (socket file / token / ping)
|
|
5
|
+
- `state` (DaemonState)
|
|
6
|
+
- `upstream` (UpstreamConnection)
|
|
7
|
+
- `proxy` (Router)
|
|
8
|
+
|
|
9
|
+
Spec §8.5: the listener task accepts clients, the upstream-lifecycle task
|
|
10
|
+
opens/closes the upstream ws lazily, and the keepalive task is built into
|
|
11
|
+
UpstreamConnection (heartbeat) + websockets server (ws-level pings).
|
|
12
|
+
|
|
13
|
+
Historical note — v0.2 single-client model (removed in v0.3, which accepts concurrent clients): the second ws upgrade was rejected with HTTP 503
|
|
14
|
+
+ a clear body. spec §9.2.
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import asyncio
|
|
19
|
+
import contextlib
|
|
20
|
+
import http
|
|
21
|
+
import json
|
|
22
|
+
import logging
|
|
23
|
+
import os
|
|
24
|
+
import signal
|
|
25
|
+
import sys
|
|
26
|
+
import time
|
|
27
|
+
from typing import Any
|
|
28
|
+
from urllib.parse import parse_qs, urlparse
|
|
29
|
+
|
|
30
|
+
import websockets
|
|
31
|
+
from websockets.asyncio.server import ServerConnection, serve, unix_serve
|
|
32
|
+
|
|
33
|
+
from .. import _ipc
|
|
34
|
+
from .. import __version__
|
|
35
|
+
from ..config import Config
|
|
36
|
+
from ..errors import Unavailable
|
|
37
|
+
from ..resolver import resolve
|
|
38
|
+
from ..observability import metrics, install_json_logging_if_requested
|
|
39
|
+
from .state import CloseReason, DaemonState, UpstreamPhase
|
|
40
|
+
from .proxy import Router
|
|
41
|
+
from .daemon import Daemon, UnknownSessionError, UpstreamContext
|
|
42
|
+
from .upstream import UpstreamConnection
|
|
43
|
+
from .relay import RelayServer
|
|
44
|
+
from .extension_upstream import ExtensionUpstream
|
|
45
|
+
from .facade import PlaywrightFacade
|
|
46
|
+
|
|
47
|
+
logger = logging.getLogger(__name__)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ---- per-upstream context factory ------------------------------------------
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def make_context(*, backend: str, cfg: Config,
                 session_id: str | None = None) -> UpstreamContext:
    """Assemble the `(state, router, holder)` triple for one upstream.

    The three objects are wired together the same way for every context: the
    router wraps the state, and the holder receives both plus the config.
    This factory lives in the listener module because it instantiates
    `_UpstreamHolder`, which is defined here.

    No relay is started by this function. `run_serve` eagerly starts one for
    the extension shared context only; other contexts rely on the holder to
    open their upstream later.

    Args:
        backend: backend name recorded on the state and on the context.
        cfg: configuration handed to the holder.
        session_id: optional session this context is dedicated to.

    Returns:
        A fully wired `UpstreamContext`.
    """
    daemon_state = DaemonState(backend_name=backend)
    request_router = Router(daemon_state)
    return UpstreamContext(
        backend=backend,
        state=daemon_state,
        router=request_router,
        holder=_UpstreamHolder(
            daemon_state, request_router, cfg, session_id=session_id),
        session_id=session_id,
    )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ---- top-level entry -------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
async def run_serve(cfg: Config) -> int:
    """Run a Mode B daemon until SIGTERM / Ctrl-C / shutdown. Returns exit code.

    There is exactly one global daemon on a fixed socket — no instance name.

    Startup order: stale-endpoint probe, orphan sweeps, logging, shared
    upstream context, signal handlers, pid file, listener, extension relay
    (extension backend only), Playwright facade (optional), idle watchdog.

    Args:
        cfg: daemon configuration; `cfg.backend` defaults to "extension"
            when unset.

    Returns:
        0 after a graceful shutdown, 1 if another daemon answered the ping,
        2 if the extension relay could not be bound.
    """
    # Stale-detect: ping any existing endpoint before binding. If something
    # answers, refuse to start a second copy of ourselves (enforces the
    # "at most one global daemon" invariant) — but if the ping comes back
    # negative, we cleanup the dead socket file and proceed.
    existing_pid, existing_version = await _ipc.ping_status_async(timeout=1.0)
    if existing_pid is not None:
        version_hint = ""
        if existing_version and existing_version != __version__:
            version_hint = (
                f" (running {existing_version}, installed {__version__}; "
                "use `browserwright-daemon stop` or `browserwright-daemon restart`)"
            )
        print(
            f"browserwright-daemon already running (pid {existing_pid}); "
            f"use `browserwright-daemon stop` to shut it down{version_hint}",
            file=sys.stderr,
        )
        return 1
    _ipc.cleanup_endpoint()

    # Phase 3 (C2 ephemeral): rdp Chrome processes are daemon children and die
    # with us — but a hard crash / SIGKILL can leave orphan Chrome processes
    # holding their `bs-s{id}` profile dirs. Sweep them before serving so
    # ephemeral rdp sessions start clean (and so a relaunch on the same profile
    # isn't blocked by a stale SingletonLock).
    _cleanup_orphan_rdp_chrome()
    # Phase B (PR2): the executor is "rdp Chrome v2" — sweep orphan executor
    # subprocesses + their stale `bw-exec-*` sockets/discovery files left by a
    # prior daemon SIGKILL, same rationale as the rdp sweep above.
    from .executor_registry import cleanup_orphan_executors
    cleanup_orphan_executors()

    # Log file is best-effort — we route Python logging to it but never crash
    # the daemon over a write failure.
    _wire_logging()
    # v0.5: opt-in JSON log formatter. After _wire_logging adds the
    # file/console handlers, swap formatters in place if BD_LOG_JSON=1.
    install_json_logging_if_requested()
    logger.info("browserwright-daemon %s starting (backend=%s)",
                __version__, cfg.backend or "extension")

    # Phase 2: one global daemon holding many upstream contexts. The shared
    # context is the real-browser upstream (cfg.backend, default extension);
    # rdp sessions get their own context lazily (Daemon.context_for). The
    # routing engine (Router/DaemonState/_UpstreamHolder) is unchanged — we
    # just instantiate it per context and dispatch in `_ClientHandler`.
    shared_backend = cfg.backend or "extension"
    # Pin the shared context's holder cfg to the resolved backend: serve now
    # defaults a missing backend to extension (cli._cmd_serve), so the holder
    # must see backend="extension" — not None — to take its extension-upstream
    # open path. dataclasses.replace keeps the rest of cfg intact.
    import dataclasses as _dc
    shared_cfg = _dc.replace(cfg, backend=shared_backend)
    shared_context = make_context(backend=shared_backend, cfg=shared_cfg)
    daemon = Daemon(cfg=cfg, shared_context=shared_context,
                    make_context=make_context)

    # SIGTERM / SIGINT → set the stop event. We don't tear down inline because
    # we still need to run the graceful shutdown sequence (close clients with
    # 1011 + emit upstreamClosed + close upstream).
    stop = asyncio.Event()

    def _on_signal():
        stop.set()

    loop = asyncio.get_running_loop()
    if sys.platform != "win32":
        for s in (signal.SIGTERM, signal.SIGINT):
            try:
                loop.add_signal_handler(s, _on_signal)
            except NotImplementedError:
                pass  # e.g. inside pytest event loop

    # PID file (best-effort).
    _ipc.write_pid(os.getpid())

    handler = _ClientHandler(daemon, cfg)
    server = await _open_server(handler)

    # v0.4: for the extension shared context, start the relay ws server eagerly
    # so `browserwright-daemon doctor` can probe `__status__` even before any
    # Skill client connects. The relay belongs to the shared context's holder
    # (it is the always-on, real-browser upstream).
    if shared_backend == "extension":
        try:
            # v0.5.3 F-5 / Task #24: bind at the configured host+port.
            # Precedence (CLI > env > toml port > toml relay_url > default)
            # is centralized in cfg.backends.extension.resolved_host_port().
            host, port = cfg.backends.extension.resolved_host_port()
            shared_context.holder.relay = RelayServer(host=host, port=port)
            port = await shared_context.holder.relay.start()
            logger.info("extension relay started on port %d", port)
        except OSError as e:
            print(
                f"browserwright-daemon failed to bind extension relay: {e}",
                file=sys.stderr,
            )
            # Undo the listener bind so the next start is not blocked by us.
            server.close()
            with contextlib.suppress(Exception):
                await server.wait_closed()
            _ipc.cleanup_endpoint()
            return 2

    # Playwright facade (Phase C: auto-enabled by default). Bind an ADDITIONAL
    # Playwright-facing CDP ws+HTTP endpoint layered beside the agent unix
    # socket. It resolves the daemon's upstream Chrome (rdp backend) and
    # transparently bridges raw browser-level CDP — the existing client path is
    # untouched. The skill layer's heredoc `page`/`context` connect through it,
    # so it is ON unless `facade_port == 0` (explicit disable). The bound ws is
    # advertised via the `_ipc` facade discovery file so the skill layer can
    # `connect_over_cdp` without parsing logs. A bind failure here is non-fatal:
    # we log + continue serving the agent path.
    facade: PlaywrightFacade | None = None
    facade_port = cfg.resolved_facade_port()
    if facade_port is not None:
        try:
            # PR2: for the extension backend the facade bridges through the
            # daemon's shared relay (started just above). Pass a getter so the
            # facade resolves the LIVE relay per client connection — it may be
            # (re)bound across the daemon's lifetime.
            def _shared_relay() -> RelayServer | None:
                return shared_context.holder.relay

            facade = PlaywrightFacade(cfg=cfg, port=facade_port,
                                      relay_getter=_shared_relay,
                                      daemon=daemon)
            bound = await facade.start()
            facade_ws = f"ws://127.0.0.1:{bound}/cdp"
            # Advertise the bound ws so the skill layer can discover it (Phase C
            # auto-enable). Best-effort: a write failure must not abort serving.
            with contextlib.suppress(Exception):
                _ipc.write_facade_file(facade_ws, bound)
            logger.info("playwright facade started on port %d "
                        "(connect_over_cdp %s)", bound, facade_ws)
        except OSError as e:
            logger.warning("playwright facade failed to bind port %d: %r; "
                           "continuing without it", facade_port, e)
            facade = None

    # The watchdog runs unconditionally: even when upstream idle-close is off
    # (cfg.idle_close_after None), it must still crash-reap dead executors
    # (Fork 4 self-exit / segfault) so the registry never accumulates corpses.
    # Upstream idle-close + executor idle-reap are gated on cfg.idle_close_after
    # inside the loop.
    idle_task = asyncio.create_task(
        _idle_watchdog(daemon, cfg.idle_close_after))
    try:
        await stop.wait()
        logger.info("browserwright-daemon shutdown requested")
        await _graceful_shutdown(daemon)
    finally:
        idle_task.cancel()
        # Awaiting a cancelled task raises asyncio.CancelledError, which is a
        # BaseException and therefore NOT covered by `suppress(Exception)`.
        # It must be suppressed explicitly, otherwise the rest of this
        # teardown (facade, relays, listener, endpoint cleanup) is skipped.
        with contextlib.suppress(asyncio.CancelledError, Exception):
            await idle_task
        # Phase A1: stop the Playwright facade if it bound.
        if facade is not None:
            with contextlib.suppress(Exception):
                await facade.stop()
        # Stop every context's relay (only the extension shared context has
        # one today, but iterate so a future rdp-with-relay can't leak).
        for ctx in daemon.all_contexts():
            if ctx.holder.relay is not None:
                with contextlib.suppress(Exception):
                    await ctx.holder.relay.stop()
        server.close()
        with contextlib.suppress(Exception):
            await server.wait_closed()
        _ipc.cleanup_endpoint()
    return 0
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
# ---- rdp orphan cleanup (Phase 3 / C2 ephemeral) ---------------------------
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _cleanup_orphan_rdp_chrome() -> None:
    """Best-effort: on daemon startup, kill stray Chrome processes + remove
    leftover `bs-s{id}` profile dirs from a prior daemon crash (C2 ephemeral —
    docs/refactor-single-daemon.md §Notes "rdp orphan cleanup").

    Conservative by design:
      - We ONLY touch profile dirs we own: `<cache>/profiles/bs-s*`. We never
        scan the system process table for "chrome" (would catch the user's real
        Chrome) — we only signal the pid named by that profile's own
        `SingletonLock`.
      - Chrome writes `SingletonLock` as a symlink whose target is
        `<hostname>-<pid>`. We parse the pid, SIGTERM it (if it still exists),
        then remove the whole profile dir. A profile with no SingletonLock is
        already-dead — we just remove the dir.
      - A parsed pid of 0 or 1, or our own pid, is never signalled: pid 0
        would address our whole process group, and a stale lock whose pid has
        been reused by this daemon would make us terminate ourselves.
      - Every step is wrapped so a permission error / race never crashes serve.
    """
    import os as _os
    import shutil as _shutil
    import signal as _signal
    from ..platforms import cache_dir

    profiles_root = cache_dir() / "profiles"
    try:
        if not profiles_root.is_dir():
            return
        # Snapshot the listing so removals below cannot disturb iteration.
        entries = list(profiles_root.iterdir())
    except OSError as e:
        logger.debug("orphan-cleanup: cannot list %s: %r", profiles_root, e)
        return

    own_pid = _os.getpid()
    for entry in entries:
        try:
            if not entry.name.startswith("bs-s") or not entry.is_dir():
                continue
        except OSError:
            continue
        # Try to identify + kill the Chrome that owns this profile via its
        # SingletonLock symlink (target == "<hostname>-<pid>").
        lock = entry / "SingletonLock"
        try:
            target = _os.readlink(lock)
            pid = int(target.rsplit("-", 1)[-1])
        except (OSError, ValueError):
            pid = None
        if pid is not None and (pid <= 1 or pid == own_pid):
            logger.warning("orphan-cleanup: refusing to signal pid %d "
                           "(profile %s)", pid, entry.name)
            pid = None
        if pid is not None:
            try:
                _os.kill(pid, _signal.SIGTERM)
            except OSError:
                # Covers ProcessLookupError (already gone) and
                # PermissionError (not ours to signal).
                pass
            else:
                logger.info("orphan-cleanup: SIGTERM stray rdp Chrome pid %d "
                            "(profile %s)", pid, entry.name)
        # Remove the leftover profile dir so the next launch of this session id
        # starts from a clean, lock-free profile. ignore_errors keeps rmtree
        # deleting as much as it can, so success is checked afterwards rather
        # than via an exception.
        _shutil.rmtree(entry, ignore_errors=True)
        if _os.path.lexists(entry):
            logger.debug("orphan-cleanup: could not remove %s", entry)
        else:
            logger.info("orphan-cleanup: removed stale profile dir %s", entry.name)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# ---- log wiring ------------------------------------------------------------
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _wire_logging() -> None:
    """Attach a file handler (and, on a TTY, a stderr echo) to the root logger.

    The log file location comes from `_ipc.log_path()`; its parent directory
    is created if missing. The root logger level is set to INFO. Any `OSError`
    raised along the way is swallowed, so logging setup never stops the daemon.
    """
    with contextlib.suppress(OSError):
        destination = _ipc.log_path()
        destination.parent.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(str(destination), encoding="utf-8")
        line_format = logging.Formatter(
            "%(asctime)s %(levelname)-7s %(name)s: %(message)s")
        file_handler.setFormatter(line_format)
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.INFO)
        # Foreground serve: mirror records to the terminal as well. The console
        # handler is registered before the file handler.
        if sys.stderr.isatty():
            console_handler = logging.StreamHandler(sys.stderr)
            root_logger.addHandler(console_handler)
        root_logger.addHandler(file_handler)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
# ---- websockets server (multi-client since v0.3; no single-client gate) ----
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
async def _open_server(handler: "_ClientHandler"):
    """Bind the client listener and return the running websockets server.

    POSIX: serve on the unix socket from `_ipc.make_unix_socket()`. The socket
    file's mode is compared against 0600 and a mismatch is logged as a warning
    (it does not abort startup).

    Windows: serve on the TCP socket from `_ipc.make_tcp_socket()`, publish
    port + token through `_ipc.write_port_file`, and store the token on
    `handler` so the handshake hook can check `?token=`.

    In both cases the HTTP handshake is routed through the hook built by
    `_make_process_request`, which answers `/__ping__` without a ws upgrade.
    """
    # Options shared by both transports.
    ws_options = dict(
        process_request=_make_process_request(handler),
        max_size=100 * 1024 * 1024,
        compression=None,
        ping_interval=20,
        ping_timeout=20,
    )

    if not _ipc.IS_WINDOWS:
        listen_sock = _ipc.make_unix_socket()
        # spec §6.2 promises 0600 on the socket file; surface any deviation
        # in the log rather than staying silent about an exposed socket.
        mode_bits = os.stat(_ipc.sock_path()).st_mode & 0o777
        if mode_bits != 0o600:
            logger.warning("unexpected sock perms %o", mode_bits)
        server = await unix_serve(
            handler.serve_one, sock=listen_sock, **ws_options)
        logger.info("listening on %s", _ipc.sock_path())
        return server

    listen_sock, port, token = _ipc.make_tcp_socket()
    _ipc.write_port_file(port, token)
    handler.token = token
    server = await serve(handler.serve_one, sock=listen_sock, **ws_options)
    # NOTE(review): this writes the first 8 characters of the auth token to
    # the log — confirm that is acceptable for the log file's audience.
    logger.info("listening on 127.0.0.1:%d (token=%s...)", port, token[:8])
    return server
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _make_process_request(handler: "_ClientHandler"):
    """Build the `process_request` hook that intercepts the HTTP handshake.

    Two responsibilities:
      1. `/__ping__` GET → return a 200 with {"pong":true,"pid":N} so the
         stale-detect probe works *before* a ws upgrade. The ping path is
         answered before the token check.
      2. On Windows, verify the `?token=` query matches the server token.
         The comparison is constant-time, and the request is rejected when
         either side has no token (fail closed).

    v0.3: the single-client gate from v0.2 is **gone**. Multiple clients
    connect concurrently; the router's sessionId/id translation keeps them
    cleanly separated.

    Args:
        handler: the client handler whose `token` attribute holds the
            expected token (read at request time, not at build time).

    Returns:
        A callable `(conn, request)` that returns an HTTP response to
        short-circuit the handshake, or None to allow the ws upgrade.
    """
    import hmac

    def _token_matches(supplied: str | None) -> bool:
        """Constant-time check of *supplied* against the handler's token."""
        expected = handler.token
        if expected is None or supplied is None:
            # `None != None` is False, so a plain inequality test would admit
            # a token-less request whenever no server token is set.
            return False
        # compare_digest only accepts ASCII str; bytes work for any input.
        return hmac.compare_digest(
            supplied.encode("utf-8"), expected.encode("utf-8"))

    def process_request(conn: ServerConnection, request) -> Any:
        path = request.path or "/"
        if path.startswith("/__ping__"):
            body = _ipc.make_pong_body(os.getpid())
            resp = conn.respond(http.HTTPStatus.OK, body.decode("utf-8"))
            resp.headers["Content-Type"] = "application/json"
            return resp
        if _ipc.IS_WINDOWS:
            query = _parse_query(path)
            if not _token_matches(query.get("token")):
                return conn.respond(
                    http.HTTPStatus.UNAUTHORIZED,
                    "missing or wrong ?token=\n",
                )
        return None  # allow upgrade

    return process_request
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _parse_query(path: str) -> dict[str, str]:
|
|
408
|
+
"""Pull single-valued query params from the request path."""
|
|
409
|
+
parsed = urlparse(path)
|
|
410
|
+
q = parse_qs(parsed.query)
|
|
411
|
+
return {k: v[0] for k, v in q.items() if v}
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
# ---- per-client handler ----------------------------------------------------
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
class _ClientHandler:
    """Thin adapter object — websockets gives us a ServerConnection per
    incoming client; we dispatch it to the right `UpstreamContext` and wire it
    through THAT context's Router.

    Phase 2: the handler holds the global `Daemon`, not a single triple. The
    client's `?session=<id>` query selects the context (via the ledger's
    immutable backend); `?client=<label>` is kept for log-friendly labels.
    """

    def __init__(self, daemon: "Daemon", cfg: Config):
        self.daemon = daemon
        self.cfg = cfg
        # Handshake auth token. Stays None unless a caller assigns it; the
        # Windows branch of `_open_server` sets it before serving.
        self.token: str | None = None

    async def serve_one(self, conn: ServerConnection) -> None:
        """v0.3: handler instance per client connection — many run concurrently.

        Phase 2 dispatch: parse `?session=<id>` (and keep `?client=<label>`),
        resolve the `UpstreamContext` via `daemon.context_for(session_id)`, then
        register/route/release entirely against THAT context's state + router.
        Because a client is bound to one context for its whole life, each
        context's `Router._broadcast` only ever reaches its own clients —
        browser-level events cannot leak across contexts.

        An unknown session id closes the connection with code 1008 and
        returns without allocating a client.
        """
        query = _parse_query(conn.request.path or "/")
        label = query.get("client", "anonymous")
        session_id = query.get("session") or None

        try:
            ctx = (self.daemon.context_for_required(session_id)
                   if session_id else self.daemon.context_for(None))
        except UnknownSessionError:
            logger.warning("refusing client %s: unknown session %s",
                           label, session_id)
            with contextlib.suppress(Exception):
                await conn.close(code=1008, reason="unknown browserwright session")
            return
        state = ctx.state
        router = ctx.router
        holder = ctx.holder

        # Allocate with a globally-unique client id (unique across contexts)
        # but register it in this context's own client table. The session id +
        # name (from the ledger) ride on the client so the shared extension
        # context can scope Target.getTargets to this session's tab group.
        session_name: str | None = None
        if session_id:
            from ... import session_registry
            rec = session_registry.get(session_id)
            if isinstance(rec, dict):
                session_name = rec.get("name")
        client = state.allocate_client(
            label, client_id=next(self.daemon._next_client_id),
            session_id=session_id, session_name=session_name)

        async def send_to_client(text: str) -> None:
            # A failed send is logged, not raised: one dead client must not
            # break the router's delivery loop.
            try:
                await conn.send(text)
            except Exception as e:
                logger.warning("client %d send failed: %r", client.client_id, e)

        router.register_client(client.client_id, send_to_client)
        router.bind_lifecycle(
            ensure_upstream=holder.ensure_open,
            trigger_disconnect=holder.trigger_close,
        )

        # If upstream is already open (warm from another client), make sure the
        # router has the send fn wired. (The first ensure_open call wires it
        # internally; subsequent client sessions inherit it.)
        if holder.is_open:
            router.update_upstream_send(holder.send_text)

        metrics().client_connected_total += 1
        logger.info("client %d connected (label=%s, session=%s, backend=%s, total=%d)",
                    client.client_id, label, session_id or "-", ctx.backend,
                    len(state.clients))
        try:
            async for raw in conn:
                if not isinstance(raw, (str, bytes)):
                    continue
                text = raw if isinstance(raw, str) else raw.decode("utf-8", errors="replace")
                metrics().client_frame_received_total += 1
                await router.route_from_client(client, text)
        except websockets.exceptions.ConnectionClosed:
            logger.info("client %d disconnected", client.client_id)
        except Exception as e:
            logger.warning("client %d crashed: %r", client.client_id, e)
        finally:
            metrics().client_disconnected_total += 1
            try:
                await router.release_client(client.client_id)
            finally:
                # Always drop the send callback, even if release_client
                # raised — otherwise the router keeps a callback bound to a
                # connection that is already gone.
                router.unregister_client(client.client_id)
            # Upstream stays warm so other clients (or the next reconnect)
            # don't pay banner-flash for our churn.
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
# ---- upstream lifecycle ----------------------------------------------------
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
class _UpstreamHolder:
|
|
518
|
+
"""Owns the single UpstreamConnection. Provides lazy-open + graceful-close
|
|
519
|
+
primitives the Router can call.
|
|
520
|
+
|
|
521
|
+
v0.4: when `cfg.backend == "extension"` we replace the conventional ws
|
|
522
|
+
upstream with an ExtensionUpstream wrapping a RelayServer. The relay is
|
|
523
|
+
started eagerly (so doctor probe answers `available=true` as soon as
|
|
524
|
+
the daemon is up), and `ensure_open` blocks the first client until the
|
|
525
|
+
extension has connected.
|
|
526
|
+
"""
|
|
527
|
+
|
|
528
|
+
def __init__(self, state: DaemonState, router: Router, cfg: Config,
|
|
529
|
+
*, session_id: str | None = None):
|
|
530
|
+
self.state = state
|
|
531
|
+
self.router = router
|
|
532
|
+
self.upstream: UpstreamConnection | ExtensionUpstream | None = None
|
|
533
|
+
self._open_lock = asyncio.Lock()
|
|
534
|
+
self._cfg: Config = cfg
|
|
535
|
+
# v0.4: only populated when backend=extension. Owned by the holder
|
|
536
|
+
# for the daemon's full lifetime; we don't tear down on idle-close
|
|
537
|
+
# so the extension's persistent ws to us stays warm.
|
|
538
|
+
self.relay: RelayServer | None = None
|
|
539
|
+
# Phase 3 (docs/refactor-single-daemon.md §P3 + C2): for an rdp context
|
|
540
|
+
# the daemon itself launches and owns a dedicated Chrome (own port +
|
|
541
|
+
# profile `bs-s{id}`). We record the launched process's pid + profile
|
|
542
|
+
# dir here so teardown can SIGTERM it and so orphan-cleanup can spot
|
|
543
|
+
# leftover `bs-s*` profiles after a crash. None on every other backend
|
|
544
|
+
# (the extension/env/cloud holders never own a Chrome process).
|
|
545
|
+
self.session_id: str | None = session_id
|
|
546
|
+
self.rdp_pid: int | None = None
|
|
547
|
+
self.rdp_profile_dir: str | None = None
|
|
548
|
+
self.rdp_port: int | None = None
|
|
549
|
+
self.rdp_owns_browser: bool = False
|
|
550
|
+
|
|
551
|
+
@property
|
|
552
|
+
def is_open(self) -> bool:
|
|
553
|
+
return self.upstream is not None and self.upstream.is_open
|
|
554
|
+
|
|
555
|
+
async def send_text(self, frame: str) -> None:
    """Forward `frame` to the current upstream's `send_text`.

    Living on the holder lets callers hand `holder.send_text` to the router
    as its `upstream_send` callable instead of reaching through
    `holder.upstream.send_text`, which races with close (the attribute can
    turn None mid-call). The upstream is read once into a local so the None
    check and the send use the same object.

    Raises:
        RuntimeError: if there is no upstream at the moment of the call.
    """
    live = self.upstream
    if live is not None:
        await live.send_text(frame)
        return
    raise RuntimeError("upstream not open")
|
|
567
|
+
|
|
568
|
+
async def _broadcast_event(self, method: str, params: dict) -> None:
    """Send one `{method, params}` JSON envelope to every connected client.

    Same envelope shape as the `upstreamClosed` broadcast (listener spec
    §6.5). v0.5.3 F-3 uses it to surface the `upstreamConnecting` and
    `upstreamReady` lifecycle events to subscribers.

    Delivery is best-effort: the envelope is serialised once, the client
    ids are snapshotted before the first await, and a failure to reach one
    client is ignored so the remaining clients still get the event.
    """
    payload = json.dumps({"method": method, "params": params})
    for client_id in tuple(self.state.clients.keys()):
        try:
            await self.router._send_to_client(client_id, payload)
        except Exception:
            # One unreachable client must not stop the fan-out.
            continue
|
|
580
|
+
|
|
581
|
+
async def ensure_open(self) -> None:
    """Open the upstream if it is not already open.

    Concurrent callers are serialised on `_open_lock`; a caller that lost
    the race re-checks `is_open` under the lock and returns without
    reopening.

    Open path, selected by `cfg.backend`:
      - "extension": `_open_extension_upstream` (wait for the relay's
        extension, wrap it in an ExtensionUpstream).
      - anything else: `_open_chrome_upstream` (resolve a CDP ws URL and
        connect an UpstreamConnection). When this holder owns its Chrome
        (backend "rdp", `session_id` set, `rdp_owns_browser` true),
        `_launch_rdp_chrome` runs first so the port is listening before
        resolve probes it.

    v0.5.3 F-3 lifecycle events, broadcast to every connected client:
      - `BrowserwrightDaemon.upstreamConnecting {backend}` once the lock is
        held and `state.begin_connecting` has run.
      - `BrowserwrightDaemon.upstreamReady {backend, ws_url}` after a
        successful open.

    After a successful open the router's pre-open buffers are drained
    (Task #76); a failure while draining is logged, not raised.

    Raises:
        Exception: whatever the selected open path raised. It is re-raised
            unchanged after `upstream_open_failed_total` is incremented.
    """
    if self.is_open:
        return
    async with self._open_lock:
        # Double-check: another caller may have finished the open while we
        # were waiting for the lock.
        if self.is_open:
            return
        cfg = self._cfg
        await self.state.begin_connecting(cfg.backend or "auto")
        metrics().upstream_open_attempts_total += 1
        # F-3: emit BrowserwrightDaemon.upstreamConnecting to all clients.
        await self._broadcast_event(
            "BrowserwrightDaemon.upstreamConnecting",
            {"backend": cfg.backend or "auto"},
        )

        try:
            if cfg.backend == "extension":
                await self._open_extension_upstream(cfg)
            else:
                # Phase 3: an rdp context owns its Chrome. Launch it (once)
                # BEFORE the resolve/connect path runs. Other callers of
                # `_open_chrome_upstream` (env/cloud) skip this; only a
                # holder with a session_id + rdp backend owns a Chrome.
                if (cfg.backend == "rdp" and self.session_id is not None
                        and self.rdp_owns_browser):
                    try:
                        await self._launch_rdp_chrome(cfg)
                    except Exception:
                        # Same bookkeeping as the other failed-open paths,
                        # so the state does not stay in the connecting
                        # phase after a failed launch.
                        self.state.last_close_reason = "backend_lost"
                        await self.state.set_disconnected()
                        raise
                    # `_launch_rdp_chrome` pins a freshly allocated port by
                    # REPLACING `self._cfg`; the local `cfg` above is then
                    # stale. Re-read it so resolve probes the port Chrome
                    # was actually launched on.
                    cfg = self._cfg
                await self._open_chrome_upstream(cfg)
        except Exception:
            metrics().upstream_open_failed_total += 1
            raise
        else:
            metrics().upstream_open_succeeded_total += 1
            # F-3: emit BrowserwrightDaemon.upstreamReady. Both open paths
            # finish with `state.set_connected(...)`, which is where
            # `state.upstream_ws_url` is expected to be set.
            await self._broadcast_event(
                "BrowserwrightDaemon.upstreamReady",
                {
                    "backend": cfg.backend or "auto",
                    "ws_url": self.state.upstream_ws_url,
                },
            )

            # Task #76: client frames that arrived during the lazy-open
            # window were buffered per client. Replay them now that the
            # upstream is live and the router's upstream send is wired.
            try:
                await self.router.drain_pre_open_buffers()
            except Exception as e:
                logger.warning("drain pre-open buffers failed: %r", e)
|
|
649
|
+
|
|
650
|
+
async def _open_chrome_upstream(self, cfg: Config) -> None:
    """Resolve a CDP websocket URL for `cfg` and connect to it.

    On success the new UpstreamConnection becomes `self.upstream`, the
    router is given its `send_text` and `send_command` callables, target
    discovery is requested, and the state is marked connected with the
    resolved ws URL.

    Args:
        cfg: configuration to resolve. When `cfg.backend == "cloud"` the
            handshake headers and ssl context come from
            `_build_cloud_auth`.

    Raises:
        Unavailable: resolve failed; the state is marked disconnected with
            close reason "backend_lost" before re-raising.
        Exception: building the cloud auth or opening the connection
            failed; same bookkeeping, then re-raised.
    """
    # Mark this resolve as Mode-B-originated. Reserved for future backends
    # that need to diverge per call site (Mode A short-conn vs Mode B
    # long-running daemon).
    from .. import resolver as _resolver_mod
    ctx_token = _resolver_mod.caller_context.set("mode_b_serve")
    try:
        rr = await resolve(cfg)
    except Unavailable as e:
        logger.warning("upstream resolve failed: %s", e)
        self.state.last_close_reason = "backend_lost"
        await self.state.set_disconnected()
        raise
    finally:
        # Restore the caller context on EVERY exit path, including
        # exceptions other than Unavailable, so the marker never leaks into
        # later resolves made from this context.
        _resolver_mod.caller_context.reset(ctx_token)

    try:
        # v0.5: for backend=cloud the cloud config's AuthProvider supplies
        # headers + ssl_context for the ws handshake; every other backend
        # connects without them. Done inside this try so an auth failure
        # gets the same disconnected bookkeeping as a failed connect.
        additional_headers: dict[str, str] = {}
        ssl_context = None
        if cfg.backend == "cloud":
            additional_headers, ssl_context = await self._build_cloud_auth(cfg)

        conn = UpstreamConnection(
            on_frame=self.router.forward_from_upstream,
            on_close=self._on_upstream_closed,
        )
        await conn.open(
            rr.ws_url,
            timeout=cfg.timeout,
            additional_headers=additional_headers or None,
            ssl_context=ssl_context,
        )
    except Exception as e:
        logger.warning("upstream open failed: %r", e)
        self.state.last_close_reason = "backend_lost"
        await self.state.set_disconnected()
        raise

    self.upstream = conn
    self.router.update_upstream_send(conn.send_text)
    # Phase 3: expose the upstream's daemon-internal command channel to the
    # Router so the unified session verbs (openBackgroundTab / closeTab /
    # userscript) have an rdp implementation via raw CDP. It uses an id
    # space distinct from client traffic and is cleared again on close.
    self.router._upstream_command = conn.send_command
    # Ask Chrome to report all targets so the last_activated table can be
    # maintained without the client enabling discovery itself. A failure
    # here is logged and does not abort the open.
    try:
        await conn.send_command(
            "Target.setDiscoverTargets", {"discover": True})
    except Exception as e:
        logger.warning("setDiscoverTargets failed: %r", e)
    await self.state.set_connected(rr.ws_url, was_popup=False)
|
|
709
|
+
|
|
710
|
+
async def _launch_rdp_chrome(self, cfg: Config) -> None:
    """Phase 3 (C2 ephemeral): launch the Chrome this rdp session owns.

    The Chrome is a dedicated process on its own port with profile
    `bs-s{id}`.

    Idempotent: when `rdp_pid` is already set this returns immediately, so a
    reconnect does not spawn a second Chrome.

    Port selection: `cfg.backends.rdp.port` is reused when it is set;
    otherwise a free loopback port is picked and pinned onto `self._cfg`
    (via replaced dataclass copies) for the resolve that follows.

    `launch_chrome.launch_chrome` is called in-process (NOT through the CLI)
    so the spawned pid is visible for teardown. The pid and profile path are
    read from the `extras` mapping of its result. Any exception it raises
    propagates to the caller.
    """
    if self.rdp_pid is not None:
        return  # already launched (warm reconnect)
    from ..launch_chrome import launch_chrome as _launch_chrome

    port = cfg.backends.rdp.port
    if not port:
        # No port pinned on the cfg: let the OS pick a free one by binding
        # to port 0, then release the socket so Chrome can take the port.
        import socket as _socket
        with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as probe:
            probe.bind(("127.0.0.1", 0))
            port = probe.getsockname()[1]
        import dataclasses as _dc
        pinned_rdp = _dc.replace(cfg.backends.rdp, port=port)
        pinned_backends = _dc.replace(cfg.backends, rdp=pinned_rdp)
        cfg = self._cfg = _dc.replace(cfg, backends=pinned_backends)

    profile = f"bs-s{self.session_id}"
    logger.info("launching rdp Chrome for session %s on port %d (profile %s)",
                self.session_id, port, profile)
    launched = await _launch_chrome(cfg, profile=profile, persistent=True,
                                    port=port, timeout=max(cfg.timeout, 30.0))
    details = launched.get("extras") or {}
    self.rdp_pid = details.get("pid")
    self.rdp_profile_dir = details.get("profile_path")
    self.rdp_port = port
|
|
763
|
+
|
|
764
|
+
def _kill_rdp_chrome(self) -> None:
    """Phase 3 teardown: SIGTERM the Chrome recorded in `rdp_pid`.

    Best-effort: the process may already be gone, in which case the OS
    error is logged at debug level and swallowed. `rdp_pid` is cleared
    before the signal is sent, so a later launch starts fresh.

    The profile dir is left on disk on purpose: it is a persistent
    `bs-s{id}` dir that orphan-cleanup sweeps on next startup, and removing
    it inline would race Chrome's shutdown writeback.
    """
    pid, self.rdp_pid = self.rdp_pid, None
    if pid is None:
        return
    import os as _os
    import signal as _signal
    try:
        _os.kill(pid, _signal.SIGTERM)
        logger.info("killed rdp Chrome pid %d for session %s",
                    pid, self.session_id)
    except OSError as e:
        # ProcessLookupError and PermissionError are OSError subclasses.
        logger.debug("rdp Chrome pid %s already gone: %r", pid, e)
|
|
782
|
+
|
|
783
|
+
async def _build_cloud_auth(self, cfg: Config) -> tuple[dict[str, str], Any]:
    """Return `(headers, ssl_context)` for the cloud backend's ws handshake.

    The AuthProvider is built from `cfg.backends.cloud` (`auth_kind` and
    `auth`). With no `auth_kind` configured the result is `({}, None)`.

    A `UserError` raised while building or querying the provider is logged
    and turned into `({}, None)`, so the connect proceeds without auth. Any
    other exception propagates to the caller.
    """
    from ..auth import build_auth_provider
    from ..errors import UserError

    no_auth: tuple[dict[str, str], Any] = ({}, None)
    cloud_cfg = cfg.backends.cloud
    if not cloud_cfg.auth_kind:
        return no_auth
    try:
        provider = build_auth_provider(cloud_cfg.auth_kind, cloud_cfg.auth)
        # Headers are awaited first, then the ssl context is read.
        return await provider.headers(), provider.ssl_context()
    except UserError as e:
        logger.warning("cloud auth misconfigured: %s", e)
        return no_auth
|
|
804
|
+
|
|
805
|
+
async def _open_extension_upstream(self, cfg: Config) -> None:
    """v0.4 extension backend: wrap the holder's relay as the upstream.

    The relay is expected to be attached to the holder already. This method
    waits (with a timeout) for the ExtensionUpstream to open, wires the
    extension-only callbacks onto the router, and marks the state connected.

    Raises:
        Unavailable: no relay is attached, or the open timed out.
        Exception: any other failure from the open, re-raised after the
            state is marked disconnected with reason "backend_lost".
    """
    async def _mark_backend_lost() -> None:
        # Shared failure bookkeeping for every error path below.
        self.state.last_close_reason = "backend_lost"
        await self.state.set_disconnected()

    if self.relay is None:
        # The holder was never given a relay. Raise instead of hanging so
        # the misconfiguration is visible.
        await _mark_backend_lost()
        raise Unavailable(
            "extension backend selected but relay was never started — "
            "internal bug, please report")

    try:
        ext = ExtensionUpstream(
            relay=self.relay,
            on_frame=self.router.forward_from_upstream,
            on_close=self._on_upstream_closed,
        )
        # Wait at least 60s even if the configured timeout is shorter: the
        # user may still have to load the extension / click its popup
        # (spec §8.4 'extension-permission' ux_cost).
        await ext.open(timeout=max(cfg.timeout, 60.0))
    except asyncio.TimeoutError:
        await _mark_backend_lost()
        raise Unavailable(
            "no extension connected within timeout — load the daemon's "
            "Chrome extension from `chrome-extension/`")
    except Exception as e:
        logger.warning("extension upstream open failed: %r", e)
        await _mark_backend_lost()
        raise

    self.upstream = ext
    self.router.update_upstream_send(ext.send_text)

    # IMPORTANT: wire every extension-only callback BEFORE
    # state.set_connected. Concurrent BrowserwrightDaemon.* handlers in the
    # proxy skip the lazy-open call once the phase is CONNECTED, so flipping
    # the phase first would let them see callback=None and answer -32601
    # incorrectly. trigger_close calls begin_closing before it clears the
    # callbacks. Other backends leave these callbacks unset.
    #
    # (router attribute, ExtensionUpstream method), applied in this order:
    wiring = (
        ("_attach_active_tab", "attach_active_tab"),        # v0.5.4 attach-active
        ("_open_background_tab", "open_background_tab"),    # Phase B
        ("_close_tab", "close_tab"),                        # Phase B
        ("_close_tab_by_target_id", "close_tab_by_target_id"),
        ("_end_session", "end_session"),                    # P5 per-session teardown
        ("_recover_session", "recover_session"),            # rebuild tab bindings
        ("_wait_session_announce", "wait_session_announce"),
        ("_userscript_request", "userscript_request"),
        # Scope Target.getTargets to the requesting session's tab group so
        # extension sessions sharing one Chrome are mutually invisible.
        ("_scoped_targets", "scoped_target_infos"),
    )
    for router_attr, ext_method in wiring:
        setattr(self.router, router_attr, getattr(ext, ext_method))

    await self.state.set_connected(ext.ws_url or "ext://relay",
                                   was_popup=False)
|
|
872
|
+
|
|
873
|
+
async def trigger_close(self, reason: CloseReason) -> None:
    """Run the spec §6.5 close etiquette and tear down the upstream.

    Sequence per spec §6.5:
      1. send Target.detachedFromTarget for each owned sessionId
      2. send BrowserwrightDaemon.upstreamClosed
      3. close client ws with 1011
    Steps (1) and (2) happen here. Step (3) is the client handler's job;
    this method only ends with `state.set_disconnected()`.

    Idempotent: returns immediately when the phase is already DISCONNECTED
    or CLOSING.

    Args:
        reason: close reason, passed to `state.begin_closing`, included in
            the `upstreamClosed` event and used as the ws close reason.
    """
    if self.state.upstream_phase in (UpstreamPhase.DISCONNECTED, UpstreamPhase.CLOSING):
        # Already closing / closed.
        return
    await self.state.begin_closing(reason)

    # Step 1: synthetic Target.detachedFromTarget per session, sent to the
    # client that owns it, carrying that client's local sessionId and the
    # binding's targetId. Best-effort: an unreachable client is skipped.
    # client.sessions is not cleared here.
    for cid, client in list(self.state.clients.items()):
        for local_sid, binding in list(client.sessions.items()):
            try:
                await self.router._send_to_client(cid, json.dumps({
                    "method": "Target.detachedFromTarget",
                    "params": {
                        "sessionId": local_sid,
                        "targetId": binding.target_id,
                    },
                }))
            except Exception:
                pass

    # Step 2: BrowserwrightDaemon.upstreamClosed to every client, through
    # the shared best-effort fan-out helper (same envelope shape).
    await self._broadcast_event(
        "BrowserwrightDaemon.upstreamClosed", {"reason": reason})

    # Detach the upstream from the holder and the router BEFORE closing it,
    # so nothing new is routed to a connection that is going away.
    up = self.upstream
    self.upstream = None
    self.router.update_upstream_send(None)
    # Drop every callback wired by `_open_extension_upstream` so a
    # post-close BrowserwrightDaemon.* verb errors cleanly instead of racing
    # a torn-down upstream.
    self.router._attach_active_tab = None
    self.router._open_background_tab = None
    self.router._close_tab = None
    self.router._close_tab_by_target_id = None
    self.router._end_session = None
    self.router._recover_session = None
    self.router._wait_session_announce = None
    self.router._userscript_request = None
    # Also wired at open time; without this the router would keep a bound
    # method of the closed ExtensionUpstream.
    self.router._scoped_targets = None
    # Phase 3: drop the rdp raw-CDP command channel for the same reason.
    self.router._upstream_command = None
    if up is not None:
        try:
            await up.close(code=1000, reason=reason)
        except Exception:
            # Best-effort: the connection may already be dead.
            pass

    # Phase 3 (C2 ephemeral): an rdp context's Chrome is a daemon child and
    # must die with the upstream, on every close path. rdp_pid is None on
    # holders that never launched a Chrome.
    if self.rdp_pid is not None:
        self._kill_rdp_chrome()

    # Step 3 is not performed here; see the docstring.
    await self.state.set_disconnected()
|
|
954
|
+
|
|
955
|
+
async def _on_upstream_closed(self, reason: str) -> None:
    """`on_close` callback: the upstream dropped on its own.

    Counts the event, then — unless a close is already in progress or done —
    runs the close etiquette with reason "chrome_exit". The `reason`
    argument is not used.

    Phase 3 (docs/refactor-single-daemon.md §Notes): when the holder has a
    `session_id` and the router exposes a `daemon`, the session's context is
    dropped from the daemon's registry as well, so a later ensureSession
    recreates a fresh context. A failure while dropping is logged, not
    raised.
    """
    metrics().upstream_closed_total += 1
    phase = self.state.upstream_phase
    if phase in (UpstreamPhase.DISCONNECTED, UpstreamPhase.CLOSING):
        return
    await self.trigger_close("chrome_exit")

    sid = self.session_id
    if sid is None:
        return
    daemon = getattr(self.router, "daemon", None)
    if daemon is None:
        return
    try:
        daemon.drop_rdp_context(sid)
    except Exception as e:
        logger.warning("drop rdp context %s failed: %r", sid, e)
|
|
976
|
+
|
|
977
|
+
|
|
978
|
+
# ---- graceful shutdown -----------------------------------------------------
|
|
979
|
+
|
|
980
|
+
|
|
981
|
+
async def _idle_watchdog(daemon: "Daemon", idle_after: float | None) -> None:
    """Periodic supervisor for idle upstreams and per-session executors.

    Spec §6.5/§6.6: when `idle_after` is set, each CONNECTED upstream that
    has seen no activity for `idle_after` seconds is closed with reason
    "idle_close"; the next client command lazy-opens it again. Every context
    is checked independently, so one busy upstream does not keep an idle one
    warm.

    Phase B (PR2) executor supervision, in the same loop:
      - crash-reap (ALWAYS, even with idle-close off): `reap_dead()`.
      - idle-reap (only when `idle_after` is set): `reap_idle(idle_after)`.

    Args:
        daemon: provides `executors`, `all_contexts()` and
            `drop_rdp_context()`.
        idle_after: idle threshold in seconds; None or 0 disables idle-close
            and idle-reap.

    The loop polls every `max(1.0, idle_after / 2)` seconds, or every 5s
    when idle handling is off. It runs until cancelled and then returns
    normally.
    """
    poll = 5.0 if not idle_after else max(1.0, idle_after / 2.0)
    try:
        while True:
            await asyncio.sleep(poll)
            # --- executor supervision (Phase B PR2) ---
            try:
                daemon.executors.reap_dead()
                if idle_after:
                    daemon.executors.reap_idle(idle_after)
            except Exception as e:  # noqa: BLE001 - never let reap break the loop
                logger.warning("executor reap failed: %r", e)
            # --- upstream idle-close (gated) ---
            if not idle_after:
                continue
            # Snapshot the contexts: this loop awaits and drops rdp contexts
            # while iterating, so the underlying collection may change.
            for ctx in list(daemon.all_contexts()):
                if ctx.state.upstream_phase != UpstreamPhase.CONNECTED:
                    continue
                idle_for = time.time() - ctx.state.last_activity_at
                if idle_for < idle_after:
                    continue
                logger.info("idle-watchdog: closing %s upstream after %.1fs",
                            ctx.backend, idle_for)
                try:
                    await ctx.holder.trigger_close("idle_close")
                except Exception as e:
                    logger.warning("idle close failed: %r", e)
                # An idle-closed rdp context's Chrome is gone; drop the
                # context so dead per-session entries do not accumulate.
                # (`_on_upstream_closed` returns early once trigger_close
                # has changed the phase, so it does not drop it.) Guarded so
                # a failing drop cannot kill the watchdog.
                if ctx.backend == "rdp" and ctx.session_id is not None:
                    try:
                        daemon.drop_rdp_context(ctx.session_id)
                    except Exception as e:  # noqa: BLE001
                        logger.warning("drop rdp context %s failed: %r",
                                       ctx.session_id, e)
    except asyncio.CancelledError:
        return
|
|
1035
|
+
|
|
1036
|
+
|
|
1037
|
+
async def _graceful_shutdown(daemon: "Daemon") -> None:
    """SIGTERM path: close every context's upstream, then kill executors.

    Each context's holder runs the close etiquette with reason
    "daemon_shutdown". A failure on one context is logged and the loop moves
    on to the next. Afterwards every registered executor is killed (Phase B
    PR2: they are daemon children and must die with the daemon); a failure
    there is logged as well.
    """
    async def _close_context(context) -> None:
        # Per-context close; errors are reported, never propagated.
        try:
            await context.holder.trigger_close("daemon_shutdown")
        except Exception as e:
            logger.warning("shutdown close failed for %s: %r", context.backend, e)

    for context in daemon.all_contexts():
        await _close_context(context)

    try:
        daemon.executors.kill_all()
    except Exception as e:  # noqa: BLE001
        logger.warning("executor shutdown kill failed: %r", e)
|
|
1051
|
+
|
|
1052
|
+
|
|
1053
|
+
# ---- helper for the cli serve dispatcher ----------------------------------
|
|
1054
|
+
|
|
1055
|
+
|
|
1056
|
+
def make_holder(state: DaemonState, router: Router, cfg: Config) -> _UpstreamHolder:
    """Test seam: construct an `_UpstreamHolder` bound to `cfg`.

    `session_id` is left at its default, so the holder has none.
    """
    holder = _UpstreamHolder(state, router, cfg)
    return holder
|