browserwright 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. browserwright/__init__.py +33 -0
  2. browserwright/__main__.py +6 -0
  3. browserwright/_executor/__init__.py +47 -0
  4. browserwright/_executor/__main__.py +9 -0
  5. browserwright/_executor/client.py +127 -0
  6. browserwright/_executor/process.py +652 -0
  7. browserwright/_executor/protocol.py +152 -0
  8. browserwright/api.py +66 -0
  9. browserwright/cdp.py +285 -0
  10. browserwright/cli.py +741 -0
  11. browserwright/daemon/__init__.py +8 -0
  12. browserwright/daemon/_ipc.py +444 -0
  13. browserwright/daemon/active_tab.py +183 -0
  14. browserwright/daemon/auth.py +395 -0
  15. browserwright/daemon/backends/__init__.py +59 -0
  16. browserwright/daemon/backends/base.py +120 -0
  17. browserwright/daemon/backends/cloud.py +222 -0
  18. browserwright/daemon/backends/env.py +119 -0
  19. browserwright/daemon/backends/extension.py +185 -0
  20. browserwright/daemon/backends/rdp.py +214 -0
  21. browserwright/daemon/cli.py +1437 -0
  22. browserwright/daemon/config.py +380 -0
  23. browserwright/daemon/doctor.py +179 -0
  24. browserwright/daemon/errors.py +34 -0
  25. browserwright/daemon/launch_chrome.py +353 -0
  26. browserwright/daemon/observability.py +181 -0
  27. browserwright/daemon/platforms.py +234 -0
  28. browserwright/daemon/resolver.py +72 -0
  29. browserwright/daemon/server/__init__.py +6 -0
  30. browserwright/daemon/server/daemon.py +229 -0
  31. browserwright/daemon/server/executor_registry.py +434 -0
  32. browserwright/daemon/server/extension_upstream.py +677 -0
  33. browserwright/daemon/server/facade.py +375 -0
  34. browserwright/daemon/server/facade_extension.py +969 -0
  35. browserwright/daemon/server/listener.py +1058 -0
  36. browserwright/daemon/server/proxy.py +1991 -0
  37. browserwright/daemon/server/relay.py +783 -0
  38. browserwright/daemon/server/state.py +432 -0
  39. browserwright/daemon/server/upstream.py +266 -0
  40. browserwright/daemon/userscripts.py +150 -0
  41. browserwright/discovery.py +213 -0
  42. browserwright/errors.py +177 -0
  43. browserwright/health.py +169 -0
  44. browserwright/install.py +628 -0
  45. browserwright/memory/__init__.py +15 -0
  46. browserwright/memory/_md.py +120 -0
  47. browserwright/memory/_yaml.py +217 -0
  48. browserwright/memory/global_mem.py +201 -0
  49. browserwright/memory/repl_mem.py +28 -0
  50. browserwright/memory/session_decisions.py +53 -0
  51. browserwright/memory/site_mem.py +381 -0
  52. browserwright/mode_b_client.py +590 -0
  53. browserwright/multitask.py +131 -0
  54. browserwright/output_schema.py +99 -0
  55. browserwright/primitives/__init__.py +67 -0
  56. browserwright/primitives/discovery_api.py +79 -0
  57. browserwright/primitives/http.py +42 -0
  58. browserwright/primitives/inspect.py +876 -0
  59. browserwright/primitives/interact.py +518 -0
  60. browserwright/primitives/page.py +556 -0
  61. browserwright/primitives/site.py +143 -0
  62. browserwright/release_install.py +466 -0
  63. browserwright/repl/__init__.py +6 -0
  64. browserwright/repl/_namespace.py +106 -0
  65. browserwright/repl/_smart_goto.py +236 -0
  66. browserwright/repl/inline.py +180 -0
  67. browserwright/repl/playwright_handle.py +449 -0
  68. browserwright/repl/snapshot.py +150 -0
  69. browserwright/session.py +229 -0
  70. browserwright/session_create.py +252 -0
  71. browserwright/session_ctx.py +24 -0
  72. browserwright/session_registry.py +133 -0
  73. browserwright/session_runtime.py +133 -0
  74. browserwright/site_skills_starter/github.com/SKILL.md +14 -0
  75. browserwright/site_skills_starter/github.com/memory.md +29 -0
  76. browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
  77. browserwright/site_skills_starter/google.com/SKILL.md +16 -0
  78. browserwright/site_skills_starter/google.com/memory.md +27 -0
  79. browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
  80. browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
  81. browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
  82. browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
  83. browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
  84. browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
  85. browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
  86. browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
  87. browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
  88. browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
  89. browserwright/skill_doc.py +140 -0
  90. browserwright/skill_runtime.md +194 -0
  91. browserwright/subscriptions.py +213 -0
  92. browserwright/task_runner.py +125 -0
  93. browserwright/version.py +117 -0
  94. browserwright-0.6.2.dist-info/METADATA +12 -0
  95. browserwright-0.6.2.dist-info/RECORD +98 -0
  96. browserwright-0.6.2.dist-info/WHEEL +5 -0
  97. browserwright-0.6.2.dist-info/entry_points.txt +3 -0
  98. browserwright-0.6.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1058 @@
1
+ """WebSocket listener + lifecycle orchestrator.
2
+
3
+ This module wires together:
4
+ - `_ipc` (socket file / token / ping)
5
+ - `state` (DaemonState)
6
+ - `upstream` (UpstreamConnection)
7
+ - `proxy` (Router)
8
+
9
+ Spec §8.5: the listener task accepts clients, the upstream-lifecycle task
10
+ opens/closes the upstream ws lazily, and the keepalive task is built into
11
+ UpstreamConnection (heartbeat) + websockets server (ws-level pings).
12
+
13
+ v0.2 single-client model (second ws upgrade rejected with HTTP 503 + a
14
+ clear body, spec §9.2) was removed in v0.3: clients now connect concurrently.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import asyncio
19
+ import contextlib
20
+ import http
21
+ import json
22
+ import logging
23
+ import os
24
+ import signal
25
+ import sys
26
+ import time
27
+ from typing import Any
28
+ from urllib.parse import parse_qs, urlparse
29
+
30
+ import websockets
31
+ from websockets.asyncio.server import ServerConnection, serve, unix_serve
32
+
33
+ from .. import _ipc
34
+ from .. import __version__
35
+ from ..config import Config
36
+ from ..errors import Unavailable
37
+ from ..resolver import resolve
38
+ from ..observability import metrics, install_json_logging_if_requested
39
+ from .state import CloseReason, DaemonState, UpstreamPhase
40
+ from .proxy import Router
41
+ from .daemon import Daemon, UnknownSessionError, UpstreamContext
42
+ from .upstream import UpstreamConnection
43
+ from .relay import RelayServer
44
+ from .extension_upstream import ExtensionUpstream
45
+ from .facade import PlaywrightFacade
46
+
47
+ logger = logging.getLogger(__name__)
48
+
49
+
50
+ # ---- per-upstream context factory ------------------------------------------
51
+
52
+
53
def make_context(*, backend: str, cfg: Config,
                 session_id: str | None = None) -> UpstreamContext:
    """Assemble the `(state, router, holder)` triple for one upstream.

    A fresh `DaemonState` is created for `backend`, a `Router` is bound to
    that state, and an `_UpstreamHolder` ties both to `cfg` (and, when given,
    to `session_id`). The three objects are returned bundled in an
    `UpstreamContext`.

    No relay is started and no upstream is opened here; this function only
    constructs objects.
    """
    daemon_state = DaemonState(backend_name=backend)
    context_router = Router(daemon_state)
    upstream_holder = _UpstreamHolder(
        daemon_state, context_router, cfg, session_id=session_id)
    context = UpstreamContext(
        backend=backend,
        state=daemon_state,
        router=context_router,
        holder=upstream_holder,
        session_id=session_id,
    )
    return context
71
+
72
+
73
+ # ---- top-level entry -------------------------------------------------------
74
+
75
+
76
async def run_serve(cfg: Config) -> int:
    """Run a Mode B daemon until SIGTERM / Ctrl-C / shutdown.

    There is exactly one global daemon on a fixed socket — no instance name.

    Returns:
        0 after a normal shutdown, 1 when another daemon already answers the
        ping probe, 2 when the extension relay cannot be bound.
    """
    # Stale-detect: ping any existing endpoint before binding. If something
    # answers, refuse to start a second copy of ourselves (enforces the
    # "at most one global daemon" invariant) — but if the ping comes back
    # negative, we clean up the dead socket file and proceed.
    existing_pid, existing_version = await _ipc.ping_status_async(timeout=1.0)
    if existing_pid is not None:
        version_hint = ""
        if existing_version and existing_version != __version__:
            version_hint = (
                f" (running {existing_version}, installed {__version__}; "
                "use `browserwright-daemon stop` or `browserwright-daemon restart`)"
            )
        print(
            f"browserwright-daemon already running (pid {existing_pid}); "
            f"use `browserwright-daemon stop` to shut it down{version_hint}",
            file=sys.stderr,
        )
        return 1
    _ipc.cleanup_endpoint()

    # Sweep leftovers of a prior hard crash before serving: orphan rdp Chrome
    # processes / `bs-s{id}` profile dirs first, then orphan executor
    # subprocesses and their stale sockets / discovery files.
    _cleanup_orphan_rdp_chrome()
    from .executor_registry import cleanup_orphan_executors
    cleanup_orphan_executors()

    # Log file is best-effort — we route Python logging to it but never crash
    # the daemon over a write failure. The JSON formatter swap (opt-in) must
    # run after the handlers exist.
    _wire_logging()
    install_json_logging_if_requested()
    logger.info("browserwright-daemon %s starting (backend=%s)",
                __version__, cfg.backend or "extension")

    # One global daemon holding many upstream contexts. The shared context is
    # built for cfg.backend (default "extension"); the Daemon receives
    # `make_context` so it can build further contexts itself.
    shared_backend = cfg.backend or "extension"
    # Pin the shared context's holder cfg to the resolved backend so the
    # holder sees backend="extension" rather than None.
    # dataclasses.replace keeps the rest of cfg intact.
    import dataclasses as _dc
    shared_cfg = _dc.replace(cfg, backend=shared_backend)
    shared_context = make_context(backend=shared_backend, cfg=shared_cfg)
    daemon = Daemon(cfg=cfg, shared_context=shared_context,
                    make_context=make_context)

    # SIGTERM / SIGINT → set the stop event. We don't tear down inline because
    # the graceful shutdown sequence still has to run.
    stop = asyncio.Event()

    def _on_signal():
        stop.set()

    loop = asyncio.get_running_loop()
    if sys.platform != "win32":
        for s in (signal.SIGTERM, signal.SIGINT):
            try:
                loop.add_signal_handler(s, _on_signal)
            except NotImplementedError:
                pass  # e.g. inside pytest event loop

    # PID file (best-effort).
    _ipc.write_pid(os.getpid())

    handler = _ClientHandler(daemon, cfg)
    server = await _open_server(handler)

    # For the extension shared context, start the relay ws server eagerly so
    # it is reachable before any client connects. The relay belongs to the
    # shared context's holder.
    if shared_backend == "extension":
        try:
            # Host/port precedence is centralized in resolved_host_port().
            host, port = cfg.backends.extension.resolved_host_port()
            shared_context.holder.relay = RelayServer(host=host, port=port)
            port = await shared_context.holder.relay.start()
            logger.info("extension relay started on port %d", port)
        except OSError as e:
            print(
                f"browserwright-daemon failed to bind extension relay: {e}",
                file=sys.stderr,
            )
            server.close()
            with contextlib.suppress(Exception):
                await server.wait_closed()
            _ipc.cleanup_endpoint()
            return 2

    # Playwright facade: an additional endpoint beside the agent socket. It is
    # started whenever cfg.resolved_facade_port() is not None. The bound ws is
    # advertised through the `_ipc` facade discovery file. A bind failure here
    # is non-fatal: we log and continue serving the agent path.
    facade: PlaywrightFacade | None = None
    facade_port = cfg.resolved_facade_port()
    if facade_port is not None:
        try:
            # Pass a getter (not the relay object) so the facade always sees
            # whatever relay the shared holder currently has.
            def _shared_relay() -> RelayServer | None:
                return shared_context.holder.relay

            facade = PlaywrightFacade(cfg=cfg, port=facade_port,
                                      relay_getter=_shared_relay,
                                      daemon=daemon)
            bound = await facade.start()
            facade_ws = f"ws://127.0.0.1:{bound}/cdp"
            # Best-effort: a write failure must not abort serving.
            with contextlib.suppress(Exception):
                _ipc.write_facade_file(facade_ws, bound)
            logger.info("playwright facade started on port %d "
                        "(connect_over_cdp %s)", bound, facade_ws)
        except OSError as e:
            logger.warning("playwright facade failed to bind port %d: %r; "
                           "continuing without it", facade_port, e)
            facade = None

    # The watchdog runs unconditionally; cfg.idle_close_after (possibly None)
    # is handed to it and interpreted inside the loop.
    idle_task: asyncio.Task = asyncio.create_task(
        _idle_watchdog(daemon, cfg.idle_close_after))
    try:
        await stop.wait()
        logger.info("browserwright-daemon shutdown requested")
        await _graceful_shutdown(daemon)
    finally:
        idle_task.cancel()
        # Awaiting a cancelled task raises asyncio.CancelledError, which is a
        # BaseException (Python 3.8+) and therefore NOT covered by
        # suppress(Exception). It must be suppressed explicitly, otherwise it
        # escapes this `finally` and skips every teardown step below.
        with contextlib.suppress(asyncio.CancelledError, Exception):
            await idle_task
        # Stop the Playwright facade if it bound.
        if facade is not None:
            with contextlib.suppress(Exception):
                await facade.stop()
        # Stop every context's relay; iterate all contexts so none can leak.
        for ctx in daemon.all_contexts():
            if ctx.holder.relay is not None:
                with contextlib.suppress(Exception):
                    await ctx.holder.relay.stop()
        server.close()
        with contextlib.suppress(Exception):
            await server.wait_closed()
        _ipc.cleanup_endpoint()
    return 0
254
+
255
+
256
+ # ---- rdp orphan cleanup (Phase 3 / C2 ephemeral) ---------------------------
257
+
258
+
259
def _cleanup_orphan_rdp_chrome() -> None:
    """Best-effort: on daemon startup, kill stray Chrome processes + remove
    leftover `bs-s{id}` profile dirs from a prior daemon crash.

    Conservative by design:
      - Only directories named `bs-s*` directly under `<cache>/profiles` are
        touched. The system process table is never scanned; a pid is only
        signalled when it was read from that profile's own `SingletonLock`.
      - `SingletonLock` is read as a symlink and the text after the last `-`
        of its target is parsed as the pid (expected target shape
        `<hostname>-<pid>`). The pid gets SIGTERM, then the whole profile dir
        is removed. A profile without a readable lock is just removed.
      - pids <= 1 and the daemon's own pid are never signalled: `kill(0, …)`
        would signal the daemon's whole process group, and pid 1 is init.
      - Every step is wrapped so a permission error / race / malformed lock
        never crashes serve.
    """
    import os as _os
    import shutil as _shutil
    import signal as _signal
    from ..platforms import cache_dir

    profiles_root = cache_dir() / "profiles"
    if not profiles_root.is_dir():
        return
    try:
        # Materialize the listing up front: we delete entries while looping.
        entries = list(profiles_root.iterdir())
    except OSError as e:
        logger.debug("orphan-cleanup: cannot list %s: %r", profiles_root, e)
        return

    own_pid = _os.getpid()
    for entry in entries:
        if not entry.name.startswith("bs-s") or not entry.is_dir():
            continue
        # Try to identify + kill the Chrome that owns this profile via its
        # SingletonLock symlink (target == "<hostname>-<pid>").
        lock = entry / "SingletonLock"
        try:
            target = _os.readlink(lock)
            pid = int(target.rsplit("-", 1)[-1])
        except (OSError, ValueError):
            pid = None
        if pid is not None and (pid <= 1 or pid == own_pid):
            # A corrupt lock must never make us signal ourselves, our process
            # group (pid 0) or init (pid 1).
            logger.debug("orphan-cleanup: ignoring implausible pid %d "
                         "(profile %s)", pid, entry.name)
            pid = None
        if pid is not None:
            try:
                _os.kill(pid, _signal.SIGTERM)
                logger.info("orphan-cleanup: SIGTERM stray rdp Chrome pid %d "
                            "(profile %s)", pid, entry.name)
            except (OSError, OverflowError):
                # OSError covers ProcessLookupError / PermissionError;
                # OverflowError is raised for a pid too large for the OS.
                pass
        # Remove the leftover profile dir so the next launch of this session id
        # starts from a clean, lock-free profile. rmtree(ignore_errors=True)
        # never raises, so verify the outcome instead of assuming success.
        _shutil.rmtree(entry, ignore_errors=True)
        if _os.path.lexists(entry):
            logger.debug("orphan-cleanup: could not remove %s", entry)
        else:
            logger.info("orphan-cleanup: removed stale profile dir %s", entry.name)
308
+
309
+
310
+ # ---- log wiring ------------------------------------------------------------
311
+
312
+
313
def _wire_logging() -> None:
    """Attach a file handler (path from `_ipc.log_path()`) to the root logger.

    The root logger level is set to INFO. When stderr is a TTY a plain stderr
    stream handler is attached as well, before the file handler. Any OSError
    raised along the way is swallowed, so logging setup is best-effort.
    """
    line_format = "%(asctime)s %(levelname)-7s %(name)s: %(message)s"
    with contextlib.suppress(OSError):
        log_file = _ipc.log_path()
        log_file.parent.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(str(log_file), encoding="utf-8")
        file_handler.setFormatter(logging.Formatter(line_format))
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.INFO)
        # Console echo only for a foreground serve (stderr is a terminal).
        if sys.stderr.isatty():
            console_handler = logging.StreamHandler(sys.stderr)
            root_logger.addHandler(console_handler)
        root_logger.addHandler(file_handler)
329
+
330
+
331
+ # ---- websockets server with single-client gate ----------------------------
332
+
333
+
334
async def _open_server(handler: "_ClientHandler"):
    """Bind the websocket listener and return the running server.

    On Windows (`_ipc.IS_WINDOWS`) a TCP socket plus token is obtained from
    `_ipc`, the port/token pair is written to the port file and the token is
    stored on `handler`. Otherwise a unix socket is used and a warning is
    logged when its permission bits are not 0600. In both cases the HTTP
    handshake is routed through `_make_process_request(handler)`.
    """
    # Options shared by both transports.
    common_options = dict(
        process_request=_make_process_request(handler),
        max_size=100 * 1024 * 1024,
        compression=None,
        ping_interval=20,
        ping_timeout=20,
    )

    if not _ipc.IS_WINDOWS:
        unix_sock = _ipc.make_unix_socket()
        # Only warn on unexpected permissions; binding still proceeds.
        perm_bits = os.stat(_ipc.sock_path()).st_mode & 0o777
        if perm_bits != 0o600:
            logger.warning("unexpected sock perms %o", perm_bits)
        unix_server = await unix_serve(
            handler.serve_one, sock=unix_sock, **common_options)
        logger.info("listening on %s", _ipc.sock_path())
        return unix_server

    tcp_sock, tcp_port, auth_token = _ipc.make_tcp_socket()
    _ipc.write_port_file(tcp_port, auth_token)
    handler.token = auth_token
    tcp_server = await serve(
        handler.serve_one, sock=tcp_sock, **common_options)
    logger.info("listening on 127.0.0.1:%d (token=%s...)",
                tcp_port, auth_token[:8])
    return tcp_server
373
+
374
+
375
def _make_process_request(handler: "_ClientHandler"):
    """Build the `process_request` hook that intercepts the HTTP handshake.

    The returned callable has two responsibilities:
      1. Any path starting with `/__ping__` is answered with HTTP 200 and the
         JSON body from `_ipc.make_pong_body(os.getpid())`, so the probe
         works without a ws upgrade.
      2. On Windows, the `?token=` query parameter must match
         `handler.token`; otherwise HTTP 401 is returned.

    Returning None lets the websocket upgrade proceed.

    The token comparison is constant-time (`hmac.compare_digest`) and fails
    closed: a missing query token or an unset server token is rejected
    instead of comparing equal as `None == None`.
    """
    import hmac

    def process_request(conn: ServerConnection, request) -> Any:
        path = request.path or "/"
        if path.startswith("/__ping__"):
            body = _ipc.make_pong_body(os.getpid())
            resp = conn.respond(http.HTTPStatus.OK, body.decode("utf-8"))
            resp.headers["Content-Type"] = "application/json"
            return resp
        if _ipc.IS_WINDOWS:
            got = _parse_query(path).get("token")
            expected = handler.token
            # Compare as bytes: compare_digest only accepts ASCII-only str.
            authorized = bool(got) and bool(expected) and hmac.compare_digest(
                got.encode("utf-8"), expected.encode("utf-8"))
            if not authorized:
                return conn.respond(
                    http.HTTPStatus.UNAUTHORIZED,
                    "missing or wrong ?token=\n",
                )
        return None  # allow upgrade

    return process_request
405
+
406
+
407
+ def _parse_query(path: str) -> dict[str, str]:
408
+ """Pull single-valued query params from the request path."""
409
+ parsed = urlparse(path)
410
+ q = parse_qs(parsed.query)
411
+ return {k: v[0] for k, v in q.items() if v}
412
+
413
+
414
+ # ---- per-client handler ----------------------------------------------------
415
+
416
+
417
class _ClientHandler:
    """Adapter between incoming websocket connections and upstream contexts.

    Each `ServerConnection` is dispatched to one `UpstreamContext` and served
    through that context's state, router and holder. The handler keeps the
    global `Daemon`; the request's `?session=<id>` query selects the context
    and `?client=<label>` supplies a label used in log messages.
    """

    def __init__(self, daemon: "Daemon", cfg: Config):
        self.daemon = daemon
        self.cfg = cfg
        # Starts as None; `_open_server` assigns it on Windows.
        self.token: str | None = None

    async def serve_one(self, conn: ServerConnection) -> None:
        """Serve a single client connection until it closes.

        Steps: read `client` / `session` from the request query, resolve the
        context (closing with code 1008 when the session is unknown),
        allocate and register the client on that context, route every
        incoming frame through the context's router, and finally release and
        unregister the client. The upstream itself is not closed here.
        """
        params = _parse_query(conn.request.path or "/")
        label = params.get("client", "anonymous")
        session_id = params.get("session") or None

        try:
            if session_id:
                ctx = self.daemon.context_for_required(session_id)
            else:
                ctx = self.daemon.context_for(None)
        except UnknownSessionError:
            logger.warning("refusing client %s: unknown session %s",
                           label, session_id)
            with contextlib.suppress(Exception):
                await conn.close(code=1008, reason="unknown browserwright session")
            return
        state, router, holder = ctx.state, ctx.router, ctx.holder

        # The client id comes from the daemon-wide counter, while the client
        # itself is registered in this context's state. The session id and
        # its ledger name (when a dict record exists) are stored on the client.
        session_name: str | None = None
        if session_id:
            from ... import session_registry
            record = session_registry.get(session_id)
            session_name = record.get("name") if isinstance(record, dict) else None
        client = state.allocate_client(
            label,
            client_id=next(self.daemon._next_client_id),
            session_id=session_id,
            session_name=session_name,
        )

        async def send_to_client(text: str) -> None:
            # A failed send is logged, never raised into the router.
            try:
                await conn.send(text)
            except Exception as e:
                logger.warning("client %d send failed: %r", client.client_id, e)

        router.register_client(client.client_id, send_to_client)
        router.bind_lifecycle(
            ensure_upstream=holder.ensure_open,
            trigger_disconnect=holder.trigger_close,
        )

        # With an upstream that is already open, (re)wire the router's
        # upstream send function to the holder's proxy.
        if holder.is_open:
            router.update_upstream_send(holder.send_text)

        metrics().client_connected_total += 1
        logger.info("client %d connected (label=%s, session=%s, backend=%s, total=%d)",
                    client.client_id, label, session_id or "-", ctx.backend,
                    len(state.clients))
        try:
            async for frame in conn:
                if isinstance(frame, str):
                    text = frame
                elif isinstance(frame, bytes):
                    text = frame.decode("utf-8", errors="replace")
                else:
                    continue
                metrics().client_frame_received_total += 1
                await router.route_from_client(client, text)
        except websockets.exceptions.ConnectionClosed:
            logger.info("client %d disconnected", client.client_id)
        except Exception as e:
            logger.warning("client %d crashed: %r", client.client_id, e)
        finally:
            metrics().client_disconnected_total += 1
            await router.release_client(client.client_id)
            router.unregister_client(client.client_id)
            # The upstream is intentionally left open for other clients and
            # for the next reconnect.
512
+
513
+
514
+ # ---- upstream lifecycle ----------------------------------------------------
515
+
516
+
517
+ class _UpstreamHolder:
518
+ """Owns the single UpstreamConnection. Provides lazy-open + graceful-close
519
+ primitives the Router can call.
520
+
521
+ v0.4: when `cfg.backend == "extension"` we replace the conventional ws
522
+ upstream with an ExtensionUpstream wrapping a RelayServer. The relay is
523
+ started eagerly (so doctor probe answers `available=true` as soon as
524
+ the daemon is up), and `ensure_open` blocks the first client until the
525
+ extension has connected.
526
+ """
527
+
528
+ def __init__(self, state: DaemonState, router: Router, cfg: Config,
529
+ *, session_id: str | None = None):
530
+ self.state = state
531
+ self.router = router
532
+ self.upstream: UpstreamConnection | ExtensionUpstream | None = None
533
+ self._open_lock = asyncio.Lock()
534
+ self._cfg: Config = cfg
535
+ # v0.4: only populated when backend=extension. Owned by the holder
536
+ # for the daemon's full lifetime; we don't tear down on idle-close
537
+ # so the extension's persistent ws to us stays warm.
538
+ self.relay: RelayServer | None = None
539
+ # Phase 3 (docs/refactor-single-daemon.md §P3 + C2): for an rdp context
540
+ # the daemon itself launches and owns a dedicated Chrome (own port +
541
+ # profile `bs-s{id}`). We record the launched process's pid + profile
542
+ # dir here so teardown can SIGTERM it and so orphan-cleanup can spot
543
+ # leftover `bs-s*` profiles after a crash. None on every other backend
544
+ # (the extension/env/cloud holders never own a Chrome process).
545
+ self.session_id: str | None = session_id
546
+ self.rdp_pid: int | None = None
547
+ self.rdp_profile_dir: str | None = None
548
+ self.rdp_port: int | None = None
549
+ self.rdp_owns_browser: bool = False
550
+
551
+ @property
552
+ def is_open(self) -> bool:
553
+ return self.upstream is not None and self.upstream.is_open
554
+
555
+ async def send_text(self, frame: str) -> None:
556
+ """Proxy to the live UpstreamConnection.send_text.
557
+
558
+ Exposed on the holder so callers in this module can pass
559
+ `holder.send_text` as the router's `upstream_send` callable without
560
+ needing to drill through `holder.upstream.send_text` (which races
561
+ with close: holder.upstream may become None mid-call).
562
+ """
563
+ conn = self.upstream
564
+ if conn is None:
565
+ raise RuntimeError("upstream not open")
566
+ await conn.send_text(frame)
567
+
568
+ async def _broadcast_event(self, method: str, params: dict) -> None:
569
+ """Fan a `{method, params}` envelope to every connected client.
570
+ Same shape as the existing `upstreamClosed` broadcast (listener
571
+ spec §6.5). Used by v0.5.3 F-3: surface `upstreamConnecting` and
572
+ `upstreamReady` lifecycle events so Skill code subscribing per
573
+ design-v2.md:550-551 actually sees something."""
574
+ envelope = json.dumps({"method": method, "params": params})
575
+ for cid in list(self.state.clients.keys()):
576
+ try:
577
+ await self.router._send_to_client(cid, envelope)
578
+ except Exception:
579
+ pass
580
+
581
+ async def ensure_open(self) -> None:
582
+ """Open upstream if not already. Idempotent + reentrant-safe.
583
+
584
+ v0.4 branches on `cfg.backend == "extension"`:
585
+ - extension → wait for the relay's first extension to send hello,
586
+ wrap in ExtensionUpstream, mark CONNECTED
587
+ - everything else → resolve a CDP ws URL and connect a real
588
+ UpstreamConnection
589
+
590
+ v0.5.3 F-3: emits two lifecycle events to subscribed clients:
591
+ - `BrowserwrightDaemon.upstreamConnecting {backend}` at the start of
592
+ the open attempt (after we've taken the lock and bumped state
593
+ to CONNECTING)
594
+ - `BrowserwrightDaemon.upstreamReady {backend, ws_url}` on successful
595
+ open (after `state.set_connected`)
596
+ Failed-open paths emit `upstreamClosed {reason}` via the
597
+ `trigger_close` path the resolver/connect call site already runs.
598
+ """
599
+ if self.is_open:
600
+ return
601
+ async with self._open_lock:
602
+ if self.is_open:
603
+ return
604
+ cfg = self._cfg
605
+ await self.state.begin_connecting(cfg.backend or "auto")
606
+ metrics().upstream_open_attempts_total += 1
607
+ # F-3: emit BrowserwrightDaemon.upstreamConnecting to all clients.
608
+ await self._broadcast_event(
609
+ "BrowserwrightDaemon.upstreamConnecting",
610
+ {"backend": cfg.backend or "auto"},
611
+ )
612
+
613
+ try:
614
+ if cfg.backend == "extension":
615
+ await self._open_extension_upstream(cfg)
616
+ else:
617
+ # Phase 3: an rdp context owns its Chrome. Launch it (once)
618
+ # BEFORE the resolve/connect path runs, so the cfg's pinned
619
+ # rdp port is actually listening when `_open_chrome_upstream`
620
+ # → resolve() probes it. Other rdp callers (env/cloud share
621
+ # `_open_chrome_upstream` too) skip this — only a holder with
622
+ # a session_id + rdp backend owns a Chrome.
623
+ if (cfg.backend == "rdp" and self.session_id is not None
624
+ and self.rdp_owns_browser):
625
+ await self._launch_rdp_chrome(cfg)
626
+ await self._open_chrome_upstream(cfg)
627
+ except Exception:
628
+ metrics().upstream_open_failed_total += 1
629
+ raise
630
+ else:
631
+ metrics().upstream_open_succeeded_total += 1
632
+ # F-3: emit BrowserwrightDaemon.upstreamReady. `state.upstream_ws_url`
633
+ # is set by both open paths via `state.set_connected(...)`.
634
+ await self._broadcast_event(
635
+ "BrowserwrightDaemon.upstreamReady",
636
+ {
637
+ "backend": cfg.backend or "auto",
638
+ "ws_url": self.state.upstream_ws_url,
639
+ },
640
+ )
641
+
642
+ # Task #76: any client frame that arrived during the lazy-open
643
+ # window was buffered per-client. Replay them now that the
644
+ # upstream is live and `_upstream_send` is wired.
645
+ try:
646
+ await self.router.drain_pre_open_buffers()
647
+ except Exception as e:
648
+ logger.warning("drain pre-open buffers failed: %r", e)
649
+
650
async def _open_chrome_upstream(self, cfg: Config) -> None:
    """Resolve a CDP websocket URL for ``cfg`` and connect to it.

    On success the new ``UpstreamConnection`` is stored on ``self.upstream``,
    the router's send hook and daemon-internal command hook are pointed at
    it, and the state is marked connected with the resolved URL.

    Args:
        cfg: Daemon configuration; ``cfg.backend`` selects whether cloud
            auth headers are built and ``cfg.timeout`` bounds the connect.

    Raises:
        Unavailable: ``resolve(cfg)`` could not produce an endpoint. The
            state is marked disconnected (``backend_lost``) before the
            exception is re-raised.
        Exception: anything raised while constructing / opening the
            connection; the state is likewise marked disconnected
            (``backend_lost``) before re-raising.
    """
    # Mark this resolve as Mode-B-originated. Reserved for future
    # backends that need to diverge per call site (Mode A short-conn
    # vs Mode B long-running daemon).
    from .. import resolver as _resolver_mod
    ctx_token = _resolver_mod.caller_context.set("mode_b_serve")
    try:
        rr = await resolve(cfg)
    except Unavailable as e:
        logger.warning("upstream resolve failed: %s", e)
        self.state.last_close_reason = "backend_lost"
        await self.state.set_disconnected()
        raise
    finally:
        # Restore the caller context on EVERY exit path (success,
        # Unavailable, any other exception, cancellation) so the
        # "mode_b_serve" marker never leaks past this resolve call.
        _resolver_mod.caller_context.reset(ctx_token)

    # v0.5: when backend=cloud, ask the cloud config's AuthProvider to
    # produce headers + ssl_context for the upstream ws handshake. For
    # every other backend (env/rdp) no auth is built and connect runs
    # unchanged.
    additional_headers: dict[str, str] = {}
    ssl_context = None
    if cfg.backend == "cloud":
        additional_headers, ssl_context = await self._build_cloud_auth(cfg)

    try:
        conn = UpstreamConnection(
            on_frame=self.router.forward_from_upstream,
            on_close=self._on_upstream_closed,
        )
        await conn.open(
            rr.ws_url,
            timeout=cfg.timeout,
            # An empty header dict is passed as None.
            additional_headers=additional_headers or None,
            ssl_context=ssl_context,
        )
    except Exception as e:
        logger.warning("upstream open failed: %r", e)
        self.state.last_close_reason = "backend_lost"
        await self.state.set_disconnected()
        raise
    self.upstream = conn
    self.router.update_upstream_send(conn.send_text)
    # Phase 3: expose the upstream's daemon-internal command channel to the
    # Router so the unified session verbs (openBackgroundTab / closeTab /
    # userscript) have an rdp implementation via raw CDP — Target.create/
    # closeTarget, Page.addScriptToEvaluateOnNewDocument. Distinct id space
    # from client traffic (UpstreamConnection.send_command). Cleared on
    # close (symmetric with the extension callbacks).
    self.router._upstream_command = conn.send_command
    # Tell Chrome to gossip about all targets so we can maintain the
    # last_activated table without needing the client to enable it.
    # Best-effort: a failure here is logged and does not abort the open.
    try:
        await conn.send_command(
            "Target.setDiscoverTargets", {"discover": True})
    except Exception as e:
        logger.warning("setDiscoverTargets failed: %r", e)
    await self.state.set_connected(rr.ws_url, was_popup=False)
709
+
710
async def _launch_rdp_chrome(self, cfg: Config) -> None:
    """Launch the Chrome process owned by this rdp session (at most once).

    No-op when ``self.rdp_pid`` is already set, so a reconnect does not
    spawn a second Chrome.

    Port choice: ``cfg.backends.rdp.port`` is used when it is truthy.
    Otherwise an OS-assigned free loopback port is picked and pinned onto
    ``self._cfg`` (via ``dataclasses.replace``) so the later resolve step
    probes the same port.

    The launch goes through ``launch_chrome.launch_chrome`` in-process with
    profile ``bs-s{session_id}`` and ``persistent=True``; the returned
    ``extras`` dict supplies the pid and profile path recorded on ``self``.
    Any exception raised by the launcher propagates to the caller.
    """
    if self.rdp_pid is not None:
        # Warm reconnect: Chrome was already launched for this holder.
        return
    from ..launch_chrome import launch_chrome as _launch_chrome

    port = cfg.backends.rdp.port
    if not port:
        # Nothing pinned: ask the OS for a free loopback port by binding
        # to port 0, then release the socket so Chrome can take it.
        import socket as _socket
        with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as probe:
            probe.bind(("127.0.0.1", 0))
            port = probe.getsockname()[1]
        import dataclasses as _dc
        pinned_rdp = _dc.replace(cfg.backends.rdp, port=port)
        pinned_backends = _dc.replace(cfg.backends, rdp=pinned_rdp)
        self._cfg = _dc.replace(cfg, backends=pinned_backends)
        cfg = self._cfg

    profile = f"bs-s{self.session_id}"
    logger.info("launching rdp Chrome for session %s on port %d (profile %s)",
                self.session_id, port, profile)
    # Give Chrome at least 30s to come up, regardless of a shorter cfg.timeout.
    launch_timeout = max(cfg.timeout, 30.0)
    out = await _launch_chrome(
        cfg,
        profile=profile,
        persistent=True,
        port=port,
        timeout=launch_timeout,
    )
    extras = out.get("extras") or {}
    self.rdp_pid = extras.get("pid")
    self.rdp_profile_dir = extras.get("profile_path")
    self.rdp_port = port
763
+
764
def _kill_rdp_chrome(self) -> None:
    """Send SIGTERM to the Chrome process recorded in ``self.rdp_pid``.

    Best-effort teardown: ``self.rdp_pid`` is cleared first (so a later
    launch starts fresh), then the signal is sent. Any ``OSError`` from
    ``os.kill`` — which includes "no such process" and "permission
    denied" — is logged at debug level and otherwise ignored.

    The profile directory is deliberately not touched here (per the
    original author's note, removing it inline would race Chrome's
    shutdown writeback; cleanup is left to a later sweep).
    """
    pid, self.rdp_pid = self.rdp_pid, None
    if pid is None:
        return
    import os as _os
    import signal as _signal
    try:
        _os.kill(pid, _signal.SIGTERM)
        logger.info("killed rdp Chrome pid %d for session %s",
                    pid, self.session_id)
    except OSError as e:
        # ProcessLookupError and PermissionError are OSError subclasses.
        logger.debug("rdp Chrome pid %s already gone: %r", pid, e)
782
+
783
async def _build_cloud_auth(self, cfg: Config) -> tuple[dict[str, str], Any]:
    """Return ``(headers, ssl_context)`` for the cloud upstream handshake.

    The auth provider is built from ``cfg.backends.cloud`` (``auth_kind``
    and ``auth``). When no ``auth_kind`` is configured, or when building /
    querying the provider raises ``UserError``, the result is the "no
    auth" pair ``({}, None)``; the ``UserError`` case is logged as a
    warning. Other exception types propagate to the caller.
    """
    from ..auth import build_auth_provider
    from ..errors import UserError
    cloud_cfg = cfg.backends.cloud
    if cloud_cfg.auth_kind:
        try:
            provider = build_auth_provider(cloud_cfg.auth_kind, cloud_cfg.auth)
            # Headers are awaited first, then the TLS context is fetched.
            return await provider.headers(), provider.ssl_context()
        except UserError as e:
            logger.warning("cloud auth misconfigured: %s", e)
    return {}, None
804
+
805
async def _open_extension_upstream(self, cfg: Config) -> None:
    """Open the extension-backed upstream on top of the existing relay.

    Wraps ``self.relay`` in an ``ExtensionUpstream``, waits for it to open
    (timeout is ``max(cfg.timeout, 60.0)`` seconds), installs the
    extension-only router callbacks, and finally marks the state connected.

    Raises:
        Unavailable: when the holder has no relay, or when the open call
            times out.
        Exception: any other failure from constructing / opening the
            upstream is logged and re-raised.
    In every failure case the state is first marked disconnected with
    close reason ``backend_lost``.
    """

    async def _mark_backend_lost() -> None:
        # Shared failure bookkeeping for all error exits below.
        self.state.last_close_reason = "backend_lost"
        await self.state.set_disconnected()

    if self.relay is None:
        # The holder was never given a relay: surface the misconfiguration
        # rather than waiting on something that cannot arrive.
        await _mark_backend_lost()
        raise Unavailable(
            "extension backend selected but relay was never started — "
            "internal bug, please report")

    try:
        ext = ExtensionUpstream(
            relay=self.relay,
            on_frame=self.router.forward_from_upstream,
            on_close=self._on_upstream_closed,
        )
        # Never wait less than 60s: the user may need to load the
        # extension / click its popup, which takes seconds.
        await ext.open(timeout=max(cfg.timeout, 60.0))
    except asyncio.TimeoutError:
        await _mark_backend_lost()
        raise Unavailable(
            "no extension connected within timeout — load the daemon's "
            "Chrome extension from `chrome-extension/`")
    except Exception as e:
        logger.warning("extension upstream open failed: %r", e)
        await _mark_backend_lost()
        raise

    self.upstream = ext
    self.router.update_upstream_send(ext.send_text)

    # IMPORTANT: every extension-only callback is wired BEFORE
    # state.set_connected. Per the original author's note, concurrent
    # BrowserwrightDaemon.* handlers gate on the CONNECTED phase, so
    # flipping the phase first would let them observe callback=None.
    router_hooks = (
        # v0.5.4: daemon-driven attach-active path.
        ("_attach_active_tab", ext.attach_active_tab),
        # Phase B: open_background + close_tab.
        ("_open_background_tab", ext.open_background_tab),
        ("_close_tab", ext.close_tab),
        ("_close_tab_by_target_id", ext.close_tab_by_target_id),
        # P5 per-session teardown.
        ("_end_session", ext.end_session),
        # Session-reconnect-recovery hooks.
        ("_recover_session", ext.recover_session),
        ("_wait_session_announce", ext.wait_session_announce),
        ("_userscript_request", ext.userscript_request),
        # Scopes Target.getTargets to the requesting session's tab group.
        ("_scoped_targets", ext.scoped_target_infos),
    )
    for hook_name, callback in router_hooks:
        setattr(self.router, hook_name, callback)

    connected_url = ext.ws_url or "ext://relay"
    await self.state.set_connected(connected_url, was_popup=False)
872
+
873
async def trigger_close(self, reason: CloseReason) -> None:
    """Run the spec §6.5 close etiquette and tear down the upstream.

    Sequence per spec §6.5:
      1. send Target.detachedFromTarget for each owned sessionId
      2. send BrowserwrightDaemon.upstreamClosed
      3. close client ws with 1011
    Steps (1) and (2) happen here. Step (3) is left to the client handler;
    this method only flips the state to disconnected at the end.

    Idempotent: returns immediately when the upstream phase is already
    DISCONNECTED or CLOSING.

    Args:
        reason: close reason; recorded via ``state.begin_closing``, sent in
            the ``upstreamClosed`` event and passed to the upstream's
            ``close``.

    Notifications and the upstream close are best-effort: failures are
    logged at debug level and never abort the teardown.
    """
    if self.state.upstream_phase in (UpstreamPhase.DISCONNECTED, UpstreamPhase.CLOSING):
        # Already closing / closed — idempotent.
        return
    await self.state.begin_closing(reason)

    # Spec §6.5 step 1: per-session synthetic Target.detachedFromTarget
    # events, sent to EACH client that owns a session, carrying that
    # client's local sessionId and the bound targetId. Snapshots (list())
    # guard against the mappings changing across awaits.
    for cid, client in list(self.state.clients.items()):
        for local_sid, binding in list(client.sessions.items()):
            try:
                await self.router._send_to_client(cid, json.dumps({
                    "method": "Target.detachedFromTarget",
                    "params": {
                        "sessionId": local_sid,
                        "targetId": binding.target_id,
                    },
                }))
            except Exception as e:
                logger.debug("detach notify to client %s failed: %r", cid, e)
    # client.sessions is not cleared here — that is left to
    # set_disconnected() at the end.

    # Spec §6.5 step 2: BrowserwrightDaemon.upstreamClosed broadcast. The
    # payload is identical for every client, so serialize it once.
    closed_event = json.dumps({
        "method": "BrowserwrightDaemon.upstreamClosed",
        "params": {"reason": reason},
    })
    for cid in list(self.state.clients.keys()):
        try:
            await self.router._send_to_client(cid, closed_event)
        except Exception as e:
            logger.debug("upstreamClosed notify to client %s failed: %r", cid, e)

    # Tear down upstream ws.
    up = self.upstream
    self.upstream = None
    self.router.update_upstream_send(None)
    # Drop every extension-backend callback so a post-close verb gets a
    # clean "not available" answer instead of racing a torn-down upstream.
    self.router._attach_active_tab = None
    self.router._open_background_tab = None
    self.router._close_tab = None
    self.router._close_tab_by_target_id = None
    self.router._end_session = None
    self.router._recover_session = None
    self.router._wait_session_announce = None
    self.router._userscript_request = None
    # `_scoped_targets` is installed by the extension open path alongside
    # the callbacks above; clear it too so the router never keeps a bound
    # method of the closed upstream.
    self.router._scoped_targets = None
    # Phase 3: drop the rdp raw-CDP command channel (symmetric with the
    # extension callbacks above).
    self.router._upstream_command = None
    if up is not None:
        try:
            await up.close(code=1000, reason=reason)
        except Exception as e:
            logger.debug("upstream close failed: %r", e)

    # Phase 3 (C2 ephemeral): an rdp context's Chrome is a daemon child —
    # it must die with the upstream, on every close path. Skipped on
    # holders that never launched one (rdp_pid is None).
    if self.rdp_pid is not None:
        self._kill_rdp_chrome()

    # Spec §6.5 step 3 is not performed here; we only mark the state
    # disconnected and rely on the client handlers to exit.
    await self.state.set_disconnected()
954
+
955
async def _on_upstream_closed(self, reason: str) -> None:
    """Handle an upstream that dropped on its own.

    Installed as the ``on_close`` callback of the upstream connections.
    Bumps the ``upstream_closed_total`` metric, then — unless a close is
    already in progress or done — runs ``trigger_close("chrome_exit")``.
    ``reason`` is accepted for the callback signature but not used.

    When this holder has a ``session_id`` and the router exposes a
    ``daemon`` attribute, the session's context is afterwards dropped from
    the daemon via ``drop_rdp_context``; a failure there is logged as a
    warning and swallowed.
    """
    metrics().upstream_closed_total += 1

    phase = self.state.upstream_phase
    if phase in (UpstreamPhase.DISCONNECTED, UpstreamPhase.CLOSING):
        return

    await self.trigger_close("chrome_exit")

    if self.session_id is None:
        return
    owner = getattr(self.router, "daemon", None)
    if owner is None:
        return
    try:
        owner.drop_rdp_context(self.session_id)
    except Exception as exc:
        logger.warning("drop rdp context %s failed: %r",
                       self.session_id, exc)
976
+
977
+
978
+ # ---- graceful shutdown -----------------------------------------------------
979
+
980
+
981
async def _idle_watchdog(daemon: "Daemon", idle_after: float | None) -> None:
    """Periodic supervisor: reap executors and idle-close upstreams.

    Runs until cancelled. Each tick:
      - crash-reap (always): ``daemon.executors.reap_dead()``;
      - idle-reap (only when ``idle_after`` is set):
        ``daemon.executors.reap_idle(idle_after)``;
      - upstream idle-close (only when ``idle_after`` is set): every
        CONNECTED context whose ``last_activity_at`` is at least
        ``idle_after`` seconds old gets ``trigger_close("idle_close")``;
        an rdp context with a session id is then dropped from the daemon.

    Args:
        daemon: the daemon whose executors and contexts are supervised.
        idle_after: idle threshold in seconds; falsy disables idle-close
            and idle-reap.

    The poll interval is 5s when ``idle_after`` is falsy, otherwise half
    the threshold with a floor of 1s. Failures in reaping, closing or
    dropping are logged as warnings and never stop the loop.
    """
    poll = 5.0 if not idle_after else max(1.0, idle_after / 2.0)
    try:
        while True:
            await asyncio.sleep(poll)
            # --- executor supervision (Phase B PR2) ---
            try:
                daemon.executors.reap_dead()
                if idle_after:
                    daemon.executors.reap_idle(idle_after)
            except Exception as e:  # noqa: BLE001 - never let reap break the loop
                logger.warning("executor reap failed: %r", e)
            # --- upstream idle-close (gated) ---
            if not idle_after:
                continue
            # Iterate a snapshot: the loop body drops rdp contexts from the
            # daemon (and awaits), so the underlying collection may change
            # while we walk it.
            for ctx in list(daemon.all_contexts()):
                if ctx.state.upstream_phase != UpstreamPhase.CONNECTED:
                    continue
                idle_for = time.time() - ctx.state.last_activity_at
                if idle_for < idle_after:
                    continue
                logger.info("idle-watchdog: closing %s upstream after %.1fs",
                            ctx.backend, idle_for)
                try:
                    await ctx.holder.trigger_close("idle_close")
                except Exception as e:
                    logger.warning("idle close failed: %r", e)
                # An idle-closed rdp context's Chrome is gone; drop the
                # context so dead per-session entries don't accumulate for
                # the daemon's lifetime. Guarded like the executor reap so a
                # failing drop cannot terminate the watchdog.
                if ctx.backend == "rdp" and ctx.session_id is not None:
                    try:
                        daemon.drop_rdp_context(ctx.session_id)
                    except Exception as e:  # noqa: BLE001
                        logger.warning("drop rdp context %s failed: %r",
                                       ctx.session_id, e)
    except asyncio.CancelledError:
        return
1035
+
1036
+
1037
async def _graceful_shutdown(daemon: "Daemon") -> None:
    """Shut down every upstream context and every executor of ``daemon``.

    For each context returned by ``daemon.all_contexts()`` the holder's
    ``trigger_close("daemon_shutdown")`` is awaited in turn; afterwards
    ``daemon.executors.kill_all()`` is called. Both steps are best-effort:
    an exception is logged as a warning and the shutdown continues.
    This function does not itself close any listener.
    """

    async def _close_context(context) -> None:
        # One context failing to close must not block the others.
        try:
            await context.holder.trigger_close("daemon_shutdown")
        except Exception as exc:
            logger.warning("shutdown close failed for %s: %r",
                           context.backend, exc)

    for context in daemon.all_contexts():
        await _close_context(context)

    # Phase B (PR2): executors are daemon children and are killed with it.
    try:
        daemon.executors.kill_all()
    except Exception as exc:  # noqa: BLE001
        logger.warning("executor shutdown kill failed: %r", exc)
1051
+
1052
+
1053
+ # ---- helper for the cli serve dispatcher ----------------------------------
1054
+
1055
+
1056
def make_holder(state: DaemonState, router: Router, cfg: Config) -> _UpstreamHolder:
    """Build an ``_UpstreamHolder`` from ``state``, ``router`` and ``cfg``.

    Thin factory kept as a test seam; the three arguments are forwarded
    positionally and unchanged to the ``_UpstreamHolder`` constructor.
    """
    holder = _UpstreamHolder(state, router, cfg)
    return holder