browserwright 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. browserwright/__init__.py +33 -0
  2. browserwright/__main__.py +6 -0
  3. browserwright/_executor/__init__.py +47 -0
  4. browserwright/_executor/__main__.py +9 -0
  5. browserwright/_executor/client.py +127 -0
  6. browserwright/_executor/process.py +652 -0
  7. browserwright/_executor/protocol.py +152 -0
  8. browserwright/api.py +66 -0
  9. browserwright/cdp.py +285 -0
  10. browserwright/cli.py +741 -0
  11. browserwright/daemon/__init__.py +8 -0
  12. browserwright/daemon/_ipc.py +444 -0
  13. browserwright/daemon/active_tab.py +183 -0
  14. browserwright/daemon/auth.py +395 -0
  15. browserwright/daemon/backends/__init__.py +59 -0
  16. browserwright/daemon/backends/base.py +120 -0
  17. browserwright/daemon/backends/cloud.py +222 -0
  18. browserwright/daemon/backends/env.py +119 -0
  19. browserwright/daemon/backends/extension.py +185 -0
  20. browserwright/daemon/backends/rdp.py +214 -0
  21. browserwright/daemon/cli.py +1437 -0
  22. browserwright/daemon/config.py +380 -0
  23. browserwright/daemon/doctor.py +179 -0
  24. browserwright/daemon/errors.py +34 -0
  25. browserwright/daemon/launch_chrome.py +353 -0
  26. browserwright/daemon/observability.py +181 -0
  27. browserwright/daemon/platforms.py +234 -0
  28. browserwright/daemon/resolver.py +72 -0
  29. browserwright/daemon/server/__init__.py +6 -0
  30. browserwright/daemon/server/daemon.py +229 -0
  31. browserwright/daemon/server/executor_registry.py +434 -0
  32. browserwright/daemon/server/extension_upstream.py +677 -0
  33. browserwright/daemon/server/facade.py +375 -0
  34. browserwright/daemon/server/facade_extension.py +969 -0
  35. browserwright/daemon/server/listener.py +1058 -0
  36. browserwright/daemon/server/proxy.py +1991 -0
  37. browserwright/daemon/server/relay.py +783 -0
  38. browserwright/daemon/server/state.py +432 -0
  39. browserwright/daemon/server/upstream.py +266 -0
  40. browserwright/daemon/userscripts.py +150 -0
  41. browserwright/discovery.py +213 -0
  42. browserwright/errors.py +177 -0
  43. browserwright/health.py +169 -0
  44. browserwright/install.py +628 -0
  45. browserwright/memory/__init__.py +15 -0
  46. browserwright/memory/_md.py +120 -0
  47. browserwright/memory/_yaml.py +217 -0
  48. browserwright/memory/global_mem.py +201 -0
  49. browserwright/memory/repl_mem.py +28 -0
  50. browserwright/memory/session_decisions.py +53 -0
  51. browserwright/memory/site_mem.py +381 -0
  52. browserwright/mode_b_client.py +590 -0
  53. browserwright/multitask.py +131 -0
  54. browserwright/output_schema.py +99 -0
  55. browserwright/primitives/__init__.py +67 -0
  56. browserwright/primitives/discovery_api.py +79 -0
  57. browserwright/primitives/http.py +42 -0
  58. browserwright/primitives/inspect.py +876 -0
  59. browserwright/primitives/interact.py +518 -0
  60. browserwright/primitives/page.py +556 -0
  61. browserwright/primitives/site.py +143 -0
  62. browserwright/release_install.py +466 -0
  63. browserwright/repl/__init__.py +6 -0
  64. browserwright/repl/_namespace.py +106 -0
  65. browserwright/repl/_smart_goto.py +236 -0
  66. browserwright/repl/inline.py +180 -0
  67. browserwright/repl/playwright_handle.py +449 -0
  68. browserwright/repl/snapshot.py +150 -0
  69. browserwright/session.py +229 -0
  70. browserwright/session_create.py +252 -0
  71. browserwright/session_ctx.py +24 -0
  72. browserwright/session_registry.py +133 -0
  73. browserwright/session_runtime.py +133 -0
  74. browserwright/site_skills_starter/github.com/SKILL.md +14 -0
  75. browserwright/site_skills_starter/github.com/memory.md +29 -0
  76. browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
  77. browserwright/site_skills_starter/google.com/SKILL.md +16 -0
  78. browserwright/site_skills_starter/google.com/memory.md +27 -0
  79. browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
  80. browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
  81. browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
  82. browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
  83. browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
  84. browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
  85. browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
  86. browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
  87. browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
  88. browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
  89. browserwright/skill_doc.py +140 -0
  90. browserwright/skill_runtime.md +194 -0
  91. browserwright/subscriptions.py +213 -0
  92. browserwright/task_runner.py +125 -0
  93. browserwright/version.py +117 -0
  94. browserwright-0.6.2.dist-info/METADATA +12 -0
  95. browserwright-0.6.2.dist-info/RECORD +98 -0
  96. browserwright-0.6.2.dist-info/WHEEL +5 -0
  97. browserwright-0.6.2.dist-info/entry_points.txt +3 -0
  98. browserwright-0.6.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,432 @@
1
+ """Centralized daemon state (§8.5).
2
+
3
+ v0.3 expansion of the v0.2 single-client model:
4
+
5
+ - `client` (singular) → `clients: dict[id, ClientState]`
6
+ - per-client `sessions: dict[local_session_id, SessionBinding]`
7
+ - `upstream_to_locals: dict[upstream_session_id, list[SessionBinding]]`
8
+ (list because one upstream session can serve N local sessions via shared-read)
9
+ - `attachers: dict[target_id, AttachOwnership]` — the single-attacher rule
10
+ - `pending_requests: dict[upstream_id, PendingRequest]` — id translation for
11
+ CDP response routing (CDP responses correlate by id, not by sessionId, so
12
+ ids must be unique across clients on the upstream wire)
13
+
14
+ The transitions still go through the same observer pattern as v0.2.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import asyncio
19
+ import itertools
20
+ import time
21
+ from collections import deque
22
+ from dataclasses import dataclass, field
23
+ from enum import Enum
24
+ from typing import Any, Awaitable, Callable, Literal
25
+
26
+
27
# Maximum number of frames buffered per client while the upstream connection
# is still opening. Spec §10 open question — "buffer with limit 100, error
# past that" was the resolution. Kept as a module constant so tests can
# override it.
PRE_OPEN_BUFFER_LIMIT = 100
31
+
32
+
33
+ # ---- enums -----------------------------------------------------------------
34
+
35
+
36
class UpstreamPhase(str, Enum):
    """Phase of the daemon's upstream connection.

    Members are also ``str`` instances whose value equals their name. The
    phase is stored on ``DaemonState.upstream_phase`` and is changed by the
    ``begin_connecting`` / ``set_connected`` / ``begin_closing`` /
    ``set_disconnected`` transitions.
    """
    DISCONNECTED = "DISCONNECTED"
    CONNECTING = "CONNECTING"
    CONNECTED = "CONNECTED"
    CLOSING = "CLOSING"
41
+
42
+
43
# Closed set of reason strings accepted by DaemonState.begin_closing and
# remembered in DaemonState.last_close_reason.
CloseReason = Literal[
    "chrome_exit", "backend_lost", "idle_close",
    "daemon_shutdown", "skill_disconnect",
]
47
+
48
+
49
+ # ---- session / attach data classes ----------------------------------------
50
+
51
+
52
@dataclass
class SessionBinding:
    """Map one client-visible (local) sessionId to one upstream sessionId.

    Every binding is owned by a specific client. Several bindings can point
    at the same upstream session when shared-read is active.
    """
    client_id: int             # owning client; key into DaemonState.clients
    local_session_id: str      # what THIS client sees
    upstream_session_id: str   # what Chrome sees
    target_id: str             # known from the attach response onward
    readonly: bool = False     # True ⇒ shared-read; commands rejected -32602
63
+
64
+
65
@dataclass
class AttachOwnership:
    """Ownership record for a single targetId.

    The primary client holds the read+write session. Any additional
    shared-read attachers are tracked in ``readers`` and get read-only
    sessions backed by the same upstream session.
    """
    target_id: str
    primary_client_id: int
    primary_local_session: str
    upstream_session_id: str
    # (client_id, local_session_id) tuples for read-only attachers.
    readers: list[tuple[int, str]] = field(default_factory=list)

    def all_local_sessions(self) -> list[tuple[int, str]]:
        """Return every (client_id, local_session_id) pair for this target.

        The primary owner comes first, followed by the readers in the order
        they were added — useful for event fan-out within a session.
        """
        primary = (self.primary_client_id, self.primary_local_session)
        return [primary] + list(self.readers)
81
+
82
+
83
@dataclass
class PendingRequest:
    """A client request awaiting its upstream response.

    Ids are translated because CDP responses correlate by id, and multiple
    clients can otherwise pick the same numeric id. Instances are stored in
    ``DaemonState.pending_requests`` keyed by the translated upstream id.
    """
    client_id: int           # client that sent the request
    client_request_id: int   # the id the client originally sent
    method: str              # raw method (used by attach interceptor)
    # For Target.attachToTarget we need to remember which targetId the client
    # asked for so we can fill the attachers table when the response arrives.
    attach_target_id: str | None = None
    # Whether the client passed `flags.allowSecondaryReadOnly=true` in the
    # attach. Daemon doesn't actually forward this flag — the routing decision
    # is made locally — but we remember it for the rare case where the primary
    # owner is the SAME client (then we keep regular write semantics).
    attach_allow_shared_read: bool = False
    # NOTE: no sessionId is stored here. Sessionless-vs-sessioned: if the
    # original request carried a sessionId, the response must carry the
    # *local* sessionId back. CDP responses on session-scoped requests echo
    # the session-id in some daemon-mediated synthetic events; for plain
    # {"id","result"} responses CDP itself doesn't echo sessionId so we don't
    # need this for vanilla responses.
105
+
106
+
107
+ # ---- ClientState -----------------------------------------------------------
108
+
109
+
110
@dataclass
class ClientState:
    """State kept for one connected ws client (v0.3: N exist at a time)."""
    client_id: int
    label: str
    # Ledger id and name of the browserwright session this client is bound
    # to, taken from the ws ``?session=<id>`` query at connect. On the shared
    # extension context they scope browser-level enumeration
    # (Target.getTargets) to THIS session's tab group so sessions are mutually
    # invisible. None for the bare REPL client / single-context unit tests.
    session_id: str | None = None
    session_name: str | None = None
    # local_session_id → SessionBinding owned by this client.
    sessions: dict[str, SessionBinding] = field(default_factory=dict)
    subscribed_focus: bool = False
    connected_at: float = field(default_factory=time.time)
    last_command_at: float = field(default_factory=time.time)
    # Spec §10 open question: frames sent while upstream is still in the
    # DISCONNECTED / CONNECTING phase are buffered per client (FIFO, capacity
    # 100) and drained once upstream is OPEN. The 101st frame is rejected with
    # CDP error -32603. Without the buffer the frame would be silently dropped
    # and the client would time out at the 30s CDP boundary (Task #76).
    pre_open_buffer: deque[str] = field(default_factory=deque)

    def owns_session(self, local_session_id: str) -> bool:
        """Return True when ``local_session_id`` is bound to this client."""
        owned = self.sessions
        return local_session_id in owned
137
+
138
+
139
+ # ---- DaemonState -----------------------------------------------------------
140
+
141
+
142
@dataclass
class DaemonState:
    """Whole-process mutable state. ONE instance per daemon.

    Holds the upstream phase, the connected clients, the session and attacher
    tables, the pending-request map used for id translation, and the
    heuristic active-tab table. Phase transitions notify subscribers
    registered through ``subscribe``.
    """
    backend_name: str
    upstream_phase: UpstreamPhase = UpstreamPhase.DISCONNECTED
    upstream_ws_url: str | None = None
    last_close_reason: CloseReason | None = None

    # v0.3: many clients keyed by client_id (monotonic).
    clients: dict[int, ClientState] = field(default_factory=dict)
    _next_client_id: itertools.count = field(
        default_factory=lambda: itertools.count(1))

    # Local→upstream lookup lives on ClientState; the reverse direction
    # (upstream→[locals]) is kept here for fast fan-out of events that carry
    # a sessionId.
    upstream_to_locals: dict[str, list[SessionBinding]] = field(default_factory=dict)

    # Single-attacher table: targetId → AttachOwnership.
    attachers: dict[str, AttachOwnership] = field(default_factory=dict)

    # Pending request map keyed by the *upstream* (translated) id.
    pending_requests: dict[int, PendingRequest] = field(default_factory=dict)
    # Allocator for upstream ids. Stays positive — daemon-internal ids on
    # UpstreamConnection.send_command live in big negatives.
    _next_upstream_id: itertools.count = field(
        default_factory=lambda: itertools.count(1))

    # Heuristic active-tab table (unchanged from v0.2).
    last_activated_at: dict[str, float] = field(default_factory=dict)
    targets: dict[str, dict[str, Any]] = field(default_factory=dict)

    last_activity_at: float = field(default_factory=time.time)
    last_popup_resolved_at: float | None = None

    _subscribers: list[Callable[[str, dict], Awaitable[None]]] = field(default_factory=list)

    # ---- client lifecycle -------------------------------------------------

    def allocate_client(self, label: str, *, client_id: int | None = None,
                        session_id: str | None = None,
                        session_name: str | None = None) -> ClientState:
        """Create and register a ClientState.

        Phase 2: the Daemon passes a globally-unique ``client_id`` (unique
        across all UpstreamContexts) so daemon logs never show two clients
        sharing a number. When it is omitted (single-context callers / tests)
        this state's own monotonic counter supplies the id. An empty label
        is stored as "anonymous".
        """
        if client_id is None:
            client_id = next(self._next_client_id)
        state = ClientState(
            client_id=client_id,
            label=label or "anonymous",
            session_id=session_id,
            session_name=session_name,
        )
        self.clients[client_id] = state
        return state

    def release_client(self, client_id: int) -> ClientState | None:
        """Drop a client and clean up its sessions and owned attachments.

        Returns the released ClientState (its ``sessions`` dict is left
        intact so the caller can iterate the owned sessions and synthesize
        detach events before closing the ws), or None for an unknown id.
        Only state is mutated here; side effects are the caller's job.
        """
        released = self.clients.pop(client_id, None)
        if released is None:
            return None
        # Snapshot the bindings, then detach each from the shared tables.
        for binding in list(released.sessions.values()):
            self._unbind_session(binding)
        return released

    def _unbind_session(self, binding: SessionBinding) -> None:
        """Internal — remove a SessionBinding from the upstream→local table
        and update attacher ownership accordingly."""

        def _is_this_binding(cid: int, lsid: str) -> bool:
            return cid == binding.client_id and lsid == binding.local_session_id

        # Upstream→locals: keep every binding except the one being removed.
        upstream_sid = binding.upstream_session_id
        survivors = [
            b for b in self.upstream_to_locals.get(upstream_sid, [])
            if not _is_this_binding(b.client_id, b.local_session_id)
        ]
        if survivors:
            self.upstream_to_locals[upstream_sid] = survivors
        else:
            self.upstream_to_locals.pop(upstream_sid, None)

        # Attacher cleanup.
        ownership = self.attachers.get(binding.target_id)
        if ownership is None:
            return
        if _is_this_binding(ownership.primary_client_id,
                            ownership.primary_local_session):
            # Primary owner is leaving. Readers are NOT promoted: transferring
            # write ownership without consent is unusual, so for v0.3 the
            # attachment is simply dropped and the upstream session is left
            # to die. The spec doesn't mandate promotion.
            self.attachers.pop(binding.target_id, None)
            return
        # Reader leaving.
        ownership.readers = [
            entry for entry in ownership.readers
            if not _is_this_binding(*entry)
        ]

    # ---- session table ----------------------------------------------------

    def bind_session(
        self,
        client_id: int,
        local_session_id: str,
        upstream_session_id: str,
        target_id: str,
        *,
        readonly: bool,
    ) -> SessionBinding:
        """Create a SessionBinding and record it in both lookup directions.

        Raises KeyError when ``client_id`` is not a registered client.
        """
        owner = self.clients[client_id]
        binding = SessionBinding(
            client_id=client_id,
            local_session_id=local_session_id,
            upstream_session_id=upstream_session_id,
            target_id=target_id,
            readonly=readonly,
        )
        owner.sessions[local_session_id] = binding
        fanout = self.upstream_to_locals.setdefault(upstream_session_id, [])
        fanout.append(binding)
        return binding

    def unbind_session_by_local(
        self, client_id: int, local_session_id: str
    ) -> SessionBinding | None:
        """Used on Target.detachFromTarget. Returns the binding removed, or None."""
        owner = self.clients.get(client_id)
        if owner is None:
            return None
        binding = owner.sessions.pop(local_session_id, None)
        if binding is None:
            return None
        self._unbind_session(binding)
        return binding

    # ---- attacher table ---------------------------------------------------

    def claim_attacher(
        self,
        target_id: str,
        client_id: int,
        local_session_id: str,
        upstream_session_id: str,
    ) -> None:
        """Record that `client_id` is the primary owner of `target_id`.

        The single-attacher check happened earlier in the router; this only
        commits the bookkeeping after the upstream attach succeeded. Any
        existing record for the target is replaced.
        """
        record = AttachOwnership(
            target_id=target_id,
            primary_client_id=client_id,
            primary_local_session=local_session_id,
            upstream_session_id=upstream_session_id,
        )
        self.attachers[target_id] = record

    def add_reader(
        self,
        target_id: str,
        client_id: int,
        local_session_id: str,
    ) -> AttachOwnership | None:
        """Append a read-only attacher to the target's ownership record.

        Returns the updated record, or None when the target has no owner.
        """
        ownership = self.attachers.get(target_id)
        if ownership is not None:
            ownership.readers.append((client_id, local_session_id))
        return ownership

    # ---- pending request map ---------------------------------------------

    def allocate_upstream_id(self) -> int:
        """Return the next positive id to use on the upstream wire."""
        return next(self._next_upstream_id)

    def remember_request(
        self,
        upstream_id: int,
        client_id: int,
        client_request_id: int,
        method: str,
        *,
        attach_target_id: str | None = None,
        attach_allow_shared_read: bool = False,
    ) -> None:
        """Store the routing info for a forwarded request under ``upstream_id``."""
        pending = PendingRequest(
            client_id=client_id,
            client_request_id=client_request_id,
            method=method,
            attach_target_id=attach_target_id,
            attach_allow_shared_read=attach_allow_shared_read,
        )
        self.pending_requests[upstream_id] = pending

    def take_pending(self, upstream_id: int) -> PendingRequest | None:
        """Remove and return the pending request for ``upstream_id``, if any."""
        return self.pending_requests.pop(upstream_id, None)

    # ---- subscriptions / transitions (unchanged) -------------------------

    def subscribe(self, fn: Callable[[str, dict], Awaitable[None]]) -> None:
        """Register an async observer called as ``fn(event, payload)``."""
        self._subscribers.append(fn)

    async def _emit(self, event: str, payload: dict) -> None:
        """Notify every subscriber in registration order.

        Best-effort: an exception raised by one observer is swallowed so the
        remaining observers still run.
        """
        # Iterate over a snapshot so observers may (un)subscribe while running.
        for observer in tuple(self._subscribers):
            try:
                await observer(event, payload)
            except Exception:
                continue

    async def begin_connecting(self, backend_name: str) -> None:
        """Enter CONNECTING for ``backend_name`` and emit "upstream.connecting"."""
        self.upstream_phase = UpstreamPhase.CONNECTING
        self.backend_name = backend_name
        await self._emit("upstream.connecting", {"backend": backend_name})

    async def set_connected(self, ws_url: str, *, was_popup: bool) -> None:
        """Enter CONNECTED, record the ws url, and emit "upstream.ready".

        When ``was_popup`` is true the popup-resolved timestamp is refreshed.
        """
        self.upstream_phase = UpstreamPhase.CONNECTED
        self.upstream_ws_url = ws_url
        if was_popup:
            self.last_popup_resolved_at = time.time()
        await self._emit("upstream.ready", {"ws_url": ws_url})

    async def begin_closing(self, reason: CloseReason) -> None:
        """Enter CLOSING, remember ``reason``, and emit "upstream.closing"."""
        self.upstream_phase = UpstreamPhase.CLOSING
        self.last_close_reason = reason
        await self._emit("upstream.closing", {"reason": reason})

    async def set_disconnected(self) -> None:
        """Enter DISCONNECTED and wipe everything tied to the upstream.

        The client list itself is kept; the client handlers release their
        clients individually. Emits "upstream.disconnected" with the last
        close reason.
        """
        self.upstream_phase = UpstreamPhase.DISCONNECTED
        self.upstream_ws_url = None
        # attachers / pending_requests / session tables are upstream-tied.
        for table in (self.attachers, self.pending_requests,
                      self.upstream_to_locals):
            table.clear()
        for client in self.clients.values():
            client.sessions.clear()
        await self._emit("upstream.disconnected", {"reason": self.last_close_reason})

    # ---- heuristic active-tab table (unchanged) --------------------------

    def note_activate(self, target_id: str) -> None:
        """Stamp ``target_id`` as just activated and refresh daemon activity."""
        self.last_activated_at[target_id] = time.time()
        self.last_activity_at = time.time()

    def note_target_info(self, info: dict) -> None:
        """Cache type/url/title for a target; ignored without a str targetId."""
        target_id = info.get("targetId")
        if isinstance(target_id, str):
            self.targets[target_id] = {
                "type": info.get("type"),
                "url": info.get("url", ""),
                "title": info.get("title", ""),
            }

    def note_target_destroyed(self, target_id: str) -> None:
        """Forget a target, its activation stamp, and its attacher record."""
        # The attacher record goes too: the upstream session is gone with
        # the target.
        for table in (self.targets, self.last_activated_at, self.attachers):
            table.pop(target_id, None)

    def best_active_tab(self) -> dict | None:
        """Return the most recently activated non-internal page, or None.

        Only targets of type "page" whose url does not start with a browser
        internal scheme are eligible. Targets never activated score 0.0 and
        report ``since_seconds`` as None; ties keep the first target in
        table order.
        """
        internals = (
            "chrome://", "chrome-untrusted://", "devtools://", "edge://",
            "chrome-extension://", "about:", "view-source:",
        )
        candidates: list[tuple[float, str, dict]] = [
            (self.last_activated_at.get(tid, 0.0), tid, meta)
            for tid, meta in self.targets.items()
            if meta.get("type") == "page"
            and not (meta.get("url") or "").startswith(internals)
        ]
        if not candidates:
            return None
        # max() keeps the first of equally-scored rows, matching a stable
        # descending sort followed by taking element 0.
        score, tid, meta = max(candidates, key=lambda row: row[0])
        since = (time.time() - score) if score > 0 else None
        return {
            "targetId": tid,
            "url": meta.get("url", ""),
            "title": meta.get("title", ""),
            "accuracy": "heuristic-recent-activate",
            "since_seconds": since,
        }

    # ---- v0.2 compat: legacy `client` accessor ---------------------------

    @property
    def client(self) -> ClientState | None:
        """The sole connected client, or None when 0 or >1 are connected.

        v0.2 callers used `state.client` (singular). v0.3 supports many, but
        keeping this accessor makes the close-etiquette path simpler in
        single-client deployments.
        """
        if len(self.clients) != 1:
            return None
        (only,) = self.clients.values()
        return only
@@ -0,0 +1,266 @@
1
+ """Upstream ws connection — minimal CDP transport without cdp-use framing.
2
+
3
+ Why hand-rolled? We need raw frame-in/frame-out because the daemon is a
4
+ transparent proxy: a client's outbound text frame gets forwarded byte-for-byte
5
+ to upstream, and upstream's response/event frames get forwarded back without
6
+ re-parsing or rewriting (§6.3). cdp-use parses + re-emits + tracks ids on its
7
+ own; that's two layers of conflict we don't want.
8
+
9
+ websockets.connect gives us the right primitive: a raw async iterator of text
10
+ frames, with `.send(str|bytes)` for the other direction. We also handle the
11
+ localhost-proxy-bypass dance from active_tab here.
12
+
13
+ Spec §6.5 invariant: upstream never auto-reconnects. When the connection
14
+ drops, we mark CLOSING and signal up; the caller decides what comes next.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import asyncio
19
+ import contextlib
20
+ import json
21
+ import logging
22
+ import os
23
+ import time
24
+ from typing import Any, Awaitable, Callable
25
+ from urllib.parse import urlparse
26
+
27
+ import websockets
28
+ from websockets.exceptions import ConnectionClosed
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
# 30s upstream heartbeat — spec §10 open question "Browser.getVersion
# heartbeat frequency" resolved to 30s.
HEARTBEAT_INTERVAL = 30.0
# First id handed out for daemon-internal commands (heartbeat, Target
# subscriptions). Internal ids count upward from this large negative base so
# they cannot collide with anything a CDP client might send.
# NOTE(review): the earlier wording said client ids pass through unchanged,
# while DaemonState allocates translated positive upstream ids — confirm
# which applies on this path.
_DAEMON_ID_BASE = -2_000_000_000
39
+
40
+
41
class UpstreamConnection:
    """Wraps a single ws to Chrome's browser-level CDP endpoint.

    Lifecycle:
        open(ws_url) → the reader task pumps frames → close() ends it cleanly.

    `on_frame(text)` is awaited for every frame *from* upstream that is not a
    response to a daemon-internal command. It is the caller's job to forward
    it downstream (modulo BrowserwrightDaemon.* answers which never enter
    here). `on_close(reason)` is awaited exactly once when the reader task
    ends, whatever the cause.
    """

    def __init__(
        self,
        on_frame: Callable[[str], Awaitable[None]],
        on_close: Callable[[str], Awaitable[None]],
    ):
        self._on_frame = on_frame
        self._on_close = on_close
        self._ws: websockets.ClientConnection | None = None  # type: ignore[name-defined]
        self._reader_task: asyncio.Task | None = None
        self._heartbeat_task: asyncio.Task | None = None
        # Internal command ids count upward from a large negative base.
        self._next_internal_id = _DAEMON_ID_BASE
        # Internal command id → future resolved by the reader task.
        self._pending_internal: dict[int, asyncio.Future] = {}
        self._ws_url: str | None = None

    # ---- public API -------------------------------------------------------

    @property
    def ws_url(self) -> str | None:
        """URL passed to the last successful open(), or None after close()."""
        return self._ws_url

    @property
    def is_open(self) -> bool:
        """True between a successful open() and the next close()."""
        return self._ws is not None

    async def open(
        self,
        ws_url: str,
        *,
        timeout: float = 30.0,
        additional_headers: dict[str, str] | None = None,
        ssl_context: Any = None,
    ) -> None:
        """Connect to upstream. Raises on failure; caller transitions state.

        v0.5: `additional_headers` + `ssl_context` parameterize the upstream
        handshake so the `cloud` backend's AuthProvider can inject
        `Authorization: Bearer ...` headers or pass a client-cert
        `ssl.SSLContext` for mTLS. Both are None for the v0.1-v0.4 use
        cases (local Chrome — no auth needed, ws:// is plaintext).

        Raises:
            RuntimeError: the connection is already open.
            asyncio.TimeoutError: the handshake exceeded ``timeout`` seconds.
        """
        if self._ws is not None:
            raise RuntimeError("upstream already open")
        with _localhost_bypass_proxy(ws_url):
            connect_kwargs: dict[str, Any] = {
                # Big max_size: CDP `Page.captureScreenshot` returns base64
                # blobs that comfortably exceed the websockets default 1MiB.
                "max_size": 100 * 1024 * 1024,
                # Disable per-message-deflate — Chrome's browser-level CDP
                # doesn't speak it, and websockets v15 sometimes negotiates
                # extensions that break the handshake.
                "compression": None,
                # Keep the upstream alive with ws-level pings; CDP-level
                # Browser.getVersion heartbeat is layered on top for protocol
                # liveness.
                "ping_interval": 20,
                "ping_timeout": 20,
            }
            if additional_headers:
                # websockets v15 accepts `additional_headers=` for client
                # connections (older `extra_headers=` is deprecated). The
                # keyword name is part of websockets' public API.
                connect_kwargs["additional_headers"] = list(additional_headers.items())
            if ssl_context is not None:
                connect_kwargs["ssl"] = ssl_context
            self._ws = await asyncio.wait_for(
                websockets.connect(ws_url, **connect_kwargs),
                timeout=timeout,
            )
        self._ws_url = ws_url
        self._reader_task = asyncio.create_task(self._reader_loop())
        self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())

    async def send_text(self, frame: str) -> None:
        """Forward a downstream frame to upstream verbatim.

        Raises:
            RuntimeError: the connection is not open.
        """
        if self._ws is None:
            raise RuntimeError("upstream not open")
        await self._ws.send(frame)

    async def send_command(self, method: str, params: dict | None = None,
                           session_id: str | None = None,
                           timeout: float = 10.0) -> dict:
        """Daemon-internal command — distinct id space from client ids so
        results never collide with downstream traffic.

        Used for: initial Target.setDiscoverTargets to populate the target
        table, the periodic Browser.getVersion heartbeat, and the close-time
        Target.detachFromTarget.

        Returns the parsed response frame as-is; a CDP ``error`` member is
        not turned into an exception.

        Raises:
            RuntimeError: the connection is not open.
            asyncio.TimeoutError: no response within ``timeout`` seconds.
            ConnectionError: close() ran while the command was in flight.
        """
        if self._ws is None:
            raise RuntimeError("upstream not open")
        cmd_id = self._alloc_id()
        msg: dict[str, Any] = {"id": cmd_id, "method": method}
        if params is not None:
            msg["params"] = params
        if session_id is not None:
            msg["sessionId"] = session_id
        fut = asyncio.get_running_loop().create_future()
        self._pending_internal[cmd_id] = fut
        try:
            await self._ws.send(json.dumps(msg))
            return await asyncio.wait_for(fut, timeout=timeout)
        finally:
            # Whatever happened, never leave a stale entry behind.
            self._pending_internal.pop(cmd_id, None)

    async def close(self, *, code: int = 1000, reason: str = "") -> None:
        """Close the upstream cleanly. Idempotent.

        Safe to call from inside the reader or heartbeat task (for example
        from the ``on_close`` callback): the task that is currently running
        is never cancelled, because a self-cancel would raise CancelledError
        at the next await and abort the close half-way through.
        """
        current = asyncio.current_task()
        for task in (self._reader_task, self._heartbeat_task):
            if task is not None and task is not current:
                task.cancel()
        self._reader_task = None
        self._heartbeat_task = None

        ws = self._ws
        self._ws = None
        # Fail every in-flight internal command so its awaiter wakes up.
        for fut in self._pending_internal.values():
            if not fut.done():
                fut.set_exception(ConnectionError("upstream closing"))
        self._pending_internal.clear()
        if ws is not None:
            try:
                await ws.close(code=code, reason=reason)
            except Exception:
                # Best-effort: the socket may already be gone.
                pass
        self._ws_url = None

    # ---- internal ---------------------------------------------------------

    def _alloc_id(self) -> int:
        """Return the next daemon-internal command id."""
        v = self._next_internal_id
        self._next_internal_id += 1
        return v

    async def _reader_loop(self) -> None:
        """Pump frames from upstream until the connection ends.

        Responses whose id matches a pending internal command resolve that
        command's future; every other frame goes to ``on_frame``. ``on_close``
        is always awaited on the way out.
        """
        ws = self._ws
        if ws is None:
            return
        try:
            async for raw in ws:
                if not isinstance(raw, (str, bytes)):
                    continue
                text = raw if isinstance(raw, str) else raw.decode("utf-8", errors="replace")
                # Intercept responses to *our* internal ids (heartbeat etc).
                try:
                    parsed = json.loads(text)
                except (ValueError, TypeError):
                    parsed = None
                if isinstance(parsed, dict):
                    cid = parsed.get("id")
                    if isinstance(cid, int) and cid in self._pending_internal:
                        fut = self._pending_internal.pop(cid)
                        if not fut.done():
                            fut.set_result(parsed)
                        continue
                # Forward to downstream.
                try:
                    await self._on_frame(text)
                except Exception as e:
                    logger.warning("on_frame raised: %r", e)
        except ConnectionClosed as e:
            logger.info("upstream closed: code=%s reason=%s", e.code, e.reason)
        except Exception as e:
            logger.warning("upstream reader crashed: %r", e)
        finally:
            # Always notify close — this is the canonical signal for the
            # state machine to enter CLOSING (caller decides reason).
            try:
                await self._on_close("upstream-eof")
            except Exception:
                pass

    async def _heartbeat_loop(self) -> None:
        """Keep CDP alive by pinging `Browser.getVersion` every 30s.

        Spec §10 open question: 30s is the chosen cadence. Too fast = wasted
        CDP traffic; too slow = stale-Chrome detection latency. Tunable later.

        On a failed heartbeat the websocket is closed so the reader task
        ends and delivers the ``on_close`` signal; nothing reconnects here.
        """
        try:
            while True:
                await asyncio.sleep(HEARTBEAT_INTERVAL)
                if self._ws is None:
                    return
                try:
                    await self.send_command("Browser.getVersion", timeout=10)
                except (asyncio.TimeoutError, ConnectionError, ConnectionClosed):
                    logger.warning("heartbeat failed, closing upstream")
                    await self._drop_stale_upstream()
                    return
        except asyncio.CancelledError:
            return

    async def _drop_stale_upstream(self) -> None:
        """Close the websocket after a failed heartbeat.

        Only the socket is closed; bookkeeping is left to close(), which the
        owner is expected to call once ``on_close`` has fired.
        """
        ws = self._ws
        if ws is None:
            return
        try:
            await ws.close(reason="heartbeat failed")
        except Exception as e:
            logger.debug("closing stale upstream failed: %r", e)
239
+
240
+
241
+ # ---- localhost proxy bypass (same trick as active_tab) --------------------
242
+
243
+
244
+ @contextlib.contextmanager
245
+ def _localhost_bypass_proxy(ws_url: str):
246
+ """When the upstream URL is loopback, ensure NO_PROXY covers it. Same
247
+ rationale as `active_tab._localhost_bypass_proxy`. Spec doesn't mention
248
+ this — but Chrome runs on the user's machine, and the user often has
249
+ HTTPS_PROXY / ALL_PROXY set."""
250
+ host = (urlparse(ws_url).hostname or "").lower()
251
+ if host not in ("127.0.0.1", "localhost", "::1", "[::1]"):
252
+ yield
253
+ return
254
+ prev = os.environ.get("NO_PROXY", "")
255
+ augmented = prev
256
+ for h in ("127.0.0.1", "localhost", "::1"):
257
+ if h not in augmented:
258
+ augmented = f"{augmented},{h}" if augmented else h
259
+ os.environ["NO_PROXY"] = augmented
260
+ try:
261
+ yield
262
+ finally:
263
+ if prev:
264
+ os.environ["NO_PROXY"] = prev
265
+ else:
266
+ os.environ.pop("NO_PROXY", None)