browserwright 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browserwright/__init__.py +33 -0
- browserwright/__main__.py +6 -0
- browserwright/_executor/__init__.py +47 -0
- browserwright/_executor/__main__.py +9 -0
- browserwright/_executor/client.py +127 -0
- browserwright/_executor/process.py +652 -0
- browserwright/_executor/protocol.py +152 -0
- browserwright/api.py +66 -0
- browserwright/cdp.py +285 -0
- browserwright/cli.py +741 -0
- browserwright/daemon/__init__.py +8 -0
- browserwright/daemon/_ipc.py +444 -0
- browserwright/daemon/active_tab.py +183 -0
- browserwright/daemon/auth.py +395 -0
- browserwright/daemon/backends/__init__.py +59 -0
- browserwright/daemon/backends/base.py +120 -0
- browserwright/daemon/backends/cloud.py +222 -0
- browserwright/daemon/backends/env.py +119 -0
- browserwright/daemon/backends/extension.py +185 -0
- browserwright/daemon/backends/rdp.py +214 -0
- browserwright/daemon/cli.py +1437 -0
- browserwright/daemon/config.py +380 -0
- browserwright/daemon/doctor.py +179 -0
- browserwright/daemon/errors.py +34 -0
- browserwright/daemon/launch_chrome.py +353 -0
- browserwright/daemon/observability.py +181 -0
- browserwright/daemon/platforms.py +234 -0
- browserwright/daemon/resolver.py +72 -0
- browserwright/daemon/server/__init__.py +6 -0
- browserwright/daemon/server/daemon.py +229 -0
- browserwright/daemon/server/executor_registry.py +434 -0
- browserwright/daemon/server/extension_upstream.py +677 -0
- browserwright/daemon/server/facade.py +375 -0
- browserwright/daemon/server/facade_extension.py +969 -0
- browserwright/daemon/server/listener.py +1058 -0
- browserwright/daemon/server/proxy.py +1991 -0
- browserwright/daemon/server/relay.py +783 -0
- browserwright/daemon/server/state.py +432 -0
- browserwright/daemon/server/upstream.py +266 -0
- browserwright/daemon/userscripts.py +150 -0
- browserwright/discovery.py +213 -0
- browserwright/errors.py +177 -0
- browserwright/health.py +169 -0
- browserwright/install.py +628 -0
- browserwright/memory/__init__.py +15 -0
- browserwright/memory/_md.py +120 -0
- browserwright/memory/_yaml.py +217 -0
- browserwright/memory/global_mem.py +201 -0
- browserwright/memory/repl_mem.py +28 -0
- browserwright/memory/session_decisions.py +53 -0
- browserwright/memory/site_mem.py +381 -0
- browserwright/mode_b_client.py +590 -0
- browserwright/multitask.py +131 -0
- browserwright/output_schema.py +99 -0
- browserwright/primitives/__init__.py +67 -0
- browserwright/primitives/discovery_api.py +79 -0
- browserwright/primitives/http.py +42 -0
- browserwright/primitives/inspect.py +876 -0
- browserwright/primitives/interact.py +518 -0
- browserwright/primitives/page.py +556 -0
- browserwright/primitives/site.py +143 -0
- browserwright/release_install.py +466 -0
- browserwright/repl/__init__.py +6 -0
- browserwright/repl/_namespace.py +106 -0
- browserwright/repl/_smart_goto.py +236 -0
- browserwright/repl/inline.py +180 -0
- browserwright/repl/playwright_handle.py +449 -0
- browserwright/repl/snapshot.py +150 -0
- browserwright/session.py +229 -0
- browserwright/session_create.py +252 -0
- browserwright/session_ctx.py +24 -0
- browserwright/session_registry.py +133 -0
- browserwright/session_runtime.py +133 -0
- browserwright/site_skills_starter/github.com/SKILL.md +14 -0
- browserwright/site_skills_starter/github.com/memory.md +29 -0
- browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
- browserwright/site_skills_starter/google.com/SKILL.md +16 -0
- browserwright/site_skills_starter/google.com/memory.md +27 -0
- browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
- browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
- browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
- browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
- browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
- browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
- browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
- browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
- browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
- browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
- browserwright/skill_doc.py +140 -0
- browserwright/skill_runtime.md +194 -0
- browserwright/subscriptions.py +213 -0
- browserwright/task_runner.py +125 -0
- browserwright/version.py +117 -0
- browserwright-0.6.2.dist-info/METADATA +12 -0
- browserwright-0.6.2.dist-info/RECORD +98 -0
- browserwright-0.6.2.dist-info/WHEEL +5 -0
- browserwright-0.6.2.dist-info/entry_points.txt +3 -0
- browserwright-0.6.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
"""Centralized daemon state (§8.5).
|
|
2
|
+
|
|
3
|
+
v0.3 expansion of the v0.2 single-client model:
|
|
4
|
+
|
|
5
|
+
- `client` (singular) → `clients: dict[id, ClientState]`
|
|
6
|
+
- per-client `sessions: dict[local_session_id, SessionBinding]`
|
|
7
|
+
- `upstream_to_locals: dict[upstream_session_id, list[SessionBinding]]`
|
|
8
|
+
(list because one upstream session can serve N local sessions via shared-read)
|
|
9
|
+
- `attachers: dict[target_id, AttachOwnership]` — the single-attacher rule
|
|
10
|
+
- `pending_requests: dict[upstream_id, PendingRequest]` — id translation for
|
|
11
|
+
CDP response routing (CDP responses correlate by id, not by sessionId, so
|
|
12
|
+
ids must be unique across clients on the upstream wire)
|
|
13
|
+
|
|
14
|
+
The transitions still go through the same observer pattern as v0.2.
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import asyncio
|
|
19
|
+
import itertools
|
|
20
|
+
import time
|
|
21
|
+
from collections import deque
|
|
22
|
+
from dataclasses import dataclass, field
|
|
23
|
+
from enum import Enum
|
|
24
|
+
from typing import Any, Awaitable, Callable, Literal
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Maximum number of frames buffered per client while upstream is still
# opening. Spec §10 open question — "buffer with limit 100, error past that"
# was the resolution. Kept as a module constant so tests can override it.
PRE_OPEN_BUFFER_LIMIT = 100
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ---- enums -----------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class UpstreamPhase(str, Enum):
    """Lifecycle phase of the daemon's upstream connection.

    The ``str`` mixin makes every member compare equal to (and behave as)
    its plain string value, so a phase can be used wherever a string is
    expected.
    """

    DISCONNECTED = "DISCONNECTED"
    CONNECTING = "CONNECTING"
    CONNECTED = "CONNECTED"
    CLOSING = "CLOSING"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Closed set of reasons recorded when the upstream connection is closed
# (stored in DaemonState.last_close_reason and sent with closing events).
CloseReason = Literal[
    "chrome_exit",
    "backend_lost",
    "idle_close",
    "daemon_shutdown",
    "skill_disconnect",
]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# ---- session / attach data classes ----------------------------------------
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class SessionBinding:
    """Link between one client's local sessionId and an upstream sessionId.

    Each binding belongs to exactly one client. Several bindings may share
    the same ``upstream_session_id`` when shared-read is active.
    """

    # Owning client.
    client_id: int
    # Session id as seen by THIS client.
    local_session_id: str
    # Session id as seen by Chrome.
    upstream_session_id: str
    # Known from the attach response onward.
    target_id: str
    # True means shared-read; commands are rejected with -32602.
    readonly: bool = False
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class AttachOwnership:
    """Ownership record for a single targetId.

    The primary client has full read+write access. Any additional
    shared-read attachers are tracked in ``readers`` and get read-only
    sessions backed by the same upstream session.
    """

    target_id: str
    primary_client_id: int
    primary_local_session: str
    upstream_session_id: str
    # (client_id, local_session_id) tuples for read-only attachers.
    readers: list[tuple[int, str]] = field(default_factory=list)

    def all_local_sessions(self) -> list[tuple[int, str]]:
        """Return a new list of (client_id, local_session_id) pairs.

        The primary comes first, followed by the readers in their stored
        order — useful for event fan-out within a session.
        """
        sessions = [(self.primary_client_id, self.primary_local_session)]
        sessions.extend(self.readers)
        return sessions
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class PendingRequest:
    """Bookkeeping for a client request whose upstream response is pending.

    Ids are translated because CDP responses correlate by id, and multiple
    clients could otherwise pick the same numeric id.
    """

    # Client that issued the request.
    client_id: int
    # The id the client originally sent.
    client_request_id: int
    # Raw method name (used by the attach interceptor).
    method: str
    # For Target.attachToTarget: the targetId the client asked for, kept so
    # the attachers table can be filled when the response arrives.
    attach_target_id: str | None = None
    # Whether the attach carried `flags.allowSecondaryReadOnly=true`. The
    # daemon does not forward this flag — the routing decision is made
    # locally — but it is remembered for the rare case where the primary
    # owner is the SAME client (regular write semantics are kept then).
    attach_allow_shared_read: bool = False
    # Sessionless-vs-sessioned: no field records whether the original request
    # carried a sessionId. Session ids are echoed only in some
    # daemon-mediated synthetic events; plain {"id","result"} CDP responses
    # do not echo sessionId, so vanilla responses do not need it.
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# ---- ClientState -----------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@dataclass
class ClientState:
    """State for one connected ws client. v0.3: N of these exist at a time."""

    client_id: int
    label: str
    # Ledger id and name of the browserwright session this client is bound
    # to, set from the ws ``?session=<id>`` query at connect. On the shared
    # extension context they scope browser-level enumeration
    # (Target.getTargets) to THIS session's tab group, keeping sessions
    # mutually invisible. Both are None for the bare REPL client and for
    # single-context unit tests.
    session_id: str | None = None
    session_name: str | None = None
    # local_session_id → SessionBinding owned by this client.
    sessions: dict[str, SessionBinding] = field(default_factory=dict)
    subscribed_focus: bool = False
    connected_at: float = field(default_factory=time.time)
    last_command_at: float = field(default_factory=time.time)
    # Spec §10 open question: frames a client sends while upstream is still
    # DISCONNECTED / CONNECTING are buffered here per client (FIFO, limit
    # 100) and drained once upstream is open; the 101st frame is rejected
    # with CDP error -32603. Without the buffer such frames are silently
    # dropped and the client times out at the 30s CDP boundary (Task #76).
    # The deque itself is unbounded — presumably the limit is enforced by
    # the code that enqueues; that code is not in this class.
    pre_open_buffer: deque[str] = field(default_factory=deque)

    def owns_session(self, local_session_id: str) -> bool:
        """Tell whether ``local_session_id`` is bound to this client."""
        owned = self.sessions
        return local_session_id in owned
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# ---- DaemonState -----------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@dataclass
class DaemonState:
    """Whole-process mutable state. ONE instance per daemon.

    Holds the upstream connection phase, the table of connected clients and
    their session bindings, the single-attacher ownership table, the
    pending-request map used for id translation, and the heuristic
    active-tab bookkeeping. Phase transitions notify the async observers
    registered through ``subscribe``.
    """

    backend_name: str
    upstream_phase: UpstreamPhase = UpstreamPhase.DISCONNECTED
    upstream_ws_url: str | None = None
    last_close_reason: CloseReason | None = None

    # v0.3: many clients keyed by client_id (monotonic).
    clients: dict[int, ClientState] = field(default_factory=dict)
    _next_client_id: itertools.count = field(
        default_factory=lambda: itertools.count(1))

    # Local→upstream session lookup is on ClientState. Upstream→[locals]
    # lives here for fast event fan-out (sessionId-carrying events look up
    # here).
    upstream_to_locals: dict[str, list[SessionBinding]] = field(default_factory=dict)

    # Single-attacher table: targetId → AttachOwnership.
    attachers: dict[str, AttachOwnership] = field(default_factory=dict)

    # Pending request map keyed by the *upstream* (translated) id.
    pending_requests: dict[int, PendingRequest] = field(default_factory=dict)
    # Allocator for upstream ids. Stays positive — daemon-internal ids on
    # UpstreamConnection.send_command live in big negatives.
    _next_upstream_id: itertools.count = field(
        default_factory=lambda: itertools.count(1))

    # Heuristic active-tab table (unchanged from v0.2).
    last_activated_at: dict[str, float] = field(default_factory=dict)
    targets: dict[str, dict[str, Any]] = field(default_factory=dict)

    last_activity_at: float = field(default_factory=time.time)
    last_popup_resolved_at: float | None = None

    _subscribers: list[Callable[[str, dict], Awaitable[None]]] = field(default_factory=list)

    # ---- client lifecycle -------------------------------------------------

    def allocate_client(self, label: str, *, client_id: int | None = None,
                        session_id: str | None = None,
                        session_name: str | None = None) -> ClientState:
        """Create a ClientState, register it in ``clients`` and return it.

        Phase 2: the Daemon passes a globally-unique ``client_id`` (unique
        across all UpstreamContexts) so daemon logs never show two clients
        sharing a number. When omitted (single-context callers / tests),
        this state's own monotonic counter is used. An empty ``label`` is
        stored as ``"anonymous"``.
        """
        if client_id is None:
            client_id = next(self._next_client_id)
        client = ClientState(
            client_id=client_id,
            label=label or "anonymous",
            session_id=session_id,
            session_name=session_name,
        )
        self.clients[client_id] = client
        return client

    def release_client(self, client_id: int) -> ClientState | None:
        """Drop a client and clean up all its sessions and owned attachments.

        Returns the released ClientState (its ``sessions`` dict is left
        populated so the caller can iterate the owned sessions, e.g. to
        synthesize detach events before closing the ws), or None when the
        id is unknown. The caller MUST handle those side effects — this
        method only mutates state.
        """
        client = self.clients.pop(client_id, None)
        if client is None:
            return None
        # Snapshot the bindings; each one is removed from upstream_to_locals
        # and has its attacher ownership dropped or trimmed.
        for binding in list(client.sessions.values()):
            self._unbind_session(binding)
        return client

    def _unbind_session(self, binding: SessionBinding) -> None:
        """Internal — remove ``binding`` from the upstream→local table and
        update attacher ownership accordingly."""
        identity = (binding.client_id, binding.local_session_id)
        upstream_sid = binding.upstream_session_id

        # Keep every other binding on the same upstream session; drop the
        # table entry entirely once nothing is left.
        remaining = [
            b for b in self.upstream_to_locals.get(upstream_sid, [])
            if (b.client_id, b.local_session_id) != identity
        ]
        if remaining:
            self.upstream_to_locals[upstream_sid] = remaining
        else:
            self.upstream_to_locals.pop(upstream_sid, None)

        # Attacher cleanup.
        own = self.attachers.get(binding.target_id)
        if own is None:
            return
        if (own.primary_client_id, own.primary_local_session) == identity:
            # Primary owner is leaving. Readers are NOT promoted:
            # transferring write ownership without consent is unusual, so
            # for v0.3 the attachment is dropped and the upstream session is
            # left to die. The spec doesn't mandate promotion.
            self.attachers.pop(binding.target_id, None)
        else:
            # Reader leaving.
            own.readers = [
                (cid, lsid) for (cid, lsid) in own.readers
                if (cid, lsid) != identity
            ]

    # ---- session table ----------------------------------------------------

    def bind_session(
        self,
        client_id: int,
        local_session_id: str,
        upstream_session_id: str,
        target_id: str,
        *,
        readonly: bool,
    ) -> SessionBinding:
        """Create a SessionBinding and index it on both sides.

        The binding is stored in the client's ``sessions`` and appended to
        ``upstream_to_locals`` for its upstream session.

        Raises:
            KeyError: if ``client_id`` is not a registered client.
        """
        client = self.clients[client_id]
        binding = SessionBinding(
            client_id=client_id,
            local_session_id=local_session_id,
            upstream_session_id=upstream_session_id,
            target_id=target_id,
            readonly=readonly,
        )
        client.sessions[local_session_id] = binding
        self.upstream_to_locals.setdefault(upstream_session_id, []).append(binding)
        return binding

    def unbind_session_by_local(
        self, client_id: int, local_session_id: str
    ) -> SessionBinding | None:
        """Used on Target.detachFromTarget. Returns the binding removed, or
        None when the client or the session is unknown."""
        client = self.clients.get(client_id)
        if client is None:
            return None
        binding = client.sessions.pop(local_session_id, None)
        if binding is not None:
            self._unbind_session(binding)
        return binding

    # ---- attacher table ---------------------------------------------------

    def claim_attacher(
        self,
        target_id: str,
        client_id: int,
        local_session_id: str,
        upstream_session_id: str,
    ) -> None:
        """Record that ``client_id`` is the primary owner of ``target_id``.

        The single-attacher check happened earlier in the router; this just
        commits the bookkeeping after the upstream attach succeeded. Any
        existing record for the target is replaced.
        """
        self.attachers[target_id] = AttachOwnership(
            target_id=target_id,
            primary_client_id=client_id,
            primary_local_session=local_session_id,
            upstream_session_id=upstream_session_id,
        )

    def add_reader(
        self,
        target_id: str,
        client_id: int,
        local_session_id: str,
    ) -> AttachOwnership | None:
        """Append a read-only attacher to the ownership record of
        ``target_id``; return that record, or None when the target has no
        owner."""
        own = self.attachers.get(target_id)
        if own is None:
            return None
        own.readers.append((client_id, local_session_id))
        return own

    # ---- pending request map ---------------------------------------------

    def allocate_upstream_id(self) -> int:
        """Return the next id from the upstream id counter."""
        return next(self._next_upstream_id)

    def remember_request(
        self,
        upstream_id: int,
        client_id: int,
        client_request_id: int,
        method: str,
        *,
        attach_target_id: str | None = None,
        attach_allow_shared_read: bool = False,
    ) -> None:
        """Store a PendingRequest under its translated ``upstream_id``."""
        self.pending_requests[upstream_id] = PendingRequest(
            client_id=client_id,
            client_request_id=client_request_id,
            method=method,
            attach_target_id=attach_target_id,
            attach_allow_shared_read=attach_allow_shared_read,
        )

    def take_pending(self, upstream_id: int) -> PendingRequest | None:
        """Remove and return the pending request for ``upstream_id``, or
        None when there is none."""
        return self.pending_requests.pop(upstream_id, None)

    # ---- subscriptions / transitions (unchanged) -------------------------

    def subscribe(self, fn: Callable[[str, dict], Awaitable[None]]) -> None:
        """Register an async observer called as ``fn(event, payload)``."""
        self._subscribers.append(fn)

    async def _emit(self, event: str, payload: dict) -> None:
        """Await every subscriber in registration order.

        Delivery is best-effort: a subscriber that raises must not stop the
        others or abort the state transition, so the failure is logged and
        the loop continues.
        """
        # Function-scope import: this module does not import logging at top
        # level.
        import logging

        # Iterate over a copy so subscribers may register others meanwhile.
        for fn in list(self._subscribers):
            try:
                await fn(event, payload)
            except Exception as exc:
                logging.getLogger(__name__).warning(
                    "state subscriber raised on %s: %r", event, exc)

    async def begin_connecting(self, backend_name: str) -> None:
        """Enter CONNECTING for ``backend_name`` and notify subscribers."""
        self.upstream_phase = UpstreamPhase.CONNECTING
        self.backend_name = backend_name
        await self._emit("upstream.connecting", {"backend": backend_name})

    async def set_connected(self, ws_url: str, *, was_popup: bool) -> None:
        """Enter CONNECTED, remember ``ws_url`` and notify subscribers.

        When ``was_popup`` is true the current time is stored in
        ``last_popup_resolved_at``.
        """
        self.upstream_phase = UpstreamPhase.CONNECTED
        self.upstream_ws_url = ws_url
        if was_popup:
            self.last_popup_resolved_at = time.time()
        await self._emit("upstream.ready", {"ws_url": ws_url})

    async def begin_closing(self, reason: CloseReason) -> None:
        """Enter CLOSING, record ``reason`` and notify subscribers."""
        self.upstream_phase = UpstreamPhase.CLOSING
        self.last_close_reason = reason
        await self._emit("upstream.closing", {"reason": reason})

    async def set_disconnected(self) -> None:
        """Enter DISCONNECTED and wipe everything tied to the upstream.

        The client list is kept — the client handlers release themselves
        individually — but attachers, pending requests, the upstream→local
        table and every client's session bindings are cleared.
        """
        self.upstream_phase = UpstreamPhase.DISCONNECTED
        self.upstream_ws_url = None
        self.attachers.clear()
        self.pending_requests.clear()
        self.upstream_to_locals.clear()
        for client in self.clients.values():
            client.sessions.clear()
        await self._emit("upstream.disconnected", {"reason": self.last_close_reason})

    # ---- heuristic active-tab table (unchanged) --------------------------

    def note_activate(self, target_id: str) -> None:
        """Stamp ``target_id`` as activated now and refresh the activity clock."""
        self.last_activated_at[target_id] = time.time()
        self.last_activity_at = time.time()

    def note_target_info(self, info: dict) -> None:
        """Store type/url/title of a target; ignored unless ``targetId`` is
        a string."""
        tid = info.get("targetId")
        if not isinstance(tid, str):
            return
        self.targets[tid] = {
            "type": info.get("type"),
            "url": info.get("url", ""),
            "title": info.get("title", ""),
        }

    def note_target_destroyed(self, target_id: str) -> None:
        """Forget a destroyed target."""
        self.targets.pop(target_id, None)
        self.last_activated_at.pop(target_id, None)
        # Also drop any attacher record (the upstream session is gone with it).
        self.attachers.pop(target_id, None)

    def best_active_tab(self) -> dict | None:
        """Guess the active tab: the most recently activated eligible page.

        Only targets of type ``"page"`` whose URL does not start with a
        browser-internal scheme are eligible. Returns None when there is no
        eligible target. ``since_seconds`` is None for a target that was
        never activated.
        """
        internals = (
            "chrome://", "chrome-untrusted://", "devtools://", "edge://",
            "chrome-extension://", "about:", "view-source:",
        )
        eligible: list[tuple[float, str, dict]] = [
            (self.last_activated_at.get(tid, 0.0), tid, meta)
            for tid, meta in self.targets.items()
            if meta.get("type") == "page"
            and not (meta.get("url") or "").startswith(internals)
        ]
        if not eligible:
            return None
        # max() returns the first maximal entry, so ties keep table order —
        # the same winner a stable descending sort would put first.
        score, tid, meta = max(eligible, key=lambda row: row[0])
        since = (time.time() - score) if score > 0 else None
        return {
            "targetId": tid,
            "url": meta.get("url", ""),
            "title": meta.get("title", ""),
            "accuracy": "heuristic-recent-activate",
            "since_seconds": since,
        }

    # ---- v0.2 compat: legacy `client` accessor ---------------------------

    @property
    def client(self) -> ClientState | None:
        """v0.2 callers used ``state.client`` (singular). v0.3 supports many
        clients, but this accessor keeps the close-etiquette path simple in
        single-client deployments: it returns the client when exactly one
        is connected, and None when 0 or >1 clients are connected."""
        if len(self.clients) == 1:
            return next(iter(self.clients.values()))
        return None
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""Upstream ws connection — minimal CDP transport without cdp-use framing.
|
|
2
|
+
|
|
3
|
+
Why hand-rolled? We need raw frame-in/frame-out because the daemon is a
|
|
4
|
+
transparent proxy: a client's outbound text frame gets forwarded byte-for-byte
|
|
5
|
+
to upstream, and upstream's response/event frames get forwarded back without
|
|
6
|
+
re-parsing or rewriting (§6.3). cdp-use parses + re-emits + tracks ids on its
|
|
7
|
+
own; that's two layers of conflict we don't want.
|
|
8
|
+
|
|
9
|
+
websockets.connect gives us the right primitive: a raw async iterator of text
|
|
10
|
+
frames, with `.send(str|bytes)` for the other direction. We also handle the
|
|
11
|
+
localhost-proxy-bypass dance from active_tab here.
|
|
12
|
+
|
|
13
|
+
Spec §6.5 invariant: upstream never auto-reconnects. When the connection
|
|
14
|
+
drops, we mark CLOSING and signal up; the caller decides what comes next.
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import asyncio
|
|
19
|
+
import contextlib
|
|
20
|
+
import json
|
|
21
|
+
import logging
|
|
22
|
+
import os
|
|
23
|
+
import time
|
|
24
|
+
from typing import Any, Awaitable, Callable
|
|
25
|
+
from urllib.parse import urlparse
|
|
26
|
+
|
|
27
|
+
import websockets
|
|
28
|
+
from websockets.exceptions import ConnectionClosed
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
# 30s upstream heartbeat — spec §10 open question "Browser.getVersion
# heartbeat frequency" resolved to 30s.
HEARTBEAT_INTERVAL = 30.0
# Base (starting value) of the synthetic command ids reserved for
# daemon-internal use (heartbeat, Target subscriptions): UpstreamConnection
# seeds its internal id counter with it. The daemon uses big negatives to
# avoid colliding with anything a CDP client might send.
# NOTE(review): the original comment says client ids pass through unchanged —
# confirm against the daemon's request-id translation.
_DAEMON_ID_BASE = -2_000_000_000
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class UpstreamConnection:
|
|
42
|
+
"""Wraps a single ws to Chrome's browser-level CDP endpoint.
|
|
43
|
+
|
|
44
|
+
Lifecycle:
|
|
45
|
+
open(ws_url) → forward() pumps frames → close() ends it cleanly.
|
|
46
|
+
|
|
47
|
+
`on_frame(text)` is called for every frame *from* upstream. It is the
|
|
48
|
+
caller's job to forward it downstream (modulo BrowserwrightDaemon.* answers
|
|
49
|
+
which never enter here).
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
def __init__(
    self,
    on_frame: Callable[[str], Awaitable[None]],
    on_close: Callable[[str], Awaitable[None]],
):
    """Store the callbacks and start with no connection.

    ``on_frame`` and ``on_close`` are async callables taking one string.
    Nothing is connected here; every connection-related attribute starts
    empty.
    """
    # Owner-supplied callbacks.
    self._on_frame, self._on_close = on_frame, on_close
    # Connection handle and background tasks; all None until opened.
    self._ws: websockets.ClientConnection | None = None  # type: ignore[name-defined]
    self._reader_task: asyncio.Task | None = None
    self._heartbeat_task: asyncio.Task | None = None
    # Daemon-internal command ids count upward from the negative base.
    self._next_internal_id = _DAEMON_ID_BASE
    # Internal command id → future awaiting that command's response.
    self._pending_internal: dict[int, asyncio.Future] = {}
    self._ws_url: str | None = None
|
|
65
|
+
|
|
66
|
+
# ---- public API -------------------------------------------------------
|
|
67
|
+
|
|
68
|
+
@property
def ws_url(self) -> str | None:
    """The stored upstream ws URL, or None when none is stored."""
    url = self._ws_url
    return url
|
|
71
|
+
|
|
72
|
+
@property
def is_open(self) -> bool:
    """True while a ws handle is held (i.e. ``_ws`` is not None)."""
    handle = self._ws
    return handle is not None
|
|
75
|
+
|
|
76
|
+
async def open(
    self,
    ws_url: str,
    *,
    timeout: float = 30.0,
    additional_headers: dict[str, str] | None = None,
    ssl_context: Any = None,
) -> None:
    """Connect to upstream. Raises on failure; caller transitions state.

    v0.5: ``additional_headers`` and ``ssl_context`` parameterize the
    upstream handshake so the ``cloud`` backend's AuthProvider can inject
    ``Authorization: Bearer ...`` headers or pass a client-cert
    ``ssl.SSLContext`` for mTLS. Both stay None for the v0.1-v0.4 use cases
    (local Chrome — no auth needed, ws:// is plaintext).

    Raises:
        RuntimeError: if an upstream connection is already open.
    """
    if self._ws is not None:
        raise RuntimeError("upstream already open")
    with _localhost_bypass_proxy(ws_url):
        options: dict[str, Any] = dict(
            # Big max_size: CDP `Page.captureScreenshot` returns base64
            # blobs that comfortably exceed the websockets default 1MiB.
            max_size=100 * 1024 * 1024,
            # No per-message-deflate — Chrome's browser-level CDP doesn't
            # speak it, and websockets v15 sometimes negotiates extensions
            # that break the handshake.
            compression=None,
            # ws-level pings keep the upstream alive; the CDP-level
            # Browser.getVersion heartbeat is layered on top for protocol
            # liveness.
            ping_interval=20,
            ping_timeout=20,
        )
        if additional_headers:
            # websockets v15 accepts `additional_headers=` for client
            # connections (older `extra_headers=` is deprecated). The
            # keyword name is part of websockets' public API.
            options["additional_headers"] = list(additional_headers.items())
        if ssl_context is not None:
            options["ssl"] = ssl_context
        connecting = websockets.connect(ws_url, **options)
        self._ws = await asyncio.wait_for(connecting, timeout=timeout)
    self._ws_url = ws_url
    # Start the background reader first, then the heartbeat.
    self._reader_task = asyncio.create_task(self._reader_loop())
    self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
|
|
123
|
+
|
|
124
|
+
async def send_text(self, frame: str) -> None:
    """Forward a downstream frame to upstream verbatim.

    Raises:
        RuntimeError: if the upstream is not open.
    """
    upstream = self._ws
    if upstream is None:
        raise RuntimeError("upstream not open")
    await upstream.send(frame)
|
|
129
|
+
|
|
130
|
+
async def send_command(self, method: str, params: dict | None = None,
                       session_id: str | None = None,
                       timeout: float = 10.0) -> dict:
    """Send a daemon-internal command and wait for its response.

    Internal commands use an id space distinct from client ids, so their
    results never collide with downstream traffic.

    Used for: the initial Target.setDiscoverTargets that populates the
    target table, the periodic Browser.getVersion heartbeat, and the
    close-time Target.detachFromTarget.

    Returns whatever result is set on the pending future for this command.

    Raises:
        RuntimeError: if the upstream is not open.
        asyncio.TimeoutError: if no response arrives within ``timeout``.
    """
    if self._ws is None:
        raise RuntimeError("upstream not open")
    cmd_id = self._alloc_id()
    request: dict[str, Any] = {"id": cmd_id, "method": method}
    # Optional members are added only when given, in this key order.
    for key, value in (("params", params), ("sessionId", session_id)):
        if value is not None:
            request[key] = value
    # Register the future BEFORE sending so a fast response finds it.
    reply = asyncio.get_running_loop().create_future()
    self._pending_internal[cmd_id] = reply
    try:
        await self._ws.send(json.dumps(request))
        return await asyncio.wait_for(reply, timeout=timeout)
    finally:
        # Always deregister, whether answered, timed out or failed to send.
        self._pending_internal.pop(cmd_id, None)
|
|
156
|
+
|
|
157
|
+
async def close(self, *, code: int = 1000, reason: str = "") -> None:
    """Close the upstream cleanly. Idempotent.

    Cancels the reader and heartbeat tasks, fails every pending internal
    command with ``ConnectionError("upstream closing")``, then closes the
    websocket (best effort) and forgets the URL.

    Safe to call from inside the reader or heartbeat task itself (for
    example from a close callback): the calling task is never cancelled,
    because a self-cancel would surface as ``CancelledError`` at the
    ``ws.close()`` await below and abort the shutdown halfway.

    Args:
        code: websocket close code sent to the upstream.
        reason: websocket close reason sent to the upstream.
    """
    current = asyncio.current_task()
    for task in (self._reader_task, self._heartbeat_task):
        # Skip the task we are running in; it is already on its way out.
        if task is not None and task is not current:
            task.cancel()
    ws = self._ws
    # Clear first so concurrent send_* calls fail fast instead of racing.
    self._ws = None
    for fut in self._pending_internal.values():
        if not fut.done():
            fut.set_exception(ConnectionError("upstream closing"))
    self._pending_internal.clear()
    try:
        if ws is not None:
            try:
                await ws.close(code=code, reason=reason)
            except Exception:
                # Deliberate best effort: the socket may already be dead.
                pass
    finally:
        # Reset even if the close handshake is interrupted.
        self._ws_url = None
|
|
175
|
+
|
|
176
|
+
# ---- internal ---------------------------------------------------------
|
|
177
|
+
|
|
178
|
+
def _alloc_id(self) -> int:
    """Hand out the next internal command id and advance the counter."""
    allocated = self._next_internal_id
    self._next_internal_id = allocated + 1
    return allocated
|
|
182
|
+
|
|
183
|
+
async def _reader_loop(self) -> None:
    """Pump frames from the upstream socket until it ends.

    Each frame is decoded to text (bytes are decoded as UTF-8 with
    replacement). A JSON object whose integer ``id`` matches a pending
    internal command resolves that command's future and is NOT forwarded.
    Everything else, including text that is not valid JSON, goes to
    ``self._on_frame`` verbatim.

    When the loop ends for any reason, ``self._on_close("upstream-eof")``
    is awaited exactly once. Failures raised by either callback are
    logged rather than propagated.
    """
    ws = self._ws
    if ws is None:
        return
    try:
        async for raw in ws:
            if not isinstance(raw, (str, bytes)):
                continue
            text = raw if isinstance(raw, str) else raw.decode("utf-8", errors="replace")
            # Intercept responses to *our* internal ids (heartbeat etc).
            try:
                parsed = json.loads(text)
            except (ValueError, TypeError):
                parsed = None
            if isinstance(parsed, dict):
                cid = parsed.get("id")
                if isinstance(cid, int) and cid in self._pending_internal:
                    fut = self._pending_internal.pop(cid)
                    # The waiter may already have timed out or been failed.
                    if not fut.done():
                        fut.set_result(parsed)
                    continue
            # Forward to downstream.
            try:
                await self._on_frame(text)
            except Exception as e:
                logger.warning("on_frame raised: %r", e)
    except ConnectionClosed as e:
        logger.info("upstream closed: code=%s reason=%s", e.code, e.reason)
    except Exception as e:
        logger.warning("upstream reader crashed: %r", e)
    finally:
        # Always notify close — this is the canonical signal for the
        # state machine to enter CLOSING (caller decides reason).
        try:
            await self._on_close("upstream-eof")
        except Exception as e:
            # Previously swallowed silently; a failing close callback can
            # leave the caller's state machine stuck, so make it visible.
            logger.warning("on_close raised: %r", e)
|
|
220
|
+
|
|
221
|
+
async def _heartbeat_loop(self) -> None:
    """Keep CDP alive by pinging `Browser.getVersion` every HEARTBEAT_INTERVAL.

    Spec §10 open question: 30s is the chosen cadence. Too fast = wasted
    CDP traffic; too slow = stale-Chrome detection latency. Tunable later.

    The loop exits when the upstream is gone, when the task is cancelled,
    or when a probe fails. On a failed probe the upstream socket is
    actually closed, so the reader loop ends and emits its close
    notification instead of staying blocked on an unresponsive upstream.
    """
    try:
        while True:
            await asyncio.sleep(HEARTBEAT_INTERVAL)
            if self._ws is None:
                return
            try:
                await self.send_command("Browser.getVersion", timeout=10)
            except (asyncio.TimeoutError, ConnectionError, ConnectionClosed):
                logger.warning("heartbeat failed, closing upstream")
                # Close the socket directly rather than via self.close():
                # that would cancel this very task mid-shutdown.
                stale = self._ws
                if stale is not None:
                    try:
                        await stale.close()
                    except Exception as e:
                        logger.warning("closing stale upstream failed: %r", e)
                return
    except asyncio.CancelledError:
        # Cancellation is the normal shutdown path; end quietly.
        return
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
# ---- localhost proxy bypass (same trick as active_tab) --------------------
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@contextlib.contextmanager
def _localhost_bypass_proxy(ws_url: str):
    """When the upstream URL is loopback, ensure NO_PROXY covers it.

    Same rationale as `active_tab._localhost_bypass_proxy`. Spec doesn't
    mention this — but Chrome runs on the user's machine, and the user
    often has HTTPS_PROXY / ALL_PROXY set.

    For a loopback ``ws_url`` the NO_PROXY environment variable is extended
    with ``127.0.0.1``, ``localhost`` and ``::1`` for the duration of the
    ``with`` block and restored to its previous state afterwards. For any
    other host the environment is left untouched.

    Existing entries are compared as whole comma-separated items, so a
    value such as ``mylocalhost.example.com`` or ``127.0.0.10`` no longer
    hides a missing loopback entry.

    NOTE(review): this mutates process-wide environment state; overlapping
    use from concurrent tasks or threads could interleave — confirm callers
    do not overlap.
    """
    host = (urlparse(ws_url).hostname or "").lower()
    if host not in ("127.0.0.1", "localhost", "::1", "[::1]"):
        yield
        return
    # None means "was unset"; an empty string is a real value to restore.
    prev = os.environ.get("NO_PROXY")
    existing = {
        entry.strip().lower()
        for entry in (prev or "").split(",")
        if entry.strip()
    }
    missing = [h for h in ("127.0.0.1", "localhost", "::1") if h not in existing]
    os.environ["NO_PROXY"] = ",".join(([prev] if prev else []) + missing)
    try:
        yield
    finally:
        if prev is None:
            os.environ.pop("NO_PROXY", None)
        else:
            os.environ["NO_PROXY"] = prev
|