browserwright 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browserwright/__init__.py +33 -0
- browserwright/__main__.py +6 -0
- browserwright/_executor/__init__.py +47 -0
- browserwright/_executor/__main__.py +9 -0
- browserwright/_executor/client.py +127 -0
- browserwright/_executor/process.py +652 -0
- browserwright/_executor/protocol.py +152 -0
- browserwright/api.py +66 -0
- browserwright/cdp.py +285 -0
- browserwright/cli.py +741 -0
- browserwright/daemon/__init__.py +8 -0
- browserwright/daemon/_ipc.py +444 -0
- browserwright/daemon/active_tab.py +183 -0
- browserwright/daemon/auth.py +395 -0
- browserwright/daemon/backends/__init__.py +59 -0
- browserwright/daemon/backends/base.py +120 -0
- browserwright/daemon/backends/cloud.py +222 -0
- browserwright/daemon/backends/env.py +119 -0
- browserwright/daemon/backends/extension.py +185 -0
- browserwright/daemon/backends/rdp.py +214 -0
- browserwright/daemon/cli.py +1437 -0
- browserwright/daemon/config.py +380 -0
- browserwright/daemon/doctor.py +179 -0
- browserwright/daemon/errors.py +34 -0
- browserwright/daemon/launch_chrome.py +353 -0
- browserwright/daemon/observability.py +181 -0
- browserwright/daemon/platforms.py +234 -0
- browserwright/daemon/resolver.py +72 -0
- browserwright/daemon/server/__init__.py +6 -0
- browserwright/daemon/server/daemon.py +229 -0
- browserwright/daemon/server/executor_registry.py +434 -0
- browserwright/daemon/server/extension_upstream.py +677 -0
- browserwright/daemon/server/facade.py +375 -0
- browserwright/daemon/server/facade_extension.py +969 -0
- browserwright/daemon/server/listener.py +1058 -0
- browserwright/daemon/server/proxy.py +1991 -0
- browserwright/daemon/server/relay.py +783 -0
- browserwright/daemon/server/state.py +432 -0
- browserwright/daemon/server/upstream.py +266 -0
- browserwright/daemon/userscripts.py +150 -0
- browserwright/discovery.py +213 -0
- browserwright/errors.py +177 -0
- browserwright/health.py +169 -0
- browserwright/install.py +628 -0
- browserwright/memory/__init__.py +15 -0
- browserwright/memory/_md.py +120 -0
- browserwright/memory/_yaml.py +217 -0
- browserwright/memory/global_mem.py +201 -0
- browserwright/memory/repl_mem.py +28 -0
- browserwright/memory/session_decisions.py +53 -0
- browserwright/memory/site_mem.py +381 -0
- browserwright/mode_b_client.py +590 -0
- browserwright/multitask.py +131 -0
- browserwright/output_schema.py +99 -0
- browserwright/primitives/__init__.py +67 -0
- browserwright/primitives/discovery_api.py +79 -0
- browserwright/primitives/http.py +42 -0
- browserwright/primitives/inspect.py +876 -0
- browserwright/primitives/interact.py +518 -0
- browserwright/primitives/page.py +556 -0
- browserwright/primitives/site.py +143 -0
- browserwright/release_install.py +466 -0
- browserwright/repl/__init__.py +6 -0
- browserwright/repl/_namespace.py +106 -0
- browserwright/repl/_smart_goto.py +236 -0
- browserwright/repl/inline.py +180 -0
- browserwright/repl/playwright_handle.py +449 -0
- browserwright/repl/snapshot.py +150 -0
- browserwright/session.py +229 -0
- browserwright/session_create.py +252 -0
- browserwright/session_ctx.py +24 -0
- browserwright/session_registry.py +133 -0
- browserwright/session_runtime.py +133 -0
- browserwright/site_skills_starter/github.com/SKILL.md +14 -0
- browserwright/site_skills_starter/github.com/memory.md +29 -0
- browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
- browserwright/site_skills_starter/google.com/SKILL.md +16 -0
- browserwright/site_skills_starter/google.com/memory.md +27 -0
- browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
- browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
- browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
- browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
- browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
- browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
- browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
- browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
- browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
- browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
- browserwright/skill_doc.py +140 -0
- browserwright/skill_runtime.md +194 -0
- browserwright/subscriptions.py +213 -0
- browserwright/task_runner.py +125 -0
- browserwright/version.py +117 -0
- browserwright-0.6.2.dist-info/METADATA +12 -0
- browserwright-0.6.2.dist-info/RECORD +98 -0
- browserwright-0.6.2.dist-info/WHEEL +5 -0
- browserwright-0.6.2.dist-info/entry_points.txt +3 -0
- browserwright-0.6.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,969 @@
|
|
|
1
|
+
"""Playwright facade ↔ extension backend bridge (Task #tab-handle-model, PR2).
|
|
2
|
+
|
|
3
|
+
PR1's `facade.py` is a byte-for-byte CDP passthrough to a resolved upstream ws.
|
|
4
|
+
That works for the rdp backend because the daemon-owned Chrome speaks real
|
|
5
|
+
browser-level CDP natively (it emits `Target.attachedToTarget`/`targetCreated`,
|
|
6
|
+
handles `Target.*`/`Browser.*`). The **extension** backend has no resolvable
|
|
7
|
+
upstream ws — the daemon IS the relay, and `extension_upstream.py` only *acks*
|
|
8
|
+
`Target.setAutoAttach`/`setDiscoverTargets` (it never emits the unsolicited
|
|
9
|
+
target-lifecycle EVENTS that Playwright's `connect_over_cdp` handshake depends
|
|
10
|
+
on to discover tabs). So a Playwright client connects but `context.pages()` is
|
|
11
|
+
empty.
|
|
12
|
+
|
|
13
|
+
This module is the **extension-specific synthesis layer** the facade switches to
|
|
14
|
+
when the resolved backend is `extension`. The design (and the reason synthesis
|
|
15
|
+
lives HERE, not inside `extension_upstream.py`):
|
|
16
|
+
|
|
17
|
+
- The agent client path (`BrowserwrightDaemon.*` over the unix socket) relies
|
|
18
|
+
on `Target.setAutoAttach` being a *silent ack* — it drives discovery via the
|
|
19
|
+
daemon's own RPCs, not via Chrome's auto-attach event stream. Emitting
|
|
20
|
+
synthetic `attachedToTarget` into THAT path would be a regression. Only a
|
|
21
|
+
raw Playwright client wants the events, and the facade is the only place
|
|
22
|
+
that knows the consumer is a raw Playwright client.
|
|
23
|
+
|
|
24
|
+
- We REUSE (not duplicate) the existing emulation in `ExtensionUpstream`:
|
|
25
|
+
`Target.getTargets` / `Target.attachToTarget` / `Target.detachFromTarget` /
|
|
26
|
+
`Browser.getVersion` and the session-scoped `chrome.debugger` forwarding all
|
|
27
|
+
already live there. This bridge constructs a *dedicated* `ExtensionUpstream`
|
|
28
|
+
over the SAME shared `RelayServer` (so all those methods work unchanged) and
|
|
29
|
+
only ADDS:
|
|
30
|
+
* A2 — `Target.setAutoAttach`/`setDiscoverTargets`: ack + replay
|
|
31
|
+
`Target.targetCreated` + `Target.attachedToTarget` for every connected
|
|
32
|
+
tab (mirrors playwriter's relay replay). Also pushes these when a tab is
|
|
33
|
+
opened/attached later (via a relay fan-out listener).
|
|
34
|
+
* A3 — `Target.createTarget` → `RelayServer.create_background_tab`
|
|
35
|
+
(the extension can't open browser-level targets), then synthesizes the
|
|
36
|
+
created/attached events for the new tab.
|
|
37
|
+
* A4 — `Runtime.enable` execution-context barrier: forward, then wait
|
|
38
|
+
(bounded) for `Runtime.executionContextCreated` so Playwright doesn't
|
|
39
|
+
race ahead of the main-frame default context.
|
|
40
|
+
|
|
41
|
+
`getTargets` scope policy: session-bound facade connections scope discovery to
|
|
42
|
+
that session's tab group, while genuinely sessionless raw CDP clients keep the
|
|
43
|
+
historical unscoped enumeration.
|
|
44
|
+
"""
|
|
45
|
+
from __future__ import annotations
|
|
46
|
+
|
|
47
|
+
import asyncio
|
|
48
|
+
import contextlib
|
|
49
|
+
import json
|
|
50
|
+
import logging
|
|
51
|
+
import time
|
|
52
|
+
from typing import Any
|
|
53
|
+
|
|
54
|
+
from websockets.asyncio.server import ServerConnection
|
|
55
|
+
|
|
56
|
+
from ... import session_registry
|
|
57
|
+
from .extension_upstream import (
|
|
58
|
+
ExtensionUpstream,
|
|
59
|
+
_new_upstream_session_id,
|
|
60
|
+
_tab_id_from_target_id,
|
|
61
|
+
)
|
|
62
|
+
from .relay import RelayServer, _CommandError
|
|
63
|
+
|
|
64
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Upper bound on how long the `Runtime.enable` barrier (A4 / PR3) waits for the
# main-frame execution context. playwriter waits ~3s; we match that order of
# magnitude. On timeout the enable result is still returned — the barrier is
# best-effort robustness, not a correctness gate.
_RUNTIME_ENABLE_BARRIER_TIMEOUT = 3.0

# Pause between `Runtime.disable` and the following `Runtime.enable`, so Chrome
# treats the re-enable as a fresh subscription and re-emits
# `executionContextCreated` for the existing default context to this
# late-joining client (playwriter's relay does the same disable→sleep→enable
# dance: cdp-relay.ts:792-829).
# NOTE(review): presumably seconds, like the timeout above — confirm at the
# call site.
_RUNTIME_REENABLE_PAUSE = 0.05

# Synthetic browserContextId for synthesized page targets. The extension backend
# has no real CDP browser contexts (P4 — sessions isolate via tab groups), but
# Playwright requires a truthy browserContextId on attachedToTarget; one stable
# value routes every page into Playwright's single default context.
_SYNTHETIC_BROWSER_CONTEXT_ID = "browserwright-ext-default"

# Synthetic sessionId for the browser target itself (Playwright's
# `new_browser_cdp_session` → Target.attachToBrowserTarget). Commands arriving
# on this session are browser-level (Target.*/Browser.*), not page-scoped, so
# the session is stripped and they run through the same session-less emulation.
_BROWSER_SESSION_ID = "browserwright-ext-browser-session"

# Browser-level CDP methods that Playwright's `connect_over_cdp` handshake (and
# some context setup) issues and that the extension backend cannot honor — but
# which are safe to ACK with an empty result so the handshake proceeds.
# `extension_upstream.py` returns -32601 for several of these (it serves the
# agent path, which never sends them); a raw Playwright client treats a -32601
# during bootstrap as fatal. Mirrors playwriter's relay, which synthesizes
# benign successes for the browser-level methods it doesn't forward.
# SCOPED TO THE FACADE — the agent path's -32601 behavior is unchanged.
_BENIGN_BROWSER_NOOPS = frozenset({
    "Browser.setDownloadBehavior",
    "Storage.setStorageBucketTracking",
    "Target.autoAttachRelated",
    "Target.setRemoteLocations",
})
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class ExtensionFacadeBridge:
|
|
111
|
+
"""One Playwright `connect_over_cdp` client bridged to the extension
|
|
112
|
+
backend via the shared relay.
|
|
113
|
+
|
|
114
|
+
Lifetime == one ws connection. `run()` pumps frames from the Playwright
|
|
115
|
+
client through interception/synthesis until either side closes; `aclose()`
|
|
116
|
+
detaches the relay listener and tears down state.
|
|
117
|
+
"""
|
|
118
|
+
|
|
119
|
+
def __init__(
    self, *, client: ServerConnection, relay: RelayServer,
    session_id: str | None = None, session_name: str | None = None,
    session_group_id: int | None = None,
):
    """Wire one Playwright client connection to the shared relay.

    Args:
        client: The accepted websocket connection of the Playwright client.
        relay: The shared `RelayServer` the extension is connected to.
        session_id: Optional browserwright session this connection is
            scoped to; `None` for a sessionless raw CDP client.
        session_name: Optional display name; falls back to the name stored
            in the session registry, then to `session_id`.
        session_group_id: Optional tab-group id. Used only when it is a
            non-negative int; otherwise the registry value (if any) wins.
    """
    self._client = client
    self._relay = relay
    self._session_id = session_id

    # Whatever the session registry already knows about this session is the
    # fallback for both the name and the tab group.
    ledger_name, ledger_group = self._load_session_scope(session_id)
    self._session_name = session_name or ledger_name or session_id
    caller_group_usable = (
        isinstance(session_group_id, int) and session_group_id >= 0
    )
    self._group_id = session_group_id if caller_group_usable else ledger_group

    # A dedicated ExtensionUpstream over the SAME relay. Frames it
    # synthesizes or forwards come back to THIS Playwright client through
    # on_frame; on_close does nothing because the facade, not the upstream,
    # owns connection teardown.
    self._ext = ExtensionUpstream(
        relay=relay,
        on_frame=self._send_to_client,
        on_close=self._noop_close,
    )
    if not (self._session_id is None or self._group_id is None):
        self._ext._bind_group(self._session_id, self._group_id)  # noqa: SLF001

    # tab_id → the synthetic flat sessionId handed to Playwright for that
    # tab. One entry per tab announced via attachedToTarget, so a later
    # `targetDestroyed`/`detachedFromTarget` names the same session and
    # forwarded extension events carry the right sessionId.
    self._tab_sessions: dict[int, str] = {}
    self._closed = False
    # Serializes synthesis (autoAttach replay vs fan-out attach) so a tab is
    # never announced twice.
    self._lock = asyncio.Lock()
    # Non-zero while a Target.createTarget is in flight: the fan-out
    # `attached` observer holds back its announcement so the createTarget
    # RESPONSE and the attachedToTarget event reach Playwright in the order
    # it needs.
    self._creating = 0
    # Per-frame: the sessionId to echo on responses when the current command
    # arrived on the synthetic browser CDP session
    # (Target.attachToBrowserTarget). None for ordinary frames.
    self._echo_sid: str | None = None
    # PR3: last-known top-frame url per tab, fed from `Page.frameNavigated`
    # (and seeded from the relay ghost). Keeps the synthesized targetInfo
    # url fresh so Playwright isn't stranded on a stale value. A freshly
    # created, not-yet-navigated tab is normalized to ":" (Chrome's initial
    # empty document) so CRPage's `isInitialEmptyPage` heuristic matches.
    self._tab_url: dict[int, str] = {}
    # PR3: tabs created via Target.createTarget that have NOT yet seen a
    # real navigation. Their targetInfo.url is reported as ":" until the
    # first frameNavigated lands, which steers Playwright's `crPage.ts` init
    # onto the benign initial-empty-page branch rather than the "already
    # navigated" one (research delta #2).
    self._fresh_blank_tabs: set[int] = set()
    # PR3: tab_id → the REAL Chrome main-frame id (from Page.getFrameTree).
    # In real Chrome a page's top-level frame id === its targetId, and
    # Playwright's CRPage keys its frame→session map on the targetId
    # (`_sessions.set(targetId, mainFrameSession)`) and then looks the main
    # frame up by `frame.id` (`_sessionForFrame`). Our targetId is the
    # SYNTHETIC `ext-tab-<tabid>`, which never equals Chrome's internal
    # main-frame id — the lookup would throw "Frame has been detached" and
    # init would reject. The bridge therefore rewrites the main frame's id
    # to the synthetic targetId in everything handed to Playwright (frame
    # tree + page-domain events) and back to the real id on commands
    # Playwright scopes to that frame.
    self._tab_main_frame: dict[int, str] = {}
    # PR3: per-tab futures awaiting the main-frame default
    # `Runtime.executionContextCreated` event. `_on_relay_event` resolves
    # them so `_handle_runtime_enable` gates its response on the real event
    # instead of a blind sleep.
    self._ctx_waiters: dict[int, list[asyncio.Future]] = {}
|
|
190
|
+
@staticmethod
|
|
191
|
+
def _load_session_scope(session_id: str | None) -> tuple[str | None, int | None]:
|
|
192
|
+
if not session_id:
|
|
193
|
+
return None, None
|
|
194
|
+
rec = session_registry.get(session_id)
|
|
195
|
+
if not isinstance(rec, dict):
|
|
196
|
+
return None, None
|
|
197
|
+
name = rec.get("name")
|
|
198
|
+
name = name if isinstance(name, str) and name else None
|
|
199
|
+
runtime = rec.get("runtime") or {}
|
|
200
|
+
gid = runtime.get("group_id") if isinstance(runtime, dict) else None
|
|
201
|
+
gid = gid if isinstance(gid, int) and gid >= 0 else None
|
|
202
|
+
return name, gid
|
|
203
|
+
|
|
204
|
+
def _persist_group_id(self, group_id: int) -> None:
|
|
205
|
+
if not self._session_id or group_id < 0:
|
|
206
|
+
return
|
|
207
|
+
self._group_id = group_id
|
|
208
|
+
self._ext._bind_group(self._session_id, group_id) # noqa: SLF001
|
|
209
|
+
try:
|
|
210
|
+
rec = session_registry.get(self._session_id) or {}
|
|
211
|
+
runtime = dict(rec.get("runtime") or {})
|
|
212
|
+
runtime["group_id"] = group_id
|
|
213
|
+
runtime["updated_at"] = time.time()
|
|
214
|
+
session_registry.update(self._session_id, runtime=runtime)
|
|
215
|
+
except Exception:
|
|
216
|
+
pass
|
|
217
|
+
|
|
218
|
+
def _refresh_session_group_id(self) -> int | None:
|
|
219
|
+
"""Refresh the session group before creating a tab.
|
|
220
|
+
|
|
221
|
+
The daemon's agent path may have bound the group after this facade
|
|
222
|
+
bridge was constructed. The relay-scoped map is the same-process fast
|
|
223
|
+
path; the ledger is the restart/reconnect fallback.
|
|
224
|
+
"""
|
|
225
|
+
if not self._session_id:
|
|
226
|
+
return self._group_id
|
|
227
|
+
gid = self._relay.session_group(self._session_id)
|
|
228
|
+
if gid is None:
|
|
229
|
+
_name, gid = self._load_session_scope(self._session_id)
|
|
230
|
+
if gid is not None:
|
|
231
|
+
self._group_id = gid
|
|
232
|
+
self._ext._bind_group(self._session_id, gid) # noqa: SLF001
|
|
233
|
+
return self._group_id
|
|
234
|
+
|
|
235
|
+
# ---- lifecycle -------------------------------------------------------
|
|
236
|
+
|
|
237
|
+
async def run(self) -> None:
|
|
238
|
+
"""Pump the Playwright client until it (or the relay) closes."""
|
|
239
|
+
self._relay.add_event_listener(self._on_relay_event)
|
|
240
|
+
try:
|
|
241
|
+
async for raw in self._client:
|
|
242
|
+
if self._closed:
|
|
243
|
+
break
|
|
244
|
+
if not isinstance(raw, (str, bytes)):
|
|
245
|
+
continue
|
|
246
|
+
text = raw if isinstance(raw, str) else raw.decode(
|
|
247
|
+
"utf-8", errors="replace")
|
|
248
|
+
await self._handle_client_frame(text)
|
|
249
|
+
finally:
|
|
250
|
+
await self.aclose()
|
|
251
|
+
|
|
252
|
+
async def aclose(self) -> None:
|
|
253
|
+
if self._closed:
|
|
254
|
+
return
|
|
255
|
+
self._closed = True
|
|
256
|
+
self._relay.remove_event_listener(self._on_relay_event)
|
|
257
|
+
# NOTE: we deliberately do NOT call self._ext.close() — ExtensionUpstream
|
|
258
|
+
# .close() calls relay.set_event_handler(None), which would clobber the
|
|
259
|
+
# AGENT path's primary event handler. Our dedicated ExtensionUpstream
|
|
260
|
+
# never called open() (so it never set the handler); it only lent us its
|
|
261
|
+
# send_text emulation + session table. Tearing down our relay fan-out
|
|
262
|
+
# listener above is the only relay-side cleanup we own.
|
|
263
|
+
self._ext._open = False # noqa: SLF001 — mark our adapter inert
|
|
264
|
+
# Cancel any in-flight Runtime.enable barriers so a closing connection
|
|
265
|
+
# never leaves awaiters hanging (CancelledError is BaseException; the
|
|
266
|
+
# awaiters catch TimeoutError/CancelledError and proceed).
|
|
267
|
+
for tab_id in list(self._ctx_waiters.keys()):
|
|
268
|
+
for fut in self._ctx_waiters.pop(tab_id, []):
|
|
269
|
+
if not fut.done():
|
|
270
|
+
fut.cancel()
|
|
271
|
+
|
|
272
|
+
# ---- client frame handling ------------------------------------------
|
|
273
|
+
|
|
274
|
+
async def _handle_client_frame(self, frame: str) -> None:
|
|
275
|
+
try:
|
|
276
|
+
msg = json.loads(frame)
|
|
277
|
+
except (ValueError, TypeError):
|
|
278
|
+
logger.warning("facade(ext) got non-JSON: %s", frame[:80])
|
|
279
|
+
return
|
|
280
|
+
if not isinstance(msg, dict):
|
|
281
|
+
return
|
|
282
|
+
|
|
283
|
+
method = msg.get("method")
|
|
284
|
+
req_id = msg.get("id") if isinstance(msg.get("id"), int) else None
|
|
285
|
+
params = msg.get("params") or {}
|
|
286
|
+
session_id = (msg.get("sessionId")
|
|
287
|
+
if isinstance(msg.get("sessionId"), str) else None)
|
|
288
|
+
# The browser CDP session carries browser-level commands — treat them as
|
|
289
|
+
# session-less (the emulation below keys on method name) but echo the
|
|
290
|
+
# sessionId back so Playwright's CDPSession routing stays consistent.
|
|
291
|
+
browser_session = session_id == _BROWSER_SESSION_ID
|
|
292
|
+
if browser_session:
|
|
293
|
+
# Echo the browser sessionId on every response/event for this frame
|
|
294
|
+
# so Playwright's CDPSession routing matches; the browser-level
|
|
295
|
+
# handlers below stay session-agnostic (run as session-less).
|
|
296
|
+
self._echo_sid = session_id
|
|
297
|
+
session_id = None
|
|
298
|
+
else:
|
|
299
|
+
self._echo_sid = None
|
|
300
|
+
|
|
301
|
+
# --- Benign browser-level no-ops the handshake needs acked ---
|
|
302
|
+
if (isinstance(method, str) and method in _BENIGN_BROWSER_NOOPS
|
|
303
|
+
and session_id is None):
|
|
304
|
+
await self._respond(req_id, {})
|
|
305
|
+
return
|
|
306
|
+
|
|
307
|
+
# --- Target.attachToBrowserTarget → synthesize a browser session. ---
|
|
308
|
+
# Playwright's `new_browser_cdp_session()` uses this; the extension can't
|
|
309
|
+
# provide a real browser-level session, so we hand back a stable
|
|
310
|
+
# synthetic sessionId. Browser-level commands on it are then handled by
|
|
311
|
+
# the same session-less emulation (the facade keys browser methods on
|
|
312
|
+
# method name, not session).
|
|
313
|
+
if method == "Target.attachToBrowserTarget" and session_id is None:
|
|
314
|
+
await self._respond(req_id, {"sessionId": _BROWSER_SESSION_ID})
|
|
315
|
+
return
|
|
316
|
+
|
|
317
|
+
# --- Target.getTargetInfo: the handshake asks for the browser target ---
|
|
318
|
+
# (no/unknown targetId) — synthesize it; a tab targetId returns that
|
|
319
|
+
# tab's info. ExtensionUpstream errors here (it expects a sessionId), so
|
|
320
|
+
# the facade answers directly.
|
|
321
|
+
if method == "Target.getTargetInfo" and session_id is None:
|
|
322
|
+
tid = params.get("targetId")
|
|
323
|
+
tab_id = (_tab_id_from_target_id(tid)
|
|
324
|
+
if isinstance(tid, str) else None)
|
|
325
|
+
if tab_id is not None:
|
|
326
|
+
await self._respond(req_id, {"targetInfo": self._target_info(tab_id)})
|
|
327
|
+
else:
|
|
328
|
+
await self._respond(req_id, {"targetInfo": self._browser_target_info()})
|
|
329
|
+
return
|
|
330
|
+
|
|
331
|
+
# --- A2: discovery handshake → ack + replay target events ---
|
|
332
|
+
if method in ("Target.setAutoAttach", "Target.setDiscoverTargets"):
|
|
333
|
+
# The browser-level (session-less) form is the discovery handshake;
|
|
334
|
+
# ack and replay every known tab. A *session-scoped* setAutoAttach
|
|
335
|
+
# (auto-attach for a target's children) is forwarded like any other
|
|
336
|
+
# session command below.
|
|
337
|
+
if session_id is None:
|
|
338
|
+
await self._respond(req_id, {})
|
|
339
|
+
await self._replay_all_targets()
|
|
340
|
+
return
|
|
341
|
+
|
|
342
|
+
# --- A3: createTarget → open a background tab via the extension ---
|
|
343
|
+
if method == "Target.createTarget" and session_id is None:
|
|
344
|
+
await self._handle_create_target(req_id, params)
|
|
345
|
+
return
|
|
346
|
+
|
|
347
|
+
# --- Target.closeTarget (browser-level, {targetId}) → close the tab. ---
|
|
348
|
+
# Playwright issues this during page teardown; ExtensionUpstream returns
|
|
349
|
+
# -32601 for the session-less form, which aborts Playwright's page setup.
|
|
350
|
+
# Map it to the relay's close-by-target-id (no session lookup needed).
|
|
351
|
+
if method == "Target.closeTarget" and session_id is None:
|
|
352
|
+
await self._handle_close_target(req_id, params)
|
|
353
|
+
return
|
|
354
|
+
|
|
355
|
+
# --- A4: Runtime.enable barrier (session-scoped) ---
|
|
356
|
+
if method == "Runtime.enable" and session_id is not None:
|
|
357
|
+
await self._handle_runtime_enable(req_id, session_id, params)
|
|
358
|
+
return
|
|
359
|
+
|
|
360
|
+
# --- Browser-level (session-less) Target/Browser emulation: reuse
|
|
361
|
+
# ExtensionUpstream (getTargets / attachToTarget / detachFromTarget /
|
|
362
|
+
# Browser.getVersion). Its responses carry no sessionId, which is
|
|
363
|
+
# correct for these browser-level methods. ---
|
|
364
|
+
if session_id is None:
|
|
365
|
+
if method == "Target.attachToTarget":
|
|
366
|
+
await self._handle_attach_to_target(req_id, params)
|
|
367
|
+
return
|
|
368
|
+
await self._ext.send_text(frame)
|
|
369
|
+
return
|
|
370
|
+
|
|
371
|
+
# --- Session-scoped commands: forward to the tab's chrome.debugger and
|
|
372
|
+
# echo the response WITH the sessionId. This is the critical difference
|
|
373
|
+
# from the agent path: Playwright drives FLAT sessions and routes every
|
|
374
|
+
# response by (sessionId, id) — a response missing the sessionId lands
|
|
375
|
+
# on the root session, whose id space doesn't know it, tripping
|
|
376
|
+
# Playwright's `_onMessage` assert and dropping the connection. The
|
|
377
|
+
# daemon Router re-adds sessionId for the agent path; the facade must do
|
|
378
|
+
# it here because it talks raw flat-session CDP to Playwright. ---
|
|
379
|
+
await self._forward_session_command(req_id, session_id, method, params)
|
|
380
|
+
|
|
381
|
+
async def _forward_session_command(self, req_id: int | None,
|
|
382
|
+
session_id: str, method: str | None,
|
|
383
|
+
params: dict) -> None:
|
|
384
|
+
"""Forward a session-scoped command to the tab's chrome.debugger and
|
|
385
|
+
echo `{id, sessionId, result|error}`."""
|
|
386
|
+
tab_id = self._ext._sessions.get(session_id) # noqa: SLF001
|
|
387
|
+
if tab_id is None:
|
|
388
|
+
from .extension_upstream import _tab_id_from_session_id
|
|
389
|
+
tab_id = _tab_id_from_session_id(session_id)
|
|
390
|
+
if tab_id is None:
|
|
391
|
+
await self._error(req_id, -32602,
|
|
392
|
+
f"unknown sessionId {session_id!r}",
|
|
393
|
+
session_id=session_id)
|
|
394
|
+
return
|
|
395
|
+
# PR3: page-session `Target.setAutoAttach` (id 13 in the CRPage init
|
|
396
|
+
# batch) is part of CRPage `_initialize`'s `Promise.all`; the agent path
|
|
397
|
+
# silent-acks it (it never drives child auto-attach), but for the
|
|
398
|
+
# Playwright high-level path we FORWARD it to the extension's
|
|
399
|
+
# chrome.debugger so the page session's auto-attach contract is honored.
|
|
400
|
+
# A plain about:blank page has no OOPIF children, so the forward
|
|
401
|
+
# resolves with `{}` either way — but forwarding (vs faking) means the
|
|
402
|
+
# init promise resolves against real Chrome state rather than a
|
|
403
|
+
# synthesized lie, which is the fidelity CRPage init depends on. Child
|
|
404
|
+
# `attachedToTarget` for real OOPIFs is out of scope here (phase A); the
|
|
405
|
+
# forward just must not error.
|
|
406
|
+
# PR3: a command scoped to the synthetic main-frame id (which equals the
|
|
407
|
+
# targetId we handed Playwright) must target the REAL Chrome frame id.
|
|
408
|
+
self._rewrite_command_frame_id(tab_id, params)
|
|
409
|
+
try:
|
|
410
|
+
result = await self._relay.send_cdp(tab_id, method or "", params)
|
|
411
|
+
# PR3: real Chrome makes a page's TOP frame id === its targetId, and
|
|
412
|
+
# CRPage keys its frame→session map on the targetId
|
|
413
|
+
# (`_sessions.set(targetId, mainFrameSession)`) then resolves the
|
|
414
|
+
# main frame by `frame.id` (`_sessionForFrame`). Our targetId is the
|
|
415
|
+
# SYNTHETIC `ext-tab-<tabid>`, never Chrome's internal main-frame id,
|
|
416
|
+
# so that lookup throws "Frame has been detached" and `new_page()`
|
|
417
|
+
# init rejects. Remember the real↔synthetic mapping and present the
|
|
418
|
+
# synthetic targetId as the main frame id in the frame tree (the same
|
|
419
|
+
# rewrite is mirrored on forwarded events + inbound commands), making
|
|
420
|
+
# the page look exactly like a real-Chrome top-level target.
|
|
421
|
+
#
|
|
422
|
+
# NOTE: we deliberately do NOT rewrite the frame url to ":" here.
|
|
423
|
+
# CRPage init computes `isInitialEmptyPage = mainFrame().url() === ":"`;
|
|
424
|
+
# when TRUE it withholds `_firstNonInitialNavigationCommittedFulfill()`
|
|
425
|
+
# (waiting for a real navigation), and since init awaits
|
|
426
|
+
# `_firstNonInitialNavigationCommittedPromise`, a fresh `new_page()`
|
|
427
|
+
# that never navigates would hang. Leaving the real `about:blank` url
|
|
428
|
+
# makes `isInitialEmptyPage` FALSE → init fulfills immediately, which
|
|
429
|
+
# is exactly how a real-Chrome `context.new_page()` settles.
|
|
430
|
+
if (method == "Page.getFrameTree"
|
|
431
|
+
and isinstance(result, dict)):
|
|
432
|
+
frame = (result.get("frameTree") or {}).get("frame")
|
|
433
|
+
if isinstance(frame, dict):
|
|
434
|
+
real_id = frame.get("id")
|
|
435
|
+
if isinstance(real_id, str) and real_id:
|
|
436
|
+
self._tab_main_frame[tab_id] = real_id
|
|
437
|
+
frame["id"] = f"ext-tab-{tab_id}"
|
|
438
|
+
await self._respond(req_id, result, session_id=session_id)
|
|
439
|
+
except _CommandError as e:
|
|
440
|
+
await self._error(req_id, e.code, e.message, session_id=session_id)
|
|
441
|
+
except Exception as e: # noqa: BLE001
|
|
442
|
+
await self._error(req_id, -32603, f"relay send failed: {e!r}",
|
|
443
|
+
session_id=session_id)
|
|
444
|
+
|
|
445
|
+
async def _handle_attach_to_target(self, req_id: int | None,
|
|
446
|
+
params: dict) -> None:
|
|
447
|
+
"""Forward to ExtensionUpstream but capture the fabricated sessionId so
|
|
448
|
+
our tab↔session table stays consistent with what Playwright holds (so
|
|
449
|
+
forwarded async events get the right sessionId tag)."""
|
|
450
|
+
target_id = params.get("targetId")
|
|
451
|
+
tab_id = (_tab_id_from_target_id(target_id)
|
|
452
|
+
if isinstance(target_id, str) else None)
|
|
453
|
+
# Reuse an already-announced session for this tab if we have one, so
|
|
454
|
+
# auto-attach replay + an explicit attachToTarget agree on one session.
|
|
455
|
+
if tab_id is not None and tab_id in self._tab_sessions:
|
|
456
|
+
sid = self._tab_sessions[tab_id]
|
|
457
|
+
# Re-register the sid in the upstream's table (it may differ from
|
|
458
|
+
# ours if this is the first explicit attach) so session-scoped
|
|
459
|
+
# commands resolve the tab.
|
|
460
|
+
self._ext._sessions[sid] = tab_id # noqa: SLF001 — intra-package
|
|
461
|
+
try:
|
|
462
|
+
await self._relay.attach_tab(tab_id, timeout=10.0)
|
|
463
|
+
except _CommandError as e:
|
|
464
|
+
await self._error(req_id, e.code, e.message)
|
|
465
|
+
return
|
|
466
|
+
except Exception as e: # noqa: BLE001
|
|
467
|
+
await self._error(req_id, -32603, f"attach failed: {e!r}")
|
|
468
|
+
return
|
|
469
|
+
await self._respond(req_id, {"sessionId": sid})
|
|
470
|
+
await self._announce_target(tab_id, sid=sid, send_created=False)
|
|
471
|
+
return
|
|
472
|
+
# Unknown tab → let ExtensionUpstream do the attach + sid fabrication,
|
|
473
|
+
# then snoop its table to learn the sid it handed back.
|
|
474
|
+
await self._ext.send_text(json.dumps({
|
|
475
|
+
"id": req_id, "method": "Target.attachToTarget", "params": params,
|
|
476
|
+
}))
|
|
477
|
+
if tab_id is not None:
|
|
478
|
+
sid = next((s for s, t in self._ext._sessions.items() # noqa: SLF001
|
|
479
|
+
if t == tab_id), None)
|
|
480
|
+
if sid is not None:
|
|
481
|
+
self._tab_sessions[tab_id] = sid
|
|
482
|
+
|
|
483
|
+
async def _handle_create_target(self, req_id: int | None,
|
|
484
|
+
params: dict) -> None:
|
|
485
|
+
"""A3: map browser-level Target.createTarget to a real background tab.
|
|
486
|
+
|
|
487
|
+
Playwright calls this for `context.new_page()`. The extension can't
|
|
488
|
+
issue browser-level CDP, so we open a tab via the relay's
|
|
489
|
+
`create_background_tab` (the same primitive `open_background` uses) and
|
|
490
|
+
return its `ext-tab-<id>` targetId. The subsequent attach + page events
|
|
491
|
+
are synthesized so Playwright wires up the new Page object."""
|
|
492
|
+
url = params.get("url") or "about:blank"
|
|
493
|
+
# CDP ORDERING (critical): Playwright's `doCreateNewPage` does
|
|
494
|
+
# `const {targetId} = await createTarget(...); return
|
|
495
|
+
# this._crPages.get(targetId)._page` — it expects the target's
|
|
496
|
+
# `Target.attachedToTarget` to have ALREADY been delivered (and the
|
|
497
|
+
# CRPage registered) by the time the createTarget RESPONSE arrives. Real
|
|
498
|
+
# Chrome emits attachedToTarget before the response. So we suppress the
|
|
499
|
+
# extension's own `attached` fan-out for this tab (in-flight guard),
|
|
500
|
+
# announce attachedToTarget OURSELVES first, THEN send the response.
|
|
501
|
+
self._creating += 1
|
|
502
|
+
try:
|
|
503
|
+
group_name = self._session_name or "Agent"
|
|
504
|
+
group_id = self._refresh_session_group_id()
|
|
505
|
+
# Same session-group discipline as the agent `open_background`
|
|
506
|
+
# verb: when the Playwright facade is session-scoped, tabs created
|
|
507
|
+
# by context.new_page() must belong to that session so session end
|
|
508
|
+
# can close them.
|
|
509
|
+
if self._session_id is not None and group_id is not None:
|
|
510
|
+
self._ext._bind_group(self._session_id, group_id) # noqa: SLF001
|
|
511
|
+
gt = await self._ext.open_background_tab(
|
|
512
|
+
url, group_name=group_name,
|
|
513
|
+
session_id=self._session_id,
|
|
514
|
+
background=True)
|
|
515
|
+
tab_id = int(gt["tabId"])
|
|
516
|
+
created_group = gt.get("groupId")
|
|
517
|
+
if isinstance(created_group, int) and created_group >= 0:
|
|
518
|
+
self._persist_group_id(created_group)
|
|
519
|
+
# PR3 (research delta #2): a brand-new, not-yet-navigated tab must be
|
|
520
|
+
# reported to Playwright with the initial-empty-document url ":" (NOT
|
|
521
|
+
# "about:blank"), so CRPage's `isInitialEmptyPage = mainFrame().url()
|
|
522
|
+
# === ":"` heuristic takes the benign branch instead of treating the
|
|
523
|
+
# page as already-navigated (which flips init onto the path that
|
|
524
|
+
# rejects → close). Real Chrome reports ":" for createTarget targets;
|
|
525
|
+
# the extension's tab has already committed about:blank by attach
|
|
526
|
+
# time, so we normalize it here until the first real frameNavigated.
|
|
527
|
+
if url in ("", "about:blank"):
|
|
528
|
+
self._fresh_blank_tabs.add(tab_id)
|
|
529
|
+
else:
|
|
530
|
+
self._tab_url[tab_id] = url
|
|
531
|
+
# Announce BEFORE the response (Chrome ordering Playwright relies on).
|
|
532
|
+
await self._announce_target(
|
|
533
|
+
tab_id, sid=gt.get("sessionId"), send_created=True)
|
|
534
|
+
except Exception as e: # noqa: BLE001
|
|
535
|
+
await self._error(req_id, -32603,
|
|
536
|
+
f"createTarget→createTab failed: {e!r}")
|
|
537
|
+
return
|
|
538
|
+
finally:
|
|
539
|
+
self._creating -= 1
|
|
540
|
+
await self._respond(req_id, {"targetId": gt["targetId"]})
|
|
541
|
+
|
|
542
|
+
def _persist_session_group(self, group_id: Any) -> None:
|
|
543
|
+
"""Best-effort: make facade-created tabs durable for session end."""
|
|
544
|
+
if self._session_id is None or not isinstance(group_id, int) or group_id < 0:
|
|
545
|
+
return
|
|
546
|
+
try:
|
|
547
|
+
from ... import session_registry
|
|
548
|
+
rec = session_registry.get(self._session_id)
|
|
549
|
+
runtime = (rec.get("runtime") or {}) if isinstance(rec, dict) else {}
|
|
550
|
+
runtime = dict(runtime) if isinstance(runtime, dict) else {}
|
|
551
|
+
runtime["group_id"] = group_id
|
|
552
|
+
runtime["updated_at"] = time.time()
|
|
553
|
+
session_registry.update(self._session_id, runtime=runtime)
|
|
554
|
+
except Exception: # noqa: BLE001
|
|
555
|
+
pass
|
|
556
|
+
|
|
557
|
+
async def _handle_close_target(self, req_id: int | None,
|
|
558
|
+
params: dict) -> None:
|
|
559
|
+
"""Browser-level Target.closeTarget → close the tab via the relay
|
|
560
|
+
(derive tabId from the ext-tab-<id> targetId; no session needed)."""
|
|
561
|
+
target_id = params.get("targetId")
|
|
562
|
+
tab_id = (_tab_id_from_target_id(target_id)
|
|
563
|
+
if isinstance(target_id, str) else None)
|
|
564
|
+
if tab_id is None:
|
|
565
|
+
# CDP returns success:false for an unknown target rather than erroring.
|
|
566
|
+
await self._respond(req_id, {"success": False})
|
|
567
|
+
return
|
|
568
|
+
sid = self._tab_sessions.get(tab_id)
|
|
569
|
+
try:
|
|
570
|
+
await self._relay.close_tab(tab_id)
|
|
571
|
+
except Exception as e: # noqa: BLE001
|
|
572
|
+
logger.debug("facade(ext) closeTarget tab %s failed: %r", tab_id, e)
|
|
573
|
+
# Evict local + upstream session state for the closed tab.
|
|
574
|
+
self._evict_tab(tab_id)
|
|
575
|
+
await self._respond(req_id, {"success": True})
|
|
576
|
+
# Real Chrome ALWAYS emits detachedFromTarget + targetDestroyed after a
|
|
577
|
+
# successful closeTarget; Playwright's page-creation/teardown path AWAITS
|
|
578
|
+
# targetDestroyed to settle the CRPage. The relay only surfaces a
|
|
579
|
+
# `detached` event for a USER-driven tab close, not for this
|
|
580
|
+
# daemon-initiated `close_tab`, so the events must be synthesized here —
|
|
581
|
+
# otherwise (e.g. when CRPage `_initialize` rejects and Playwright closes
|
|
582
|
+
# the freshly-created target) `new_page()` hangs forever waiting for the
|
|
583
|
+
# destroy that never arrives. Mirrors the `detached` relay-event path.
|
|
584
|
+
await self._send_to_client(json.dumps({
|
|
585
|
+
"method": "Target.detachedFromTarget",
|
|
586
|
+
"params": {
|
|
587
|
+
"sessionId": sid or "",
|
|
588
|
+
"targetId": f"ext-tab-{tab_id}",
|
|
589
|
+
},
|
|
590
|
+
}))
|
|
591
|
+
await self._send_to_client(json.dumps({
|
|
592
|
+
"method": "Target.targetDestroyed",
|
|
593
|
+
"params": {"targetId": f"ext-tab-{tab_id}"},
|
|
594
|
+
}))
|
|
595
|
+
|
|
596
|
+
async def _handle_runtime_enable(self, req_id: int | None,
                                 session_id: str, params: dict) -> None:
    """Event-gated `Runtime.enable` barrier for CRPage initialization.

    CRPage `_initialize` issues `Runtime.enable` and expects the default
    execution context to appear before init completes; if a late-joining
    Playwright client never sees the main-frame `executionContextCreated`,
    init fails and Playwright closes the new target. The extension's
    `chrome.debugger` session is shared and long-lived, so a plain
    `Runtime.enable` may not re-emit the event for a context that already
    existed before this client subscribed.

    Mirroring playwriter's relay dance (cdp-relay.ts:792-829):

    1. `Runtime.disable` -> short pause -> `Runtime.enable`, so Chrome treats
       it as a fresh subscription and re-emits `executionContextCreated`.
    2. The response is HELD until the default (`auxData.isDefault`) context
       event for this tab is observed by `_on_relay_event`, bounded by
       `_RUNTIME_ENABLE_BARRIER_TIMEOUT`. The barrier is best-effort: on
       timeout, or if the tab is evicted meanwhile, the enable result is
       still returned.

    The round-trip is driven here rather than in the extension because this
    bridge owns the Playwright-facing session and already observes the
    extension event fan-out.

    An unknown `session_id` yields a -32602 error; a failing `Runtime.enable`
    yields the command's own error or -32603.
    """
    tab_id = self._ext._sessions.get(session_id)  # noqa: SLF001
    if tab_id is None:
        from .extension_upstream import _tab_id_from_session_id
        tab_id = _tab_id_from_session_id(session_id)
    if tab_id is None:
        await self._error(req_id, -32602,
                          f"unknown sessionId {session_id!r}",
                          session_id=session_id)
        return
    # Arm the waiter BEFORE issuing enable so the event cannot slip through
    # between the enable round-trip and registering the future.
    waiter = self._arm_context_waiter(tab_id)
    try:
        try:
            # Force re-emission of executionContextCreated for the existing
            # default context: disable -> pause -> enable. A failing disable
            # is ignored.
            with contextlib.suppress(_CommandError, Exception):
                await self._relay.send_cdp(tab_id, "Runtime.disable", {})
            await asyncio.sleep(_RUNTIME_REENABLE_PAUSE)
            result = await self._relay.send_cdp(tab_id, "Runtime.enable", params)
        except _CommandError as e:
            await self._error(req_id, e.code, e.message, session_id=session_id)
            return
        except Exception as e:  # noqa: BLE001
            await self._error(req_id, -32603, f"Runtime.enable failed: {e!r}",
                              session_id=session_id)
            return
        # Gate the response on the real default-context event (bounded).
        # `asyncio.wait` is used instead of `wait_for` because tab eviction
        # CANCELS the waiter: awaiting a cancelled future raises
        # CancelledError (a BaseException), which would abort this handler
        # without ever answering the client. `asyncio.wait` simply returns
        # once the future is done (resolved or cancelled) or the timeout
        # elapses, while still propagating a cancellation of this task.
        await asyncio.wait({waiter}, timeout=_RUNTIME_ENABLE_BARRIER_TIMEOUT)
    finally:
        # Runs on every exit path, including cancellation of this task, so
        # the waiter never lingers in the per-tab waiter table.
        self._disarm_context_waiter(tab_id, waiter)
    await self._respond(req_id, result, session_id=session_id)
|
|
661
|
+
|
|
662
|
+
def _arm_context_waiter(self, tab_id: int) -> asyncio.Future:
|
|
663
|
+
"""Register a future resolved when the next default
|
|
664
|
+
`Runtime.executionContextCreated` for `tab_id` is observed."""
|
|
665
|
+
fut: asyncio.Future = asyncio.get_running_loop().create_future()
|
|
666
|
+
self._ctx_waiters.setdefault(tab_id, []).append(fut)
|
|
667
|
+
return fut
|
|
668
|
+
|
|
669
|
+
def _disarm_context_waiter(self, tab_id: int,
|
|
670
|
+
fut: asyncio.Future) -> None:
|
|
671
|
+
waiters = self._ctx_waiters.get(tab_id)
|
|
672
|
+
if waiters and fut in waiters:
|
|
673
|
+
waiters.remove(fut)
|
|
674
|
+
if not waiters:
|
|
675
|
+
self._ctx_waiters.pop(tab_id, None)
|
|
676
|
+
if not fut.done():
|
|
677
|
+
fut.cancel()
|
|
678
|
+
|
|
679
|
+
def _resolve_context_waiters(self, tab_id: int) -> None:
|
|
680
|
+
"""Wake every waiter for `tab_id` (the default execution context just
|
|
681
|
+
landed)."""
|
|
682
|
+
for fut in self._ctx_waiters.pop(tab_id, []):
|
|
683
|
+
if not fut.done():
|
|
684
|
+
fut.set_result(None)
|
|
685
|
+
|
|
686
|
+
# ---- target event synthesis (A2) ------------------------------------
|
|
687
|
+
|
|
688
|
+
async def _replay_all_targets(self) -> None:
|
|
689
|
+
"""Replay targetCreated + attachedToTarget for visible tabs.
|
|
690
|
+
|
|
691
|
+
Session-bound facade connections see only the tab group recorded for
|
|
692
|
+
that Browserwright session. Sessionless raw CDP clients keep the legacy
|
|
693
|
+
unscoped view across all attached tabs.
|
|
694
|
+
"""
|
|
695
|
+
if self._session_id is not None:
|
|
696
|
+
try:
|
|
697
|
+
infos = await self._ext.scoped_target_infos(self._session_id)
|
|
698
|
+
except Exception as e: # noqa: BLE001
|
|
699
|
+
logger.warning("facade(ext) scoped replay failed: %r", e)
|
|
700
|
+
infos = []
|
|
701
|
+
for info in infos:
|
|
702
|
+
target_id = info.get("targetId") if isinstance(info, dict) else None
|
|
703
|
+
tab_id = _tab_id_from_target_id(target_id) if isinstance(target_id, str) else None
|
|
704
|
+
if tab_id is not None:
|
|
705
|
+
await self._announce_target(tab_id, send_created=True)
|
|
706
|
+
return
|
|
707
|
+
for g in self._relay.list_ghost_targets():
|
|
708
|
+
tab_id = _tab_id_from_target_id(g.target_id)
|
|
709
|
+
if tab_id is None:
|
|
710
|
+
continue
|
|
711
|
+
await self._announce_target(tab_id, send_created=True)
|
|
712
|
+
|
|
713
|
+
async def _announce_target(self, tab_id: int, *, sid: str | None = None,
|
|
714
|
+
send_created: bool) -> None:
|
|
715
|
+
"""Synthesize the target-lifecycle frames Playwright needs to attach a
|
|
716
|
+
Page to `tab_id`. Idempotent per tab: if we've already announced this
|
|
717
|
+
tab (it has a session), we don't re-emit attachedToTarget (which would
|
|
718
|
+
make Playwright create a duplicate Page)."""
|
|
719
|
+
async with self._lock:
|
|
720
|
+
already = tab_id in self._tab_sessions
|
|
721
|
+
if sid is None:
|
|
722
|
+
sid = self._tab_sessions.get(tab_id) or _new_upstream_session_id(tab_id)
|
|
723
|
+
self._tab_sessions[tab_id] = sid
|
|
724
|
+
# Keep the upstream's session table in sync so session-scoped
|
|
725
|
+
# commands Playwright sends for this sid resolve to the tab.
|
|
726
|
+
self._ext._sessions[sid] = tab_id # noqa: SLF001
|
|
727
|
+
if already:
|
|
728
|
+
return
|
|
729
|
+
# Make sure the extension actually has chrome.debugger attached so
|
|
730
|
+
# subsequent session-scoped commands work (idempotent in the relay).
|
|
731
|
+
with contextlib.suppress(Exception):
|
|
732
|
+
await self._relay.attach_tab(tab_id, timeout=10.0)
|
|
733
|
+
info = self._target_info(tab_id)
|
|
734
|
+
if send_created:
|
|
735
|
+
await self._send_to_client(json.dumps({
|
|
736
|
+
"method": "Target.targetCreated",
|
|
737
|
+
"params": {"targetInfo": info},
|
|
738
|
+
}))
|
|
739
|
+
await self._send_to_client(json.dumps({
|
|
740
|
+
"method": "Target.attachedToTarget",
|
|
741
|
+
"params": {
|
|
742
|
+
"sessionId": sid,
|
|
743
|
+
"targetInfo": info,
|
|
744
|
+
"waitingForDebugger": False,
|
|
745
|
+
},
|
|
746
|
+
}))
|
|
747
|
+
self._relay.set_session_announce(self._session_id)
|
|
748
|
+
|
|
749
|
+
async def _tab_visible_to_session(self, tab_id: int) -> bool:
|
|
750
|
+
if self._session_id is None:
|
|
751
|
+
return True
|
|
752
|
+
try:
|
|
753
|
+
infos = await self._ext.scoped_target_infos(self._session_id)
|
|
754
|
+
except Exception as e: # noqa: BLE001
|
|
755
|
+
logger.warning("facade(ext) scoped visibility check failed: %r", e)
|
|
756
|
+
return False
|
|
757
|
+
target_id = f"ext-tab-{tab_id}"
|
|
758
|
+
return any(
|
|
759
|
+
isinstance(info, dict) and info.get("targetId") == target_id
|
|
760
|
+
for info in infos
|
|
761
|
+
)
|
|
762
|
+
|
|
763
|
+
def _browser_target_info(self) -> dict:
|
|
764
|
+
"""Synthetic targetInfo for the browser itself (type=browser). Some
|
|
765
|
+
handshake steps query it before any page target exists."""
|
|
766
|
+
return {
|
|
767
|
+
"targetId": "browserwright-extension-browser",
|
|
768
|
+
"type": "browser",
|
|
769
|
+
"title": "Browserwright",
|
|
770
|
+
"url": "",
|
|
771
|
+
"attached": True,
|
|
772
|
+
"canAccessOpener": False,
|
|
773
|
+
"browserContextId": "",
|
|
774
|
+
}
|
|
775
|
+
|
|
776
|
+
def _target_info(self, tab_id: int) -> dict:
    """Build the CDP targetInfo dict presented to the client for `tab_id`.

    Title and url start from the relay's ghost-target view (empty strings
    when the tab is not listed). The url is then overridden: a tab still
    flagged as fresh-blank reports ":", otherwise a url tracked from
    `Page.frameNavigated` wins.
    """
    ghost = next(
        (g for g in self._relay.list_ghost_targets()
         if _tab_id_from_target_id(g.target_id) == tab_id),
        None,
    )
    url, title = (ghost.url, ghost.title) if ghost is not None else ("", "")

    # Prefer the live top-frame url so getTargetInfo/attachedToTarget never
    # strand Playwright on a stale value; a freshly-created blank tab is
    # reported as the initial empty document ":".
    if tab_id in self._fresh_blank_tabs:
        url = ":"
    elif tab_id in self._tab_url:
        url = self._tab_url[tab_id]

    info = {
        "targetId": f"ext-tab-{tab_id}",
        "type": "page",
        "title": title,
        "url": url,
        "attached": True,
        "canAccessOpener": False,
    }
    # Playwright's `_onAttachedToTarget` asserts a TRUTHY browserContextId and
    # falls back to its default context when the id is unknown. A stable
    # synthetic id satisfies the assert and routes the page into the default
    # context (the extension backend has no real browser contexts).
    info["browserContextId"] = _SYNTHETIC_BROWSER_CONTEXT_ID
    return info
|
|
808
|
+
|
|
809
|
+
# ---- relay fan-out (new tabs + async page events) -------------------
|
|
810
|
+
|
|
811
|
+
async def _on_relay_event(self, ext_msg: dict) -> None:
|
|
812
|
+
"""Called by the relay for every extension `attached`/`detached`/`event`
|
|
813
|
+
message (fan-out observer). Translates them into the frames a live
|
|
814
|
+
Playwright client expects."""
|
|
815
|
+
if self._closed:
|
|
816
|
+
return
|
|
817
|
+
kind = ext_msg.get("type")
|
|
818
|
+
tab_id = ext_msg.get("tabId")
|
|
819
|
+
if not isinstance(tab_id, int):
|
|
820
|
+
return
|
|
821
|
+
|
|
822
|
+
if kind == "attached":
|
|
823
|
+
# A newly-attached tab (popup click, daemon-driven adopt, or our own
|
|
824
|
+
# createTarget). Announce it so Playwright spawns a Page — UNLESS a
|
|
825
|
+
# createTarget is in flight: that path announces explicitly AFTER it
|
|
826
|
+
# sends the createTarget response (CDP ordering), so deferring here
|
|
827
|
+
# avoids emitting attachedToTarget before the response.
|
|
828
|
+
if self._creating > 0:
|
|
829
|
+
return
|
|
830
|
+
if not await self._tab_visible_to_session(tab_id):
|
|
831
|
+
return
|
|
832
|
+
await self._announce_target(tab_id, send_created=True)
|
|
833
|
+
return
|
|
834
|
+
|
|
835
|
+
if kind == "detached":
|
|
836
|
+
sid = self._tab_sessions.get(tab_id)
|
|
837
|
+
if self._session_id is not None and sid is None:
|
|
838
|
+
return
|
|
839
|
+
self._evict_tab(tab_id)
|
|
840
|
+
await self._send_to_client(json.dumps({
|
|
841
|
+
"method": "Target.detachedFromTarget",
|
|
842
|
+
"params": {
|
|
843
|
+
"sessionId": sid or "",
|
|
844
|
+
"targetId": f"ext-tab-{tab_id}",
|
|
845
|
+
},
|
|
846
|
+
}))
|
|
847
|
+
await self._send_to_client(json.dumps({
|
|
848
|
+
"method": "Target.targetDestroyed",
|
|
849
|
+
"params": {"targetId": f"ext-tab-{tab_id}"},
|
|
850
|
+
}))
|
|
851
|
+
return
|
|
852
|
+
|
|
853
|
+
if kind == "event":
|
|
854
|
+
method = ext_msg.get("method")
|
|
855
|
+
params = ext_msg.get("params") or {}
|
|
856
|
+
if not isinstance(method, str):
|
|
857
|
+
return
|
|
858
|
+
# PR3: keep the live top-frame url fresh and release the fresh-blank
|
|
859
|
+
# normalization once the page actually navigates, so getTargetInfo
|
|
860
|
+
# stops reporting ":" after the first real navigation.
|
|
861
|
+
if method == "Page.frameNavigated":
|
|
862
|
+
frame = params.get("frame") or {}
|
|
863
|
+
# Top frame only: no parentId.
|
|
864
|
+
if isinstance(frame, dict) and not frame.get("parentId"):
|
|
865
|
+
new_url = frame.get("url")
|
|
866
|
+
if isinstance(new_url, str) and new_url and new_url != ":":
|
|
867
|
+
self._tab_url[tab_id] = new_url
|
|
868
|
+
self._fresh_blank_tabs.discard(tab_id)
|
|
869
|
+
# PR3: a default-context creation releases the Runtime.enable barrier.
|
|
870
|
+
elif method == "Runtime.executionContextCreated":
|
|
871
|
+
ctx = params.get("context") or {}
|
|
872
|
+
aux = ctx.get("auxData") or {} if isinstance(ctx, dict) else {}
|
|
873
|
+
if isinstance(aux, dict) and aux.get("isDefault"):
|
|
874
|
+
self._resolve_context_waiters(tab_id)
|
|
875
|
+
# PR3: rewrite the REAL Chrome main-frame id → the synthetic
|
|
876
|
+
# targetId in events we forward, matching the rewrite applied to the
|
|
877
|
+
# getFrameTree response (so Playwright's frame→session map stays
|
|
878
|
+
# consistent and never throws "Frame has been detached").
|
|
879
|
+
self._rewrite_event_frame_id(tab_id, method, params)
|
|
880
|
+
sid = self._tab_sessions.get(tab_id)
|
|
881
|
+
if self._session_id is not None and sid is None:
|
|
882
|
+
return
|
|
883
|
+
out: dict[str, Any] = {"method": method, "params": params}
|
|
884
|
+
if sid is not None:
|
|
885
|
+
out["sessionId"] = sid
|
|
886
|
+
await self._send_to_client(json.dumps(out))
|
|
887
|
+
return
|
|
888
|
+
|
|
889
|
+
# ---- helpers ---------------------------------------------------------
|
|
890
|
+
|
|
891
|
+
def _rewrite_event_frame_id(self, tab_id: int, method: str,
|
|
892
|
+
params: dict) -> None:
|
|
893
|
+
"""In-place: swap the REAL Chrome main-frame id for the synthetic
|
|
894
|
+
targetId (`ext-tab-<tab_id>`) in a forwarded page-domain event, so it
|
|
895
|
+
agrees with the frame id we presented in `Page.getFrameTree`. Only the
|
|
896
|
+
TOP frame is remapped; child/OOPIF frames keep their real ids."""
|
|
897
|
+
real = self._tab_main_frame.get(tab_id)
|
|
898
|
+
if not real:
|
|
899
|
+
return
|
|
900
|
+
synthetic = f"ext-tab-{tab_id}"
|
|
901
|
+
# `params.frameId` (lifecycleEvent, frameStartedLoading, navigatedWithin
|
|
902
|
+
# Document, …) and `params.frame.id`/`parentId` (frameNavigated,
|
|
903
|
+
# frameAttached) and `params.context.auxData.frameId`
|
|
904
|
+
# (executionContextCreated) are the carriers of the top-frame id.
|
|
905
|
+
if params.get("frameId") == real:
|
|
906
|
+
params["frameId"] = synthetic
|
|
907
|
+
frame = params.get("frame")
|
|
908
|
+
if isinstance(frame, dict):
|
|
909
|
+
if frame.get("id") == real:
|
|
910
|
+
frame["id"] = synthetic
|
|
911
|
+
if frame.get("parentId") == real:
|
|
912
|
+
frame["parentId"] = synthetic
|
|
913
|
+
ctx = params.get("context")
|
|
914
|
+
if isinstance(ctx, dict):
|
|
915
|
+
aux = ctx.get("auxData")
|
|
916
|
+
if isinstance(aux, dict) and aux.get("frameId") == real:
|
|
917
|
+
aux["frameId"] = synthetic
|
|
918
|
+
|
|
919
|
+
def _rewrite_command_frame_id(self, tab_id: int, params: dict) -> None:
|
|
920
|
+
"""In-place inverse of `_rewrite_event_frame_id`: a command Playwright
|
|
921
|
+
sends scoped to the (synthetic) main frame id must be rewritten back to
|
|
922
|
+
the REAL Chrome frame id before forwarding to chrome.debugger (e.g.
|
|
923
|
+
`Page.createIsolatedWorld {frameId}`)."""
|
|
924
|
+
real = self._tab_main_frame.get(tab_id)
|
|
925
|
+
if not real:
|
|
926
|
+
return
|
|
927
|
+
synthetic = f"ext-tab-{tab_id}"
|
|
928
|
+
if params.get("frameId") == synthetic:
|
|
929
|
+
params["frameId"] = real
|
|
930
|
+
|
|
931
|
+
def _evict_tab(self, tab_id: int) -> None:
|
|
932
|
+
"""Drop all per-tab state for a closed/detached tab and wake any
|
|
933
|
+
outstanding Runtime.enable barrier so it doesn't hang on a dead tab."""
|
|
934
|
+
sid = self._tab_sessions.pop(tab_id, None)
|
|
935
|
+
if sid:
|
|
936
|
+
self._ext._sessions.pop(sid, None) # noqa: SLF001
|
|
937
|
+
self._tab_url.pop(tab_id, None)
|
|
938
|
+
self._fresh_blank_tabs.discard(tab_id)
|
|
939
|
+
self._tab_main_frame.pop(tab_id, None)
|
|
940
|
+
for fut in self._ctx_waiters.pop(tab_id, []):
|
|
941
|
+
if not fut.done():
|
|
942
|
+
fut.cancel()
|
|
943
|
+
|
|
944
|
+
async def _send_to_client(self, frame: str) -> None:
|
|
945
|
+
if self._closed:
|
|
946
|
+
return
|
|
947
|
+
with contextlib.suppress(Exception):
|
|
948
|
+
await self._client.send(frame)
|
|
949
|
+
|
|
950
|
+
async def _respond(self, req_id: int | None, result: dict,
|
|
951
|
+
*, session_id: str | None = None) -> None:
|
|
952
|
+
frame: dict[str, Any] = {"id": req_id, "result": result}
|
|
953
|
+
sid = session_id if session_id is not None else self._echo_sid
|
|
954
|
+
if sid is not None:
|
|
955
|
+
frame["sessionId"] = sid
|
|
956
|
+
await self._send_to_client(json.dumps(frame))
|
|
957
|
+
|
|
958
|
+
async def _error(self, req_id: int | None, code: int, msg: str,
|
|
959
|
+
*, session_id: str | None = None) -> None:
|
|
960
|
+
frame: dict[str, Any] = {
|
|
961
|
+
"id": req_id, "error": {"code": code, "message": msg},
|
|
962
|
+
}
|
|
963
|
+
sid = session_id if session_id is not None else self._echo_sid
|
|
964
|
+
if sid is not None:
|
|
965
|
+
frame["sessionId"] = sid
|
|
966
|
+
await self._send_to_client(json.dumps(frame))
|
|
967
|
+
|
|
968
|
+
async def _noop_close(self, reason: str) -> None:
|
|
969
|
+
return
|