cube-browser-playwright 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: cube-browser-playwright
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Concrete browser session implementations for cube-standard
|
|
5
|
+
Requires-Dist: cube-standard
|
|
6
|
+
Requires-Dist: playwright
|
|
7
|
+
Requires-Dist: pytest>=8.0.0 ; extra == 'dev'
|
|
8
|
+
Requires-Python: >=3.12
|
|
9
|
+
Provides-Extra: dev
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "cube-browser-playwright"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Concrete browser session implementations for cube-standard"
|
|
5
|
+
requires-python = ">=3.12"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"cube-standard",
|
|
8
|
+
"playwright",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
[project.optional-dependencies]
|
|
12
|
+
dev = [
|
|
13
|
+
"pytest>=8.0.0",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[build-system]
|
|
17
|
+
requires = ["uv_build>=0.6.0,<0.7.0"]
|
|
18
|
+
build-backend = "uv_build"
|
|
19
|
+
|
|
20
|
+
[tool.uv.build-backend]
|
|
21
|
+
module-name = "cube_browser_playwright"
|
|
22
|
+
|
|
23
|
+
[tool.ruff]
|
|
24
|
+
fix = true
|
|
25
|
+
line-length = 120
|
|
26
|
+
indent-width = 4
|
|
27
|
+
|
|
28
|
+
[tool.ruff.format]
|
|
29
|
+
quote-style = "double"
|
|
30
|
+
indent-style = "space"
|
|
31
|
+
skip-magic-trailing-comma = false
|
|
32
|
+
line-ending = "auto"
|
|
33
|
+
|
|
34
|
+
[tool.ruff.lint]
|
|
35
|
+
extend-select = ["I"]
|
|
36
|
+
|
|
37
|
+
[tool.pytest.ini_options]
|
|
38
|
+
testpaths = ["tests"]
|
|
39
|
+
addopts = "-rs"
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
"""playwright-browser: concrete browser session implementations for cube-standard."""
|
|
2
|
+
|
|
3
|
+
from cube_browser_playwright.playwright_session import PlaywrightSession, PlaywrightSessionConfig, Viewport
|
|
4
|
+
|
|
5
|
+
__all__ = ["PlaywrightSession", "PlaywrightSessionConfig", "Viewport"]
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Playwright browser session implementation.
|
|
2
|
+
|
|
3
|
+
Provides PlaywrightSessionConfig and PlaywrightSession, the concrete implementation
|
|
4
|
+
of the BrowserConfig / BrowserSession abstractions defined in cube.resources.browser_session.
|
|
5
|
+
|
|
6
|
+
Chromium is always launched with --remote-debugging-port=0 so cdp_url is always
|
|
7
|
+
available for cross-backend access (Puppeteer, raw CDP, etc.).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import shutil
|
|
12
|
+
import tempfile
|
|
13
|
+
import time
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from cube.resources.browser_session import BrowserConfig, BrowserSession
|
|
18
|
+
from playwright.sync_api import BrowserContext, Page, Playwright, sync_playwright
|
|
19
|
+
from pydantic import BaseModel, Field
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Viewport(BaseModel):
    """Width/height pair describing the browser viewport.

    Both fields are required; PlaywrightSessionConfig forwards them to
    Playwright as the viewport size (and as --window-size for resizeable windows).
    """

    # Horizontal dimension of the viewport.
    width: int
    # Vertical dimension of the viewport.
    height: int
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _read_cdp_url(user_data_dir: str, timeout: float = 2.0, poll_interval: float = 0.05) -> str:
    """Read the CDP URL from Chrome's DevToolsActivePort file.

    Chrome writes this file to user_data_dir after binding to the debug port.
    Using --remote-debugging-port=0 lets the OS assign a free port atomically,
    avoiding multiprocessing race conditions.

    The file is polled until it exists *and* its first line parses as an integer
    port. A file that is present but still empty or partially written is retried
    rather than treated as a hard failure, since the reader can race the writer.

    Args:
        user_data_dir: Chrome profile directory expected to contain DevToolsActivePort.
        timeout: Maximum number of seconds to wait for a readable, well-formed file.
        poll_interval: Seconds to sleep between attempts.

    Returns:
        The CDP endpoint as ``http://localhost:<port>``.

    Raises:
        RuntimeError: If the file never appears, or is still malformed when the
            deadline expires.
    """
    port_file = Path(user_data_dir) / "DevToolsActivePort"
    deadline = time.monotonic() + timeout
    content: str | None = None
    parse_error: Exception | None = None
    while True:
        try:
            content = port_file.read_text()
        except FileNotFoundError:
            # Not written yet; keep polling until the deadline.
            content = None
        else:
            try:
                # Only the first line (the port) is needed; any later lines are ignored.
                port = int(content.splitlines()[0])
            except (IndexError, ValueError) as e:
                # Possibly caught mid-write: remember the error and retry.
                parse_error = e
            else:
                return f"http://localhost:{port}"
        if time.monotonic() > deadline:
            break
        time.sleep(poll_interval)
    if content is None:
        raise RuntimeError(f"Chrome did not write DevToolsActivePort to {user_data_dir!r}")
    raise RuntimeError(
        f"Chrome wrote malformed DevToolsActivePort to {str(port_file)!r}; content={content!r}"
    ) from parse_error
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class PlaywrightSessionConfig(BrowserConfig):
    """Serializable Playwright launch parameters.

    Call make() to start a Chromium browser and get a live PlaywrightSession.
    The browser is always launched with --remote-debugging-port so the returned
    session exposes a cdp_url for cross-backend access.
    """

    # Forwarded to Playwright's launch as `headless`.
    headless: bool = True
    # Viewport size; also used as the video size and, for resizeable windows, --window-size.
    viewport: Viewport = Field(default_factory=lambda: Viewport(width=1280, height=720))
    # Forwarded as `slow_mo` only when set.
    slow_mo: int | None = None
    # Applied through context.set_default_timeout() only when set.
    timeout: int | None = None
    # Forwarded as `locale` / `timezone_id` only when set.
    locale: str | None = None
    timezone_id: str | None = None

    # Advanced Playwright options (rarely needed)
    # When True the context is launched with no_viewport and an explicit --window-size.
    resizeable_window: bool = False
    # Extra launch kwargs; on key collision the explicit values built in make() win.
    pw_extra_kwargs: dict[str, Any] = Field(default_factory=dict)
    # When set, video is recorded into "<record_video_dir>/task_video".
    record_video_dir: str | None = None

    def make(self) -> "PlaywrightSession":
        """Launch a Chromium browser and return a live PlaywrightSession.

        A fresh temporary profile directory is created for the persistent context.
        On any failure, everything acquired so far (context, Playwright driver,
        profile directory) is released on a best-effort basis and the original
        exception is re-raised.

        Returns:
            A PlaywrightSession owning the Playwright instance, context, first page,
            CDP URL and temp profile directory.

        Raises:
            Exception: Whatever Playwright or _read_cdp_url raised during launch.
        """
        # Create the profile dir BEFORE starting the Playwright driver: if mkdtemp
        # fails there is nothing to clean up, and the driver is only ever started
        # inside the try block, so it is always stopped on failure.
        user_data_dir = tempfile.mkdtemp(prefix="cube_harness_")
        pw = None
        context = None
        try:
            pw = sync_playwright().start()

            args = [
                "--disable-features=OverlayScrollbars,ExtendedOverlayScrollbars",
                "--remote-debugging-port=0",
            ]
            if self.resizeable_window:
                args.insert(0, f"--window-size={self.viewport.width},{self.viewport.height}")

            viewport_dict = {"width": self.viewport.width, "height": self.viewport.height}
            # Explicit params take precedence over pw_extra_kwargs.
            # Extra kwargs are merged first so that any key collision is won by the explicit value.
            explicit_kwargs: dict[str, Any] = {
                "headless": self.headless,
                "args": args,
                "ignore_default_args": ["--hide-scrollbars"],
            }
            if self.slow_mo is not None:
                explicit_kwargs["slow_mo"] = self.slow_mo
            if self.resizeable_window:
                explicit_kwargs["no_viewport"] = True
            else:
                explicit_kwargs["viewport"] = viewport_dict
            if self.locale is not None:
                explicit_kwargs["locale"] = self.locale
            if self.timezone_id is not None:
                explicit_kwargs["timezone_id"] = self.timezone_id
            if self.record_video_dir is not None:
                explicit_kwargs["record_video_dir"] = Path(self.record_video_dir) / "task_video"
                explicit_kwargs["record_video_size"] = viewport_dict

            context = pw.chromium.launch_persistent_context(
                user_data_dir, **{**self.pw_extra_kwargs, **explicit_kwargs}
            )
            if self.timeout is not None:
                context.set_default_timeout(self.timeout)
            # Reuse the first page of the persistent context if there is one.
            page = context.pages[0] if context.pages else context.new_page()
            cdp_url = _read_cdp_url(user_data_dir)
            return PlaywrightSession(
                playwright=pw, page=page, context=context, cdp_url=cdp_url, user_data_dir=user_data_dir
            )
        except Exception:
            # Best-effort teardown in reverse acquisition order; each step is isolated
            # so one failing cleanup does not prevent the others.
            if context is not None:
                try:
                    context.close()
                except Exception as e:
                    logger.error(
                        "Failed to close browser context during make() cleanup; resources may be leaked: %s",
                        e,
                        exc_info=True,
                    )
            if pw is not None:
                try:
                    pw.stop()
                except Exception as e:
                    logger.error(
                        "Failed to stop playwright during make() cleanup; resources may be leaked: %s",
                        e,
                        exc_info=True,
                    )
            try:
                shutil.rmtree(user_data_dir)
            except OSError as e:
                logger.warning("Failed to remove temp profile dir %r during make() cleanup: %s", user_data_dir, e)
            raise
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class PlaywrightSession(BrowserSession):
    """Live Playwright browser session.

    Owns the Playwright instance, page, and context launched by PlaywrightSessionConfig.
    Always exposes a cdp_url for cross-backend access.

    Call stop() to release everything; stop() is safe to call more than once.
    """

    def __init__(
        self, playwright: Playwright, page: Page, context: BrowserContext, cdp_url: str, user_data_dir: str
    ) -> None:
        """Store the already-launched Playwright objects; nothing is launched here.

        Args:
            playwright: Started Playwright instance, stopped by stop().
            page: Page exposed through the `page` property.
            context: Browser context, closed by stop().
            cdp_url: CDP endpoint exposed through the `cdp_url` property.
            user_data_dir: Profile directory, removed by stop().
        """
        self._playwright: Playwright = playwright
        self._page: Page = page
        self._context: BrowserContext = context
        self._cdp_url: str = cdp_url
        self._user_data_dir: str = user_data_dir
        # Guards stop() so repeated calls do not re-close resources or log spurious errors.
        self._stopped: bool = False

    @property
    def cdp_url(self) -> str:
        """CDP endpoint URL given at construction."""
        return self._cdp_url

    @property
    def page(self) -> Page:
        """Page given at construction."""
        return self._page

    @property
    def context(self) -> BrowserContext:
        """Browser context given at construction."""
        return self._context

    @property
    def playwright(self) -> Playwright:
        """Playwright instance given at construction."""
        return self._playwright

    def stop(self) -> None:
        """Close the context, release all Playwright resources, and remove the temp profile dir.

        Each step is attempted independently and failures are logged rather than
        raised. Calling stop() again after the first call is a no-op.
        """
        # getattr keeps this safe for instances created without running __init__.
        if getattr(self, "_stopped", False):
            return
        self._stopped = True
        try:
            self._context.close()
        except Exception as e:
            logger.error("Error closing browser context; resources may be leaked: %s", e)
        try:
            self._playwright.stop()
        except Exception as e:
            logger.error("Error stopping playwright; process may be leaked: %s", e)
        try:
            shutil.rmtree(self._user_data_dir)
        except OSError as e:
            logger.warning("Failed to remove temp profile dir %r: %s", self._user_data_dir, e)
|