o-browser 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- o_browser/__init__.py +4 -0
- o_browser/_mixin.py +183 -0
- o_browser/client.py +297 -0
- o_browser/remote.py +88 -0
- o_browser-0.1.0.dist-info/METADATA +8 -0
- o_browser-0.1.0.dist-info/RECORD +7 -0
- o_browser-0.1.0.dist-info/WHEEL +4 -0
o_browser/__init__.py
ADDED
o_browser/_mixin.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PageMixin — Shared browser page interactions.
|
|
3
|
+
|
|
4
|
+
All methods depend only on self._page / self._context.
|
|
5
|
+
Inherited by BrowserClient (direct) and RemoteBrowser (CDP).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import os
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PageMixin:
    """Mixin providing navigation, interactions, scrolling, waiting, selectors, and GIF recording.

    Every method works through ``self._page`` / ``self._context``. The host
    class is expected to assign both attributes (falsy until a browser has
    been started) before any of these methods are used.
    """

    _page: Any
    _context: Any

    @property
    def page(self):
        """Return the active page.

        Raises:
            RuntimeError: If ``self._page`` is falsy (browser not started).
        """
        if not self._page:
            raise RuntimeError("Browser not started. Use 'async with' or call start() first.")
        return self._page

    @property
    def context(self):
        """Return the active browser context.

        Raises:
            RuntimeError: If ``self._context`` is falsy (browser not started).
        """
        if not self._context:
            raise RuntimeError("Browser not started. Use 'async with' or call start() first.")
        return self._context

    # === Navigation ===

    async def goto(self, url: str, wait_until: str = "domcontentloaded", timeout: int = 30000) -> bool:
        """Navigate to ``url``.

        Args:
            url: Address to load.
            wait_until: Load state forwarded to the page's ``goto``.
            timeout: Navigation timeout forwarded to the page's ``goto``.

        Returns:
            True only when a response was received and its status is 200.
            Any failure (including a browser that was never started) yields
            False rather than an exception.
        """
        try:
            response = await self.page.goto(url, wait_until=wait_until, timeout=timeout)
            # `response` may be None; always hand back a real bool instead of
            # leaking None through `response and ...`.
            return response is not None and response.status == 200
        except Exception:
            # Deliberate best-effort: callers branch on the boolean result.
            return False

    async def wait(self, seconds: float):
        """Sleep for ``seconds`` without blocking the event loop."""
        await asyncio.sleep(seconds)

    async def get_text(self) -> str:
        """Return the visible text of the page body ('' when the document has no body)."""
        return await self.page.evaluate(
            "() => (document.body ? document.body.innerText : '')"
        )

    async def get_html(self) -> str:
        """Return the page HTML as reported by the page's ``content()``."""
        return await self.page.content()

    async def screenshot(self, path: str, full_page: bool = True) -> str:
        """Save a screenshot to ``path`` and return that same path."""
        await self.page.screenshot(path=path, full_page=full_page)
        return path

    # === Scrolling ===

    async def scroll_to_bottom(self, times: int = 3, delay: float = 2.0) -> None:
        """Scroll the window to the bottom ``times`` times, sleeping ``delay`` seconds after each."""
        for _ in range(times):
            await self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
            await asyncio.sleep(delay)

    async def scroll_element(self, selector: str, times: int = 3, delay: float = 2.0) -> None:
        """Scroll inside a specific element (infinite scroll containers).

        Args:
            selector: CSS selector of the scroll container. It is passed to the
                page as an argument, so selectors containing quotes are safe.
            times: Number of scroll steps.
            delay: Seconds to sleep after each step.

        A selector that matches nothing is ignored.
        """
        for _ in range(times):
            await self.page.evaluate(
                """(selector) => {
                    const el = document.querySelector(selector);
                    if (el) el.scrollTop = el.scrollHeight;
                }""",
                selector,
            )
            await asyncio.sleep(delay)

    async def scroll_by(self, y: int):
        """Scroll the page vertically by ``y`` pixels."""
        # Pass the offset as data rather than splicing it into the script text.
        await self.page.evaluate("(dy) => window.scrollBy(0, dy)", y)

    # === Waiting ===

    async def wait_for_content(self, min_length: int = 500, max_attempts: int = 10, delay: float = 2.0) -> bool:
        """Poll the page text until it looks loaded.

        The page counts as loaded when its text does not contain "Loading"
        and is longer than ``min_length`` characters.

        Returns:
            True as soon as the condition holds, False after ``max_attempts``
            polls (``delay`` seconds apart) without success.
        """
        for _ in range(max_attempts):
            text = await self.get_text() or ""
            if "Loading" not in text and len(text) > min_length:
                return True
            await asyncio.sleep(delay)
        return False

    async def wait_for_selector(self, selector: str, timeout: int = 30000):
        """Wait for ``selector`` via the page and return whatever the page returns."""
        return await self.page.wait_for_selector(selector, timeout=timeout)

    # === Interactions ===

    async def click(self, selector: str) -> None:
        """Click the element matching ``selector``."""
        await self.page.click(selector)

    async def fill(self, selector: str, value: str) -> None:
        """Fill the input matching ``selector`` with ``value``."""
        await self.page.fill(selector, value)

    async def type(self, selector: str, text: str, delay: int = 50) -> None:
        """Type ``text`` into ``selector`` with ``delay`` between keystrokes."""
        await self.page.type(selector, text, delay=delay)

    async def press(self, key: str) -> None:
        """Press a key (e.g., 'Enter', 'Tab')."""
        await self.page.keyboard.press(key)

    # === Selectors ===

    async def query_selector(self, selector: str):
        """Return the result of the page's ``query_selector`` for ``selector``."""
        return await self.page.query_selector(selector)

    async def query_selector_all(self, selector: str):
        """Return the result of the page's ``query_selector_all`` for ``selector``."""
        return await self.page.query_selector_all(selector)

    async def evaluate(self, expression: str) -> Any:
        """Evaluate a JavaScript expression in the page and return its result."""
        return await self.page.evaluate(expression)

    # === GIF Recording ===

    def _init_recording(self):
        """Create the recording state (``_frames``, ``_rec_dir``) on first use."""
        if not hasattr(self, '_frames'):
            self._frames = []
            self._rec_dir = None

    async def capture_frame(self, duration: float = 0.5, full_page: bool = False):
        """Capture one screenshot frame for GIF recording.

        Args:
            duration: How long the frame is shown, in seconds (stored as
                hundredths of a second for ImageMagick's ``-delay``).
            full_page: Forwarded to the page's ``screenshot``.
        """
        self._init_recording()
        if not self._rec_dir:
            import tempfile
            # Frames live in a private temp dir until save_gif() removes it.
            self._rec_dir = tempfile.mkdtemp(prefix="browser_gif_")

        frame_path = os.path.join(self._rec_dir, f"frame_{len(self._frames):03d}.png")
        await self.page.screenshot(path=frame_path, full_page=full_page)
        self._frames.append((frame_path, int(duration * 100)))

    async def type_animated(self, selector: str, text: str, frame_every: int = 5,
                            frame_duration: float = 0.15, type_delay: int = 40):
        """Type ``text`` into ``selector``, capturing GIF frames along the way.

        A frame is captured after every ``frame_every`` characters and after
        the final character.

        Raises:
            ValueError: If ``frame_every`` is less than 1.
        """
        if frame_every < 1:
            # Fail before touching the page instead of dividing by zero
            # after the first keystroke.
            raise ValueError("frame_every must be >= 1")

        await self.page.click(selector)
        last_index = len(text) - 1
        for i, char in enumerate(text):
            await self.page.keyboard.type(char, delay=type_delay)
            if (i + 1) % frame_every == 0 or i == last_index:
                await self.capture_frame(duration=frame_duration)

    def save_gif(self, output_path: str, resize: str = None, optimize: bool = True) -> str:
        """Assemble captured frames into an animated GIF using ImageMagick.

        Args:
            output_path: Destination file.
            resize: Optional value for ImageMagick's ``-resize``.
            optimize: When true, add ``-layers Optimize``.

        Returns:
            ``output_path``.

        Raises:
            RuntimeError: If no frames were captured or ``convert`` is not on PATH.
            subprocess.CalledProcessError: If ``convert`` exits non-zero; the
                captured frames are kept in that case.
        """
        import subprocess
        import shutil

        self._init_recording()
        if not self._frames:
            raise RuntimeError("No frames captured. Use capture_frame() first.")

        if not shutil.which("convert"):
            raise RuntimeError("ImageMagick 'convert' not found. Install with: apt install imagemagick")

        cmd = ["convert"]
        for path, delay_cs in self._frames:
            cmd.extend(["-delay", str(delay_cs), path])
        cmd.extend(["-loop", "0"])
        if resize:
            cmd.extend(["-resize", resize])
        if optimize:
            cmd.extend(["-layers", "Optimize"])
        cmd.append(output_path)

        subprocess.run(cmd, check=True)

        # Only reached on success: drop the temp frames and reset the recorder.
        if self._rec_dir:
            shutil.rmtree(self._rec_dir, ignore_errors=True)
        self._frames = []
        self._rec_dir = None

        return output_path
|
o_browser/client.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""
|
|
2
|
+
BrowserClient — Direct browser automation (launches Chrome locally or connects via CDP).
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
# Headless automation
|
|
6
|
+
async with BrowserClient() as browser:
|
|
7
|
+
await browser.goto("https://example.com")
|
|
8
|
+
text = await browser.get_text()
|
|
9
|
+
|
|
10
|
+
# Connect to existing Chrome (e.g. launched with --remote-debugging-port=9222)
|
|
11
|
+
async with BrowserClient(cdp_url="http://127.0.0.1:9222") as browser:
|
|
12
|
+
await browser.goto("https://example.com")
|
|
13
|
+
text = await browser.get_text()
|
|
14
|
+
|
|
15
|
+
# With recording + proxy
|
|
16
|
+
async with BrowserClient(record=True, proxy={"server": "http://host:port"}) as browser:
|
|
17
|
+
await browser.goto("https://example.com")
|
|
18
|
+
# → recordings/ses_YYYYMMDD_HHMMSS/{network.har, video.webm, state.json}
|
|
19
|
+
|
|
20
|
+
# Interactive (human navigates, we record)
|
|
21
|
+
async with BrowserClient(interactive=True, record=True) as browser:
|
|
22
|
+
await browser.wait_closed()
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import asyncio
|
|
26
|
+
import os
|
|
27
|
+
import shutil
|
|
28
|
+
from datetime import datetime
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Optional, List, Dict, Callable
|
|
31
|
+
|
|
32
|
+
from ._mixin import PageMixin
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _detect_channel() -> str:
    """Pick the browser channel to launch.

    Looks up each known Chrome executable on PATH, in preference order, and
    returns the channel of the first one found. Falls back to "chromium"
    when none is present.
    """
    candidates = (
        ("chrome-beta", "google-chrome-beta"),
        ("chrome", "google-chrome"),
    )
    # Lazy generator: PATH lookups stop at the first hit.
    available = (name for name, executable in candidates if shutil.which(executable))
    return next(available, "chromium")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class BrowserClient(PageMixin):
    """
    Async browser client — launches Chrome directly or attaches to one over CDP.

    Can be used standalone or inherited by domain-specific clients.

    ``start()`` picks one of three modes:
      * ``cdp_url`` set      — attach to an already running browser; ``close()``
                               then only closes the page this client opened.
      * ``profile_path`` set — launch a persistent context on that profile dir.
      * otherwise            — launch a browser with a fresh context.
    """

    # Flags applied to every locally launched browser; caller-supplied
    # ``browser_args`` are appended after these.
    _DEFAULT_LAUNCH_ARGS = (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
    )

    def __init__(
        self,
        profile_path: Optional[str] = None,
        headless: bool = True,
        channel: Optional[str] = None,
        viewport: tuple[int, int] = (1920, 1080),
        user_agent: Optional[str] = None,
        cookies: Optional[List[Dict]] = None,
        locale: Optional[str] = None,
        timezone_id: Optional[str] = None,
        browser_args: Optional[List[str]] = None,
        proxy: Optional[Dict] = None,
        record: bool = False,
        record_dir: Optional[str] = None,
        interactive: bool = False,
        cdp_url: Optional[str] = None,
    ):
        """Store the configuration; nothing is launched until ``start()``.

        Args:
            profile_path: Profile directory for a persistent context (``~`` is expanded).
            headless: Run without a window. Forced to False when ``interactive``.
            channel: Browser channel; defaults to ``$BROWSER_CHANNEL`` or auto-detection.
            viewport: ``(width, height)``. Ignored (None) when ``interactive``.
            user_agent: Optional user-agent override for the context.
            cookies: Cookies added to the context right after start.
            locale: Optional context locale.
            timezone_id: Optional context timezone.
            browser_args: Extra launch flags, appended to the defaults.
            proxy: Optional proxy settings passed to the context.
            record: Record HAR + video (launch modes) and save storage state on close.
            record_dir: Where recordings go; defaults to ``recordings/ses_<timestamp>``.
            interactive: Headed mode with close detection (see ``wait_closed``).
            cdp_url: Attach to an existing browser instead of launching one.
        """
        self.cdp_url = cdp_url
        self.profile_path = Path(profile_path).expanduser() if profile_path else None
        self.headless = headless if not interactive else False
        self.channel = channel or os.environ.get("BROWSER_CHANNEL") or _detect_channel()
        self.viewport = {"width": viewport[0], "height": viewport[1]} if not interactive else None
        self.user_agent = user_agent
        self.cookies = cookies or []
        self.locale = locale
        self.timezone_id = timezone_id
        self.browser_args = browser_args or []
        self.proxy = proxy
        self.record = record
        self.record_dir = Path(record_dir) if record_dir else None
        self.interactive = interactive

        self._playwright = None
        self._browser = None
        self._context = None
        self._page = None
        self._closed_event: Optional[asyncio.Event] = None
        # True while attached to an externally managed browser over CDP,
        # i.e. close() must NOT shut the browser down.
        self._cdp_owns_browser = False

        self._response_handlers: List[Callable] = []
        # Guards against attaching the response dispatcher to the page twice.
        self._response_listener_attached = False

    async def __aenter__(self):
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    def _prepare_record_dir(self) -> Path:
        """Create and return the recording directory.

        Uses ``self.record_dir`` when set, otherwise a new
        ``recordings/ses_<YYYYMMDD_HHMMSS>`` directory.
        """
        if self.record_dir:
            d = self.record_dir
        else:
            session_id = f"ses_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
            d = Path("recordings") / session_id
        d.mkdir(parents=True, exist_ok=True)
        return d

    def _build_context_options(self) -> dict:
        """Build shared context options for both persistent and ephemeral modes.

        Only options that were actually configured are included. When
        recording is enabled this also creates the recording directory and
        stores it in ``self.record_dir``.
        """
        opts = {}
        if self.viewport:
            opts["viewport"] = self.viewport
        if self.user_agent:
            opts["user_agent"] = self.user_agent
        if self.locale:
            opts["locale"] = self.locale
        if self.timezone_id:
            opts["timezone_id"] = self.timezone_id
        if self.proxy:
            opts["proxy"] = self.proxy
        if self.record:
            rec_dir = self._prepare_record_dir()
            self.record_dir = rec_dir
            opts["record_har_path"] = str(rec_dir / "network.har")
            opts["record_har_content"] = "embed"
            opts["record_video_dir"] = str(rec_dir)
        return opts

    def _launch_args(self) -> List[str]:
        """Return default + user launch flags, de-duplicated in a stable order."""
        return list(dict.fromkeys([*self._DEFAULT_LAUNCH_ARGS, *self.browser_args]))

    async def _attach_over_cdp(self):
        """Attach to the browser at ``cdp_url`` and open a new page in its first context."""
        self._browser = await self._playwright.chromium.connect_over_cdp(self.cdp_url)
        self._cdp_owns_browser = True
        contexts = self._browser.contexts
        self._context = contexts[0] if contexts else await self._browser.new_context()
        self._page = await self._context.new_page()

    async def _launch_persistent(self):
        """Launch a persistent context on ``profile_path`` (created if missing)."""
        launch_args = self._launch_args()
        ctx_opts = self._build_context_options()

        self.profile_path.mkdir(parents=True, exist_ok=True)

        self._context = await self._playwright.chromium.launch_persistent_context(
            user_data_dir=str(self.profile_path),
            headless=self.headless,
            channel=self.channel,
            args=launch_args,
            **ctx_opts,
        )
        self._page = self._context.pages[0] if self._context.pages else await self._context.new_page()

    async def _launch_ephemeral(self):
        """Launch a browser and open one page in a fresh context."""
        launch_args = self._launch_args()
        ctx_opts = self._build_context_options()

        self._browser = await self._playwright.chromium.launch(
            headless=self.headless,
            channel=self.channel,
            args=launch_args,
        )
        self._context = await self._browser.new_context(**ctx_opts)
        self._page = await self._context.new_page()

    async def start(self) -> "BrowserClient":
        """Start (or attach to) the browser and return self.

        If any step fails, everything started so far is torn down again
        before the original exception is re-raised.
        """
        from patchright.async_api import async_playwright

        self._playwright = await async_playwright().start()
        try:
            if self.cdp_url:
                await self._attach_over_cdp()
            elif self.profile_path:
                await self._launch_persistent()
            else:
                await self._launch_ephemeral()

            if self.cookies:
                await self.add_cookies(self.cookies)

            if self._response_handlers:
                self._attach_response_listener()

            # Track browser close for interactive mode
            if self.interactive:
                self._closed_event = asyncio.Event()
                self._setup_close_detection()
        except BaseException:
            # __aexit__ is never called when __aenter__ raises, so clean up here.
            try:
                await self.close()
            except Exception:
                pass  # the start-up error is the one worth reporting
            raise

        return self

    def _setup_close_detection(self):
        """Detect when user closes browser (all pages closed or browser disconnected)."""
        # Handlers accept and ignore any event payload.
        def mark_closed(*_event_args):
            self._closed_event.set()

        def on_page_close(*_event_args):
            # Signal only once no pages remain; if the context can no longer
            # be inspected, treat the browser as gone.
            try:
                if not self._context.pages:
                    self._closed_event.set()
            except Exception:
                self._closed_event.set()

        def watch_page(page):
            page.on("close", on_page_close)

        # Watch existing pages
        for p in self._context.pages:
            watch_page(p)

        # Watch new pages too
        self._context.on("page", watch_page)

        # Browser disconnect (persistent context)
        self._context.on("close", mark_closed)

        # Browser disconnect (ephemeral)
        if self._browser:
            self._browser.on("disconnected", mark_closed)

    async def wait_closed(self):
        """Wait for the browser to be closed by the user (interactive mode).

        Raises:
            RuntimeError: If the browser has not been started.
        """
        if not self._closed_event:
            self.context  # raises RuntimeError before any state is created
            self._closed_event = asyncio.Event()
            self._setup_close_detection()
        await self._closed_event.wait()

    async def _save_storage_state(self):
        """Best-effort dump of the context's storage state to ``state.json``."""
        try:
            # In CDP mode no recording directory was prepared at start-up,
            # so make sure one exists before writing into it.
            self.record_dir = self._prepare_record_dir()
            await self._context.storage_state(path=str(self.record_dir / "state.json"))
        except Exception:
            pass  # recording must never prevent shutdown

    async def close(self):
        """Close browser and cleanup. Saves recordings if enabled.

        Later teardown steps still run when an earlier one raises, and the
        internal handles are reset so calling ``close()`` again is harmless.
        """
        try:
            if self.record and self._context:
                await self._save_storage_state()

            if self._cdp_owns_browser:
                # CDP mode: only close pages we opened, don't kill the browser
                if self._page:
                    try:
                        await self._page.close()
                    except Exception:
                        pass
            else:
                try:
                    if self._context:
                        await self._context.close()
                finally:
                    if self._browser:
                        await self._browser.close()
        finally:
            playwright = self._playwright
            self._page = None
            self._context = None
            self._browser = None
            self._playwright = None
            self._cdp_owns_browser = False
            self._response_listener_attached = False
            if playwright:
                await playwright.stop()

        if self.record and self.record_dir:
            print(f"Recordings saved: {self.record_dir}")

    # === Cookie Management ===

    async def add_cookies(self, cookies: List[Dict]):
        """Add cookies to browser context.

        Each cookie needs ``name`` and ``value``. It is scoped either by
        ``url`` (when given without ``domain``) or by ``domain``/``path``
        (``path`` defaults to "/"). ``expires``, ``httpOnly``, ``secure`` and
        ``sameSite`` are forwarded when present.

        Raises:
            RuntimeError: If the browser has not been started.
            KeyError: If a cookie lacks ``name`` or ``value``.
        """
        optional_keys = ("expires", "httpOnly", "secure", "sameSite")
        formatted = []
        for cookie in cookies:
            c = {"name": cookie["name"], "value": cookie["value"]}
            if cookie.get("url") and "domain" not in cookie:
                c["url"] = cookie["url"]
            else:
                c["domain"] = cookie.get("domain", "")
                c["path"] = cookie.get("path", "/")
            for key in optional_keys:
                if key in cookie:
                    c[key] = cookie[key]
            formatted.append(c)

        await self.context.add_cookies(formatted)

    async def get_cookies(self) -> List[Dict]:
        """Get all cookies from context ([] when no context exists)."""
        if not self._context:
            return []
        return await self._context.cookies()

    # === Response Interception ===

    def _attach_response_listener(self):
        """Attach the dispatcher to the current page exactly once."""
        if not self._response_listener_attached:
            self._page.on("response", self._on_response)
            self._response_listener_attached = True

    def on_response(self, handler: Callable):
        """Register response handler for intercepting network responses.

        ``handler`` may be sync or async. Handlers registered before
        ``start()`` are wired up when the page is created.
        """
        self._response_handlers.append(handler)
        if self._page:
            self._attach_response_listener()

    async def _on_response(self, response):
        """Internal response handler dispatcher."""
        for handler in list(self._response_handlers):
            try:
                result = handler(response)
                # Await whatever awaitable the handler produced, regardless of
                # how the handler itself was defined.
                if hasattr(result, "__await__"):
                    await result
            except Exception:
                # One failing handler must not starve the others.
                pass
|
o_browser/remote.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""
|
|
2
|
+
RemoteBrowser — Connect to a running browser service via CDP.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
async with RemoteBrowser("http://host:8080") as browser:
|
|
6
|
+
await browser.goto("https://example.com")
|
|
7
|
+
text = await browser.get_text()
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
from ._mixin import PageMixin
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RemoteBrowser(PageMixin):
    """
    Connects to a remote Chrome instance via CDP WebSocket.

    Does NOT launch or kill the browser — only connects/disconnects.
    """

    def __init__(self, endpoint: str):
        """
        Args:
            endpoint: HTTP base URL (http://host:8080) or direct WS URL (ws://host:9222/devtools/...)
        """
        self.endpoint = endpoint
        self._playwright = None
        self._browser = None
        self._context = None
        self._page = None

    async def __aenter__(self):
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    @staticmethod
    def _cdp_fallback_base(base: str) -> str:
        """Return ``base`` with its port forced to 9222.

        Only the port of the network location is touched; userinfo, IPv6
        literals and the path are left alone, and a URL without an explicit
        port gets one added.
        """
        from urllib.parse import urlsplit, urlunsplit

        parts = urlsplit(base)
        host = parts.hostname
        if not host:
            # Not a URL we can take apart; keep the historical textual rewrite.
            return re.sub(r":\d+", ":9222", base)

        if ":" in host:
            host = f"[{host}]"  # restore the brackets of an IPv6 literal
        userinfo = parts.netloc.rpartition("@")[0]
        netloc = f"{userinfo}@{host}:9222" if userinfo else f"{host}:9222"
        return urlunsplit(parts._replace(netloc=netloc))

    def _discover_ws_url_blocking(self) -> str:
        """Blocking discovery of the WebSocket URL; run via ``_discover_ws_url``."""
        import urllib.request
        import json

        # Strip trailing slash
        base = self.endpoint.rstrip("/")

        # Try /api/sessions/current first (o-browser service)
        try:
            with urllib.request.urlopen(f"{base}/api/sessions/current", timeout=5) as resp:
                session = json.loads(resp.read())
            ws_url = session.get("cdp", {}).get("ws_url")
            if ws_url:
                return ws_url
        except Exception:
            # Any failure here just means "try the plain CDP endpoint instead".
            pass

        # Fallback: direct CDP /json/version
        cdp_base = self._cdp_fallback_base(base)
        with urllib.request.urlopen(f"{cdp_base}/json/version", timeout=5) as resp:
            data = json.loads(resp.read())
        return data["webSocketDebuggerUrl"]

    async def _discover_ws_url(self) -> str:
        """Auto-discover WebSocket URL from HTTP endpoint.

        Tries ``<endpoint>/api/sessions/current`` first, then
        ``/json/version`` on port 9222 of the same host. The HTTP requests
        run in the default executor so the event loop is not blocked.

        Raises:
            Whatever the fallback request raises (e.g. ``urllib.error.URLError``),
            or ``KeyError`` if its reply has no ``webSocketDebuggerUrl``.
        """
        import asyncio

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._discover_ws_url_blocking)

    async def start(self) -> "RemoteBrowser":
        """Connect to remote browser.

        Reuses the browser's first context and first page when they exist,
        creating them otherwise. If connecting fails, the Playwright driver
        started here is stopped again before the error is re-raised.
        """
        from patchright.async_api import async_playwright

        self._playwright = await async_playwright().start()
        try:
            if self.endpoint.startswith(("ws://", "wss://")):
                ws_url = self.endpoint
            else:
                ws_url = await self._discover_ws_url()

            self._browser = await self._playwright.chromium.connect_over_cdp(ws_url)
            contexts = self._browser.contexts
            self._context = contexts[0] if contexts else await self._browser.new_context()
            self._page = self._context.pages[0] if self._context.pages else await self._context.new_page()
        except BaseException:
            # __aexit__ is never called when __aenter__ raises, so clean up here.
            try:
                await self.close()
            except Exception:
                pass  # the connection error is the one worth reporting
            raise

        return self

    async def close(self):
        """Disconnect (does NOT kill the remote browser).

        The Playwright driver is stopped even if disconnecting raises, and
        the handles are cleared so a second call is a no-op.
        """
        browser, playwright = self._browser, self._playwright
        self._page = None
        self._context = None
        self._browser = None
        self._playwright = None
        try:
            if browser:
                await browser.close()
        finally:
            if playwright:
                await playwright.stop()
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
o_browser/__init__.py,sha256=nPx521wxJs0bgIQc4MGXfo0TllBmVDy50wPgAo8bmIE,114
|
|
2
|
+
o_browser/_mixin.py,sha256=1vfVaxfyES4dJvlYO0DTOoAJJPKz88XVDpAOVJckxf8,6695
|
|
3
|
+
o_browser/client.py,sha256=HJcdilnpz7K6B-c17dFO2Olgy45_-K9lRZAfDaXvgB4,10550
|
|
4
|
+
o_browser/remote.py,sha256=s4gdcKOi2tLn3ozfhoZhJoeLMOwOLgXEHbt0mXkioC8,2817
|
|
5
|
+
o_browser-0.1.0.dist-info/METADATA,sha256=qbBKWCF9XU0Hcw4KHb3jYtbETbImpOh4DDJPSWNFNI4,242
|
|
6
|
+
o_browser-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
7
|
+
o_browser-0.1.0.dist-info/RECORD,,
|