funbrowser 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- funbrowser/__init__.py +120 -0
- funbrowser/_cdp.py +181 -0
- funbrowser/_errors.py +32 -0
- funbrowser/_flags.py +89 -0
- funbrowser/_launcher.py +153 -0
- funbrowser/browser.py +281 -0
- funbrowser/context.py +163 -0
- funbrowser/context_pool.py +162 -0
- funbrowser/element.py +258 -0
- funbrowser/fingerprint/__init__.py +14 -0
- funbrowser/fingerprint/data.py +74 -0
- funbrowser/fingerprint/presets.py +588 -0
- funbrowser/geo.py +139 -0
- funbrowser/humanly.py +188 -0
- funbrowser/panel.py +1181 -0
- funbrowser/pool.py +152 -0
- funbrowser/profile.py +73 -0
- funbrowser/proxy.py +236 -0
- funbrowser/py.typed +0 -0
- funbrowser/solver/__init__.py +12 -0
- funbrowser/solver/bridge.py +167 -0
- funbrowser/solver/client.py +244 -0
- funbrowser/solver/scripts/__init__.py +0 -0
- funbrowser/solver/scripts/_bootstrap.js +30 -0
- funbrowser/solver/scripts/funcaptcha.js +74 -0
- funbrowser/solver/scripts/geetest.js +76 -0
- funbrowser/solver/scripts/hcaptcha.js +76 -0
- funbrowser/solver/scripts/recaptcha_v2.js +79 -0
- funbrowser/solver/scripts/recaptcha_v3.js +45 -0
- funbrowser/solver/scripts/turnstile.js +60 -0
- funbrowser/stealth/__init__.py +13 -0
- funbrowser/stealth/flags.py +54 -0
- funbrowser/stealth/patches.py +214 -0
- funbrowser/stealth/scripts/__init__.py +0 -0
- funbrowser/stealth/scripts/_camouflage.js +32 -0
- funbrowser/stealth/scripts/_cleanup.js +8 -0
- funbrowser/stealth/scripts/audio_noise.js +32 -0
- funbrowser/stealth/scripts/canvas_noise.js +43 -0
- funbrowser/stealth/scripts/chrome_runtime.js +53 -0
- funbrowser/stealth/scripts/hardware.js +15 -0
- funbrowser/stealth/scripts/languages.js +13 -0
- funbrowser/stealth/scripts/permissions.js +15 -0
- funbrowser/stealth/scripts/platform.js +18 -0
- funbrowser/stealth/scripts/plugins.js +37 -0
- funbrowser/stealth/scripts/screen_props.js +18 -0
- funbrowser/stealth/scripts/webdriver.js +14 -0
- funbrowser/stealth/scripts/webgl.js +27 -0
- funbrowser/stealth/scripts/webrtc.js +45 -0
- funbrowser/tab.py +345 -0
- funbrowser/tls/__init__.py +25 -0
- funbrowser/tls/ca.py +181 -0
- funbrowser/tls/http.py +145 -0
- funbrowser/tls/mitm.py +326 -0
- funbrowser-0.1.0.dist-info/METADATA +316 -0
- funbrowser-0.1.0.dist-info/RECORD +57 -0
- funbrowser-0.1.0.dist-info/WHEEL +4 -0
- funbrowser-0.1.0.dist-info/licenses/LICENSE +21 -0
funbrowser/browser.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"""Browser — the launched Chrome process plus the CDP control plane."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import dataclasses
|
|
7
|
+
import shutil
|
|
8
|
+
import time
|
|
9
|
+
from collections import deque
|
|
10
|
+
from collections.abc import Sequence
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from types import TracebackType
|
|
13
|
+
from typing import TYPE_CHECKING, Any, Self
|
|
14
|
+
|
|
15
|
+
from ._cdp import CDPConnection
|
|
16
|
+
from ._flags import merge_flags, mini_flags
|
|
17
|
+
from ._launcher import LaunchedBrowser, launch_chrome
|
|
18
|
+
from .fingerprint import Fingerprint
|
|
19
|
+
from .geo import GeoInfo, lookup_proxy_geo
|
|
20
|
+
from .humanly import DEFAULT as DEFAULT_HUMANLY
|
|
21
|
+
from .humanly import HumanBehavior
|
|
22
|
+
from .proxy import Proxy
|
|
23
|
+
from .proxy import parse as parse_proxy
|
|
24
|
+
from .solver import FunSolverClient
|
|
25
|
+
from .stealth import stealth_flags
|
|
26
|
+
from .tab import Tab
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from .context import BrowserContext as BrowserContextT
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _enrich_with_geo(fp: Fingerprint | None, geo: GeoInfo) -> Fingerprint:
    """Merge geo-derived timezone/locale/accept-language into a fingerprint.

    Values already present on ``fp`` always win; a geo value is used only
    where the fingerprint's field is falsy.  Empty geo values are
    normalised to ``None``.  When ``fp`` is ``None`` a new ``Fingerprint``
    carrying only the geo-derived fields is returned; otherwise a copy of
    ``fp`` is returned and ``fp`` itself is left untouched.
    """
    # Normalise falsy geo values (e.g. empty strings) to None exactly once.
    geo_timezone = geo.timezone or None
    geo_locale = geo.locale or None
    geo_accept_language = geo.accept_language or None

    if fp is not None:
        return dataclasses.replace(
            fp,
            timezone=fp.timezone or geo_timezone,
            locale=fp.locale or geo_locale,
            accept_language=fp.accept_language or geo_accept_language,
        )

    return Fingerprint(
        timezone=geo_timezone,
        locale=geo_locale,
        accept_language=geo_accept_language,
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class Browser:
|
|
49
|
+
def __init__(
|
|
50
|
+
self,
|
|
51
|
+
launched: LaunchedBrowser,
|
|
52
|
+
cdp: CDPConnection,
|
|
53
|
+
*,
|
|
54
|
+
stealth: bool = True,
|
|
55
|
+
fingerprint: Fingerprint | None = None,
|
|
56
|
+
proxy: Proxy | None = None,
|
|
57
|
+
humanly: HumanBehavior | None = None,
|
|
58
|
+
solver_client: FunSolverClient | None = None,
|
|
59
|
+
geo: GeoInfo | None = None,
|
|
60
|
+
) -> None:
|
|
61
|
+
self._launched = launched
|
|
62
|
+
self._cdp = cdp
|
|
63
|
+
self._stealth = stealth
|
|
64
|
+
self._fingerprint = fingerprint
|
|
65
|
+
self._proxy = proxy
|
|
66
|
+
self._humanly = humanly
|
|
67
|
+
self._solver_client = solver_client
|
|
68
|
+
self._geo = geo
|
|
69
|
+
self._tabs: dict[str, Tab] = {}
|
|
70
|
+
# Browser-scoped event log — populated by the solver bridge whenever
|
|
71
|
+
# a captcha is attempted. Panel and any other observer can read it.
|
|
72
|
+
self._events: deque[dict[str, Any]] = deque(maxlen=100)
|
|
73
|
+
# Browser contexts (CDP browserContextIds) currently alive on this
|
|
74
|
+
# process. Populated by funbrowser.context._create_context_on,
|
|
75
|
+
# drained by BrowserContext.close.
|
|
76
|
+
self._contexts: set[str] = set()
|
|
77
|
+
|
|
78
|
+
@property
|
|
79
|
+
def stealth_enabled(self) -> bool:
|
|
80
|
+
return self._stealth
|
|
81
|
+
|
|
82
|
+
@property
|
|
83
|
+
def fingerprint(self) -> Fingerprint | None:
|
|
84
|
+
return self._fingerprint
|
|
85
|
+
|
|
86
|
+
@property
|
|
87
|
+
def proxy(self) -> Proxy | None:
|
|
88
|
+
return self._proxy
|
|
89
|
+
|
|
90
|
+
@property
|
|
91
|
+
def auto_solve_enabled(self) -> bool:
|
|
92
|
+
return self._solver_client is not None
|
|
93
|
+
|
|
94
|
+
@property
|
|
95
|
+
def solver_client(self) -> FunSolverClient | None:
|
|
96
|
+
return self._solver_client
|
|
97
|
+
|
|
98
|
+
@property
|
|
99
|
+
def humanly(self) -> HumanBehavior | None:
|
|
100
|
+
return self._humanly
|
|
101
|
+
|
|
102
|
+
@property
|
|
103
|
+
def events(self) -> tuple[dict[str, Any], ...]:
|
|
104
|
+
"""Recent browser-scoped events (most recent first)."""
|
|
105
|
+
return tuple(self._events)
|
|
106
|
+
|
|
107
|
+
def record_event(self, **fields: Any) -> None:
|
|
108
|
+
"""Append an event to the browser's log. Timestamp added automatically."""
|
|
109
|
+
self._events.appendleft({"ts": time.time(), **fields})
|
|
110
|
+
|
|
111
|
+
async def create_context(
|
|
112
|
+
self,
|
|
113
|
+
*,
|
|
114
|
+
proxy: str | Proxy | None = None,
|
|
115
|
+
proxy_bypass: list[str] | None = None,
|
|
116
|
+
) -> BrowserContextT:
|
|
117
|
+
"""Create an isolated :class:`BrowserContext` inside this Browser.
|
|
118
|
+
|
|
119
|
+
Cheap (~5-15 MB) compared to launching a fresh Chrome. Each
|
|
120
|
+
context has its own cookies / storage / cache, and optionally
|
|
121
|
+
its own proxy. See :mod:`funbrowser.context` for the trade-offs.
|
|
122
|
+
"""
|
|
123
|
+
from .context import _create_context_on
|
|
124
|
+
|
|
125
|
+
return await _create_context_on(self, proxy=proxy, proxy_bypass=proxy_bypass)
|
|
126
|
+
|
|
127
|
+
@property
|
|
128
|
+
def geo(self) -> GeoInfo | None:
|
|
129
|
+
return self._geo
|
|
130
|
+
|
|
131
|
+
@classmethod
|
|
132
|
+
async def start(
|
|
133
|
+
cls,
|
|
134
|
+
*,
|
|
135
|
+
executable: str | Path | None = None,
|
|
136
|
+
user_data_dir: str | Path | None = None,
|
|
137
|
+
headless: bool = False,
|
|
138
|
+
stealth: bool = True,
|
|
139
|
+
fingerprint: Fingerprint | None = None,
|
|
140
|
+
proxy: str | Proxy | None = None,
|
|
141
|
+
geo_autoconfigure: bool = True,
|
|
142
|
+
humanly: bool | HumanBehavior = False,
|
|
143
|
+
mini: bool = False,
|
|
144
|
+
api_key: str | None = None,
|
|
145
|
+
auto_solve: bool = True,
|
|
146
|
+
solver_base_url: str | None = None,
|
|
147
|
+
args: Sequence[str] = (),
|
|
148
|
+
) -> Self:
|
|
149
|
+
parts: list[list[str]] = []
|
|
150
|
+
if stealth:
|
|
151
|
+
parts.append(stealth_flags())
|
|
152
|
+
if mini:
|
|
153
|
+
parts.append(mini_flags())
|
|
154
|
+
|
|
155
|
+
proxy_obj: Proxy | None = None
|
|
156
|
+
if proxy is not None:
|
|
157
|
+
proxy_obj = parse_proxy(proxy)
|
|
158
|
+
parts.append([f"--proxy-server={proxy_obj.chrome_arg()}"])
|
|
159
|
+
|
|
160
|
+
parts.append(list(args))
|
|
161
|
+
extra = merge_flags(*parts)
|
|
162
|
+
|
|
163
|
+
launched = await launch_chrome(
|
|
164
|
+
executable=Path(executable) if executable else None,
|
|
165
|
+
user_data_dir=Path(user_data_dir) if user_data_dir else None,
|
|
166
|
+
headless=headless,
|
|
167
|
+
extra_args=extra,
|
|
168
|
+
)
|
|
169
|
+
cdp = CDPConnection(launched.ws_url)
|
|
170
|
+
await cdp.connect()
|
|
171
|
+
|
|
172
|
+
solver_client: FunSolverClient | None = None
|
|
173
|
+
if api_key and auto_solve:
|
|
174
|
+
if solver_base_url:
|
|
175
|
+
solver_client = FunSolverClient(api_key, base_url=solver_base_url)
|
|
176
|
+
else:
|
|
177
|
+
solver_client = FunSolverClient(api_key)
|
|
178
|
+
|
|
179
|
+
humanly_profile: HumanBehavior | None
|
|
180
|
+
if humanly is True:
|
|
181
|
+
humanly_profile = DEFAULT_HUMANLY
|
|
182
|
+
elif humanly is False:
|
|
183
|
+
humanly_profile = None
|
|
184
|
+
elif isinstance(humanly, HumanBehavior):
|
|
185
|
+
humanly_profile = humanly
|
|
186
|
+
else:
|
|
187
|
+
humanly_profile = None
|
|
188
|
+
|
|
189
|
+
# Geo auto-coupling: ask ip-api.com (through the proxy) for the exit
|
|
190
|
+
# IP's timezone + locale, fill any matching fingerprint fields the
|
|
191
|
+
# caller didn't already set. Skip silently on any failure.
|
|
192
|
+
geo: GeoInfo | None = None
|
|
193
|
+
if proxy_obj is not None and geo_autoconfigure:
|
|
194
|
+
geo = await lookup_proxy_geo(proxy_obj)
|
|
195
|
+
if geo is not None:
|
|
196
|
+
fingerprint = _enrich_with_geo(fingerprint, geo)
|
|
197
|
+
|
|
198
|
+
return cls(
|
|
199
|
+
launched,
|
|
200
|
+
cdp,
|
|
201
|
+
stealth=stealth,
|
|
202
|
+
fingerprint=fingerprint,
|
|
203
|
+
proxy=proxy_obj,
|
|
204
|
+
humanly=humanly_profile,
|
|
205
|
+
solver_client=solver_client,
|
|
206
|
+
geo=geo,
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
@property
|
|
210
|
+
def tabs(self) -> list[Tab]:
|
|
211
|
+
return list(self._tabs.values())
|
|
212
|
+
|
|
213
|
+
async def new_tab(self, url: str = "about:blank") -> Tab:
|
|
214
|
+
res = await self._cdp.send("Target.createTarget", {"url": url})
|
|
215
|
+
target_id = res["targetId"]
|
|
216
|
+
attach = await self._cdp.send(
|
|
217
|
+
"Target.attachToTarget",
|
|
218
|
+
{"targetId": target_id, "flatten": True},
|
|
219
|
+
)
|
|
220
|
+
session_id = attach["sessionId"]
|
|
221
|
+
tab = Tab(self, target_id, session_id)
|
|
222
|
+
await tab._initialize()
|
|
223
|
+
self._tabs[target_id] = tab
|
|
224
|
+
return tab
|
|
225
|
+
|
|
226
|
+
async def get(self, url: str, *, wait_until: str = "load") -> Tab:
|
|
227
|
+
tab = await self.new_tab(url="about:blank")
|
|
228
|
+
await tab.goto(url, wait_until=wait_until)
|
|
229
|
+
return tab
|
|
230
|
+
|
|
231
|
+
def _on_tab_closed(self, tab: Tab) -> None:
|
|
232
|
+
self._tabs.pop(tab.target_id, None)
|
|
233
|
+
|
|
234
|
+
# ── cookie store (browser-wide) ───────────────────────────────────
|
|
235
|
+
|
|
236
|
+
async def cookies(self) -> list[dict[str, Any]]:
|
|
237
|
+
"""Every cookie the browser is currently holding."""
|
|
238
|
+
result = await self._cdp.send("Storage.getCookies")
|
|
239
|
+
return list(result.get("cookies", []))
|
|
240
|
+
|
|
241
|
+
async def set_cookies(self, cookies: Sequence[dict[str, Any]]) -> None:
|
|
242
|
+
"""Add or overwrite cookies. Same dict shape as :meth:`cookies` returns."""
|
|
243
|
+
await self._cdp.send("Storage.setCookies", {"cookies": list(cookies)})
|
|
244
|
+
|
|
245
|
+
async def clear_cookies(self) -> None:
|
|
246
|
+
"""Wipe every cookie. Useful between tests."""
|
|
247
|
+
await self._cdp.send("Storage.clearCookies")
|
|
248
|
+
|
|
249
|
+
async def stop(self) -> None:
|
|
250
|
+
for tab in list(self._tabs.values()):
|
|
251
|
+
try:
|
|
252
|
+
await tab.close()
|
|
253
|
+
except Exception:
|
|
254
|
+
pass
|
|
255
|
+
await self._cdp.close()
|
|
256
|
+
if self._solver_client is not None:
|
|
257
|
+
try:
|
|
258
|
+
await self._solver_client.close()
|
|
259
|
+
except Exception:
|
|
260
|
+
pass
|
|
261
|
+
proc = self._launched.process
|
|
262
|
+
if proc.returncode is None:
|
|
263
|
+
try:
|
|
264
|
+
proc.terminate()
|
|
265
|
+
await asyncio.wait_for(proc.wait(), timeout=5.0)
|
|
266
|
+
except TimeoutError:
|
|
267
|
+
proc.kill()
|
|
268
|
+
await proc.wait()
|
|
269
|
+
if self._launched.user_data_dir_is_tmp:
|
|
270
|
+
shutil.rmtree(self._launched.user_data_dir, ignore_errors=True)
|
|
271
|
+
|
|
272
|
+
async def __aenter__(self) -> Self:
|
|
273
|
+
return self
|
|
274
|
+
|
|
275
|
+
async def __aexit__(
|
|
276
|
+
self,
|
|
277
|
+
exc_type: type[BaseException] | None,
|
|
278
|
+
exc: BaseException | None,
|
|
279
|
+
tb: TracebackType | None,
|
|
280
|
+
) -> None:
|
|
281
|
+
await self.stop()
|
funbrowser/context.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""BrowserContext — an isolated browsing context inside a shared Browser.
|
|
2
|
+
|
|
3
|
+
CDP exposes ``Target.createBrowserContext`` which carves a fresh, isolated
|
|
4
|
+
browsing identity out of an existing Chrome process: separate cookies,
|
|
5
|
+
localStorage, IndexedDB, cache, and (optionally) its own proxy. Each
|
|
6
|
+
context behaves like its own browser to the page running inside, but a
|
|
7
|
+
single Chrome process serves any number of them.
|
|
8
|
+
|
|
9
|
+
For farm operators who would otherwise spawn 10-50 standalone Chrome
|
|
10
|
+
processes, switching to one Chrome + N contexts cuts memory roughly
|
|
11
|
+
**7-10x** (each context adds ~5-15 MB on top of the host process, vs
|
|
12
|
+
~150 MB for a fresh Chrome).
|
|
13
|
+
|
|
14
|
+
Trade-offs vs full :class:`Browser` per slot:
|
|
15
|
+
|
|
16
|
+
- Crash in the host Chrome takes every context down with it
|
|
17
|
+
- All contexts share one process — kernel-level isolation is weaker
|
|
18
|
+
- Stealth + fingerprint patches still apply per-tab (each new tab gets
|
|
19
|
+
its own ``Page.addScriptToEvaluateOnNewDocument`` set)
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
from typing import TYPE_CHECKING, Any
|
|
26
|
+
|
|
27
|
+
from .proxy import Proxy
|
|
28
|
+
from .proxy import parse as parse_proxy
|
|
29
|
+
from .tab import Tab
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
from .browser import Browser
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class BrowserContext:
    """One isolated browsing identity inside a shared :class:`Browser`.

    All CDP traffic goes through the host browser's connection; the
    context only carries its ``browserContextId``, its optional proxy and
    the tabs it opened.  Usable as an async context manager whose exit
    calls :meth:`close`.
    """

    def __init__(
        self,
        browser: Browser,
        context_id: str,
        *,
        proxy: Proxy | None = None,
    ) -> None:
        """Bind to an existing CDP browser context on ``browser``."""
        self._browser = browser
        self._context_id = context_id
        self._proxy = proxy
        # Tabs opened through this context, keyed by CDP target id.
        self._tabs: dict[str, Tab] = {}
        self._closed = False

    @property
    def browser(self) -> Browser:
        """The host browser this context lives in."""
        return self._browser

    @property
    def context_id(self) -> str:
        """The CDP ``browserContextId``."""
        return self._context_id

    @property
    def proxy(self) -> Proxy | None:
        """The context-specific proxy, or ``None``."""
        return self._proxy

    @property
    def tabs(self) -> list[Tab]:
        """Tabs opened by this context that the host browser still tracks.

        Tabs are constructed with the host ``Browser`` and nothing in this
        class removes a single tab from ``_tabs``, so the host registry is
        used as the source of truth for which tabs are still open.
        NOTE(review): presumes ``Tab.close`` unregisters itself via the
        host's ``_on_tab_closed``; if it does not, this returns every tab
        this context has opened, as before.
        """
        host_tabs = self._browser._tabs
        return [tab for target_id, tab in self._tabs.items() if target_id in host_tabs]

    @property
    def closed(self) -> bool:
        """``True`` once :meth:`close` has been called."""
        return self._closed

    async def new_tab(self, url: str = "about:blank") -> Tab:
        """Open a tab inside this context and register it on context and host.

        Raises:
            RuntimeError: If the context is already closed.

        If attaching or tab initialisation fails, the freshly created
        target is closed (best effort) before the error propagates.
        """
        if self._closed:
            raise RuntimeError("context is closed")
        cdp = self._browser._cdp
        res = await cdp.send(
            "Target.createTarget",
            {"url": url, "browserContextId": self._context_id},
        )
        target_id = res["targetId"]
        try:
            attach = await cdp.send(
                "Target.attachToTarget",
                {"targetId": target_id, "flatten": True},
            )
            session_id = attach["sessionId"]
            tab = Tab(self._browser, target_id, session_id)
            await tab._initialize()
        except Exception:
            try:
                await cdp.send("Target.closeTarget", {"targetId": target_id})
            except Exception:
                # Best effort: keep the original failure as the raised error.
                pass
            raise
        self._tabs[target_id] = tab
        # Also register on the host browser so its tab counters stay accurate.
        self._browser._tabs[target_id] = tab
        return tab

    async def get(self, url: str, *, wait_until: str = "load") -> Tab:
        """Open a blank tab in this context, navigate it to ``url``, return it."""
        tab = await self.new_tab()
        await tab.goto(url, wait_until=wait_until)
        return tab

    async def cookies(self) -> list[dict[str, Any]]:
        """Cookies scoped to this context. No interference between contexts."""
        result = await self._browser._cdp.send(
            "Storage.getCookies",
            {"browserContextId": self._context_id},
        )
        return list(result.get("cookies", []))

    async def set_cookies(self, cookies: list[dict[str, Any]]) -> None:
        """Add or overwrite cookies in this context only."""
        await self._browser._cdp.send(
            "Storage.setCookies",
            {"cookies": list(cookies), "browserContextId": self._context_id},
        )

    async def clear_cookies(self) -> None:
        """Remove every cookie held by this context."""
        await self._browser._cdp.send(
            "Storage.clearCookies",
            {"browserContextId": self._context_id},
        )

    async def close(self) -> None:
        """Close every tab in this context, then dispose the context itself.

        Idempotent.  Tab-close errors are ignored and a failing
        ``Target.disposeBrowserContext`` is logged rather than raised.
        Every tab of this context is also removed from the host browser's
        registry, even when closing that tab failed.
        """
        if self._closed:
            return
        self._closed = True
        host_tabs = self._browser._tabs
        for target_id, tab in list(self._tabs.items()):
            try:
                await tab.close()
            except Exception:
                # Best effort: the context is disposed below regardless.
                pass
            # new_tab() registered the tab on the host; make sure it does
            # not linger there if close() failed before unregistering.
            host_tabs.pop(target_id, None)
        self._tabs.clear()
        try:
            await self._browser._cdp.send(
                "Target.disposeBrowserContext",
                {"browserContextId": self._context_id},
            )
        except Exception:
            logger.exception("context: disposeBrowserContext failed for %s", self._context_id)
        self._browser._contexts.discard(self._context_id)

    async def __aenter__(self) -> BrowserContext:
        """Return ``self``; the context already exists."""
        return self

    async def __aexit__(self, *exc: object) -> None:
        """Close the context when leaving the ``async with`` block."""
        await self.close()
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
async def _create_context_on(
    browser: Browser,
    *,
    proxy: str | Proxy | None = None,
    proxy_bypass: list[str] | None = None,
) -> BrowserContext:
    """Internal: create a browser context on a Browser instance.

    Sends ``Target.createBrowserContext`` over the browser's CDP
    connection, records the returned id in ``browser._contexts`` and wraps
    it in a :class:`BrowserContext`.  A given ``proxy`` is parsed and sent
    as ``proxyServer``; a non-empty ``proxy_bypass`` is sent comma-joined
    as ``proxyBypassList``.
    """
    request: dict[str, Any] = {}
    parsed_proxy: Proxy | None = None

    if proxy is not None:
        parsed_proxy = parse_proxy(proxy)
        request["proxyServer"] = parsed_proxy.chrome_arg()
    # NOTE(review): assumes the bypass list is forwarded independently of
    # whether a proxy was given — confirm this matches the intended nesting.
    if proxy_bypass:
        request["proxyBypassList"] = ",".join(proxy_bypass)

    reply = await browser._cdp.send("Target.createBrowserContext", request)
    new_context_id = str(reply["browserContextId"])
    browser._contexts.add(new_context_id)
    return BrowserContext(browser, new_context_id, proxy=parsed_proxy)
|
|
funbrowser/context_pool.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""ContextPool — lightweight pool of BrowserContexts on a shared Chrome.
|
|
2
|
+
|
|
3
|
+
API mirrors :class:`funbrowser.BrowserPool`: ``acquire()`` / ``run()`` /
|
|
4
|
+
``run_all()`` / ``stop()`` / async-context-manager support — only the
|
|
5
|
+
unit of isolation changes. Where ``BrowserPool`` keeps N Chrome
|
|
6
|
+
processes alive, ``ContextPool`` keeps **one** Chrome alive with N
|
|
7
|
+
isolated browser contexts.
|
|
8
|
+
|
|
9
|
+
Memory comparison (typical example, headless + mini):
|
|
10
|
+
|
|
11
|
+
- ``BrowserPool(size=10)`` -> ~10 x 100 MB = ~1.0 GB
|
|
12
|
+
- ``ContextPool(size=10)`` -> ~1 x 180 MB + 10 x 8 MB = ~260 MB
|
|
13
|
+
|
|
14
|
+
You give up the process-level isolation: if the host Chrome crashes,
|
|
15
|
+
every context dies with it. For farms where the workload is "many
|
|
16
|
+
parallel lightweight scrapes" this is the right shape; for "long-lived
|
|
17
|
+
high-value sessions where crash recovery matters", stick with
|
|
18
|
+
``BrowserPool``.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import asyncio
|
|
24
|
+
import logging
|
|
25
|
+
from collections.abc import AsyncIterator, Awaitable, Callable, Iterable, Sequence
|
|
26
|
+
from contextlib import asynccontextmanager
|
|
27
|
+
from types import TracebackType
|
|
28
|
+
from typing import Any, Self, TypeVar
|
|
29
|
+
|
|
30
|
+
from .browser import Browser
|
|
31
|
+
from .context import BrowserContext
|
|
32
|
+
from .proxy import Proxy
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
T = TypeVar("T")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ContextPool:
|
|
40
|
+
def __init__(
|
|
41
|
+
self,
|
|
42
|
+
size: int = 5,
|
|
43
|
+
*,
|
|
44
|
+
proxies: Sequence[str | Proxy] | None = None,
|
|
45
|
+
**browser_kwargs: Any,
|
|
46
|
+
) -> None:
|
|
47
|
+
if size < 1:
|
|
48
|
+
raise ValueError("pool size must be >= 1")
|
|
49
|
+
self._size = size
|
|
50
|
+
self._proxies = list(proxies) if proxies else None
|
|
51
|
+
self._browser_kwargs = browser_kwargs
|
|
52
|
+
self._lock = asyncio.Lock()
|
|
53
|
+
self._browser: Browser | None = None
|
|
54
|
+
self._contexts: list[BrowserContext] = []
|
|
55
|
+
self._available: asyncio.Queue[BrowserContext] = asyncio.Queue()
|
|
56
|
+
self._closed = False
|
|
57
|
+
|
|
58
|
+
@property
|
|
59
|
+
def size(self) -> int:
|
|
60
|
+
return self._size
|
|
61
|
+
|
|
62
|
+
@property
|
|
63
|
+
def created(self) -> int:
|
|
64
|
+
"""Number of contexts actually spawned so far (lazy)."""
|
|
65
|
+
return len(self._contexts)
|
|
66
|
+
|
|
67
|
+
@property
|
|
68
|
+
def idle(self) -> int:
|
|
69
|
+
return self._available.qsize()
|
|
70
|
+
|
|
71
|
+
@property
|
|
72
|
+
def busy(self) -> int:
|
|
73
|
+
return len(self._contexts) - self._available.qsize()
|
|
74
|
+
|
|
75
|
+
@property
|
|
76
|
+
def contexts(self) -> tuple[BrowserContext, ...]:
|
|
77
|
+
return tuple(self._contexts)
|
|
78
|
+
|
|
79
|
+
@property
|
|
80
|
+
def browser(self) -> Browser | None:
|
|
81
|
+
"""The shared host Browser. ``None`` until first acquire."""
|
|
82
|
+
return self._browser
|
|
83
|
+
|
|
84
|
+
async def _ensure_browser(self) -> Browser:
|
|
85
|
+
if self._browser is None:
|
|
86
|
+
self._browser = await Browser.start(**self._browser_kwargs)
|
|
87
|
+
return self._browser
|
|
88
|
+
|
|
89
|
+
async def _spawn(self, index: int) -> BrowserContext:
|
|
90
|
+
browser = await self._ensure_browser()
|
|
91
|
+
proxy = self._proxies[index % len(self._proxies)] if self._proxies else None
|
|
92
|
+
logger.debug("context-pool: spawning context %d/%d", index + 1, self._size)
|
|
93
|
+
return await browser.create_context(proxy=proxy)
|
|
94
|
+
|
|
95
|
+
@asynccontextmanager
|
|
96
|
+
async def acquire(self) -> AsyncIterator[BrowserContext]:
|
|
97
|
+
if self._closed:
|
|
98
|
+
raise RuntimeError("pool is closed")
|
|
99
|
+
|
|
100
|
+
ctx: BrowserContext | None = None
|
|
101
|
+
try:
|
|
102
|
+
ctx = self._available.get_nowait()
|
|
103
|
+
except asyncio.QueueEmpty:
|
|
104
|
+
ctx = None
|
|
105
|
+
|
|
106
|
+
if ctx is None:
|
|
107
|
+
async with self._lock:
|
|
108
|
+
try:
|
|
109
|
+
ctx = self._available.get_nowait()
|
|
110
|
+
except asyncio.QueueEmpty:
|
|
111
|
+
if len(self._contexts) < self._size:
|
|
112
|
+
idx = len(self._contexts)
|
|
113
|
+
ctx = await self._spawn(idx)
|
|
114
|
+
self._contexts.append(ctx)
|
|
115
|
+
if ctx is None:
|
|
116
|
+
ctx = await self._available.get()
|
|
117
|
+
|
|
118
|
+
try:
|
|
119
|
+
yield ctx
|
|
120
|
+
finally:
|
|
121
|
+
if not self._closed:
|
|
122
|
+
self._available.put_nowait(ctx)
|
|
123
|
+
|
|
124
|
+
async def run(self, task: Callable[[BrowserContext], Awaitable[T]]) -> T:
|
|
125
|
+
async with self.acquire() as ctx:
|
|
126
|
+
return await task(ctx)
|
|
127
|
+
|
|
128
|
+
async def run_all(self, tasks: Iterable[Callable[[BrowserContext], Awaitable[T]]]) -> list[T]:
|
|
129
|
+
return list(await asyncio.gather(*(self.run(t) for t in tasks)))
|
|
130
|
+
|
|
131
|
+
async def stop(self) -> None:
|
|
132
|
+
if self._closed:
|
|
133
|
+
return
|
|
134
|
+
self._closed = True
|
|
135
|
+
for ctx in self._contexts:
|
|
136
|
+
try:
|
|
137
|
+
await ctx.close()
|
|
138
|
+
except Exception:
|
|
139
|
+
pass
|
|
140
|
+
self._contexts.clear()
|
|
141
|
+
while not self._available.empty():
|
|
142
|
+
try:
|
|
143
|
+
self._available.get_nowait()
|
|
144
|
+
except asyncio.QueueEmpty:
|
|
145
|
+
break
|
|
146
|
+
if self._browser is not None:
|
|
147
|
+
try:
|
|
148
|
+
await self._browser.stop()
|
|
149
|
+
except Exception:
|
|
150
|
+
pass
|
|
151
|
+
self._browser = None
|
|
152
|
+
|
|
153
|
+
async def __aenter__(self) -> Self:
|
|
154
|
+
return self
|
|
155
|
+
|
|
156
|
+
async def __aexit__(
|
|
157
|
+
self,
|
|
158
|
+
exc_type: type[BaseException] | None,
|
|
159
|
+
exc: BaseException | None,
|
|
160
|
+
tb: TracebackType | None,
|
|
161
|
+
) -> None:
|
|
162
|
+
await self.stop()
|