funbrowser 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- funbrowser/__init__.py +120 -0
- funbrowser/_cdp.py +181 -0
- funbrowser/_errors.py +32 -0
- funbrowser/_flags.py +89 -0
- funbrowser/_launcher.py +153 -0
- funbrowser/browser.py +281 -0
- funbrowser/context.py +163 -0
- funbrowser/context_pool.py +162 -0
- funbrowser/element.py +258 -0
- funbrowser/fingerprint/__init__.py +14 -0
- funbrowser/fingerprint/data.py +74 -0
- funbrowser/fingerprint/presets.py +588 -0
- funbrowser/geo.py +139 -0
- funbrowser/humanly.py +188 -0
- funbrowser/panel.py +1181 -0
- funbrowser/pool.py +152 -0
- funbrowser/profile.py +73 -0
- funbrowser/proxy.py +236 -0
- funbrowser/py.typed +0 -0
- funbrowser/solver/__init__.py +12 -0
- funbrowser/solver/bridge.py +167 -0
- funbrowser/solver/client.py +244 -0
- funbrowser/solver/scripts/__init__.py +0 -0
- funbrowser/solver/scripts/_bootstrap.js +30 -0
- funbrowser/solver/scripts/funcaptcha.js +74 -0
- funbrowser/solver/scripts/geetest.js +76 -0
- funbrowser/solver/scripts/hcaptcha.js +76 -0
- funbrowser/solver/scripts/recaptcha_v2.js +79 -0
- funbrowser/solver/scripts/recaptcha_v3.js +45 -0
- funbrowser/solver/scripts/turnstile.js +60 -0
- funbrowser/stealth/__init__.py +13 -0
- funbrowser/stealth/flags.py +54 -0
- funbrowser/stealth/patches.py +214 -0
- funbrowser/stealth/scripts/__init__.py +0 -0
- funbrowser/stealth/scripts/_camouflage.js +32 -0
- funbrowser/stealth/scripts/_cleanup.js +8 -0
- funbrowser/stealth/scripts/audio_noise.js +32 -0
- funbrowser/stealth/scripts/canvas_noise.js +43 -0
- funbrowser/stealth/scripts/chrome_runtime.js +53 -0
- funbrowser/stealth/scripts/hardware.js +15 -0
- funbrowser/stealth/scripts/languages.js +13 -0
- funbrowser/stealth/scripts/permissions.js +15 -0
- funbrowser/stealth/scripts/platform.js +18 -0
- funbrowser/stealth/scripts/plugins.js +37 -0
- funbrowser/stealth/scripts/screen_props.js +18 -0
- funbrowser/stealth/scripts/webdriver.js +14 -0
- funbrowser/stealth/scripts/webgl.js +27 -0
- funbrowser/stealth/scripts/webrtc.js +45 -0
- funbrowser/tab.py +345 -0
- funbrowser/tls/__init__.py +25 -0
- funbrowser/tls/ca.py +181 -0
- funbrowser/tls/http.py +145 -0
- funbrowser/tls/mitm.py +326 -0
- funbrowser-0.1.0.dist-info/METADATA +316 -0
- funbrowser-0.1.0.dist-info/RECORD +57 -0
- funbrowser-0.1.0.dist-info/WHEEL +4 -0
- funbrowser-0.1.0.dist-info/licenses/LICENSE +21 -0
funbrowser/pool.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""BrowserPool — run a farm of Browser instances with bounded concurrency.
|
|
2
|
+
|
|
3
|
+
Each pool holds up to ``size`` :class:`Browser` instances, created lazily on
|
|
4
|
+
first use and kept alive between tasks. Acquire one via the
|
|
5
|
+
``acquire()`` async-context-manager, or submit a callable via ``run(fn)`` /
|
|
6
|
+
``run_all([fn, ...])`` and the pool will dispatch + retrieve the result.
|
|
7
|
+
|
|
8
|
+
If ``proxies`` is given, each browser in the pool gets the next proxy from
|
|
9
|
+
the list (round-robin by creation order). Combined with
|
|
10
|
+
``geo_autoconfigure=True`` (default), the result is a fleet of browsers
|
|
11
|
+
each pinned to a different exit IP + timezone + locale.
|
|
12
|
+
|
|
13
|
+
Single-process only — for multi-machine farms, run separate pools.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import asyncio
|
|
19
|
+
import logging
|
|
20
|
+
from collections.abc import AsyncIterator, Awaitable, Callable, Iterable, Sequence
|
|
21
|
+
from contextlib import asynccontextmanager
|
|
22
|
+
from types import TracebackType
|
|
23
|
+
from typing import Any, Self, TypeVar
|
|
24
|
+
|
|
25
|
+
from .browser import Browser
|
|
26
|
+
from .proxy import Proxy
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
T = TypeVar("T")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class BrowserPool:
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
size: int = 5,
|
|
37
|
+
*,
|
|
38
|
+
proxies: Sequence[str | Proxy] | None = None,
|
|
39
|
+
**browser_kwargs: Any,
|
|
40
|
+
) -> None:
|
|
41
|
+
if size < 1:
|
|
42
|
+
raise ValueError("pool size must be >= 1")
|
|
43
|
+
self._size = size
|
|
44
|
+
self._proxies = list(proxies) if proxies else None
|
|
45
|
+
self._browser_kwargs = browser_kwargs
|
|
46
|
+
self._lock = asyncio.Lock()
|
|
47
|
+
self._created: list[Browser] = []
|
|
48
|
+
self._available: asyncio.Queue[Browser] = asyncio.Queue()
|
|
49
|
+
self._closed = False
|
|
50
|
+
|
|
51
|
+
@property
|
|
52
|
+
def size(self) -> int:
|
|
53
|
+
return self._size
|
|
54
|
+
|
|
55
|
+
@property
|
|
56
|
+
def created(self) -> int:
|
|
57
|
+
"""Number of browsers actually spawned so far (lazy)."""
|
|
58
|
+
return len(self._created)
|
|
59
|
+
|
|
60
|
+
@property
|
|
61
|
+
def idle(self) -> int:
|
|
62
|
+
"""How many of the created browsers are currently free."""
|
|
63
|
+
return self._available.qsize()
|
|
64
|
+
|
|
65
|
+
@property
|
|
66
|
+
def busy(self) -> int:
|
|
67
|
+
"""How many of the created browsers are currently in-use."""
|
|
68
|
+
return len(self._created) - self._available.qsize()
|
|
69
|
+
|
|
70
|
+
@property
|
|
71
|
+
def browsers(self) -> tuple[Browser, ...]:
|
|
72
|
+
"""Snapshot of all created browsers (busy or idle)."""
|
|
73
|
+
return tuple(self._created)
|
|
74
|
+
|
|
75
|
+
async def _spawn(self, index: int) -> Browser:
|
|
76
|
+
kwargs = dict(self._browser_kwargs)
|
|
77
|
+
if self._proxies:
|
|
78
|
+
kwargs["proxy"] = self._proxies[index % len(self._proxies)]
|
|
79
|
+
logger.debug("pool: spawning browser %d/%d", index + 1, self._size)
|
|
80
|
+
return await Browser.start(**kwargs)
|
|
81
|
+
|
|
82
|
+
@asynccontextmanager
|
|
83
|
+
async def acquire(self) -> AsyncIterator[Browser]:
|
|
84
|
+
if self._closed:
|
|
85
|
+
raise RuntimeError("pool is closed")
|
|
86
|
+
|
|
87
|
+
# Fast path — an idle browser is available.
|
|
88
|
+
browser: Browser | None = None
|
|
89
|
+
try:
|
|
90
|
+
browser = self._available.get_nowait()
|
|
91
|
+
except asyncio.QueueEmpty:
|
|
92
|
+
browser = None
|
|
93
|
+
|
|
94
|
+
if browser is None:
|
|
95
|
+
# Either lazy-spawn a fresh one (under cap) or wait for a busy
|
|
96
|
+
# browser to come back. The lock serialises the check.
|
|
97
|
+
async with self._lock:
|
|
98
|
+
try:
|
|
99
|
+
browser = self._available.get_nowait()
|
|
100
|
+
except asyncio.QueueEmpty:
|
|
101
|
+
if len(self._created) < self._size:
|
|
102
|
+
idx = len(self._created)
|
|
103
|
+
browser = await self._spawn(idx)
|
|
104
|
+
self._created.append(browser)
|
|
105
|
+
if browser is None:
|
|
106
|
+
browser = await self._available.get()
|
|
107
|
+
|
|
108
|
+
try:
|
|
109
|
+
yield browser
|
|
110
|
+
finally:
|
|
111
|
+
if not self._closed:
|
|
112
|
+
self._available.put_nowait(browser)
|
|
113
|
+
|
|
114
|
+
async def run(self, task: Callable[[Browser], Awaitable[T]]) -> T:
|
|
115
|
+
"""Acquire a browser, run ``task(browser)``, release. Returns the result."""
|
|
116
|
+
async with self.acquire() as browser:
|
|
117
|
+
return await task(browser)
|
|
118
|
+
|
|
119
|
+
async def run_all(self, tasks: Iterable[Callable[[Browser], Awaitable[T]]]) -> list[T]:
|
|
120
|
+
"""Dispatch every task across the pool concurrently and gather results.
|
|
121
|
+
|
|
122
|
+
At most :attr:`size` tasks execute in parallel; the rest queue.
|
|
123
|
+
"""
|
|
124
|
+
return list(await asyncio.gather(*(self.run(t) for t in tasks)))
|
|
125
|
+
|
|
126
|
+
async def stop(self) -> None:
|
|
127
|
+
"""Tear down every created browser. The pool is then unusable."""
|
|
128
|
+
if self._closed:
|
|
129
|
+
return
|
|
130
|
+
self._closed = True
|
|
131
|
+
await asyncio.gather(
|
|
132
|
+
*(b.stop() for b in self._created),
|
|
133
|
+
return_exceptions=True,
|
|
134
|
+
)
|
|
135
|
+
self._created.clear()
|
|
136
|
+
# Drain the queue so nobody waits forever on a closed pool.
|
|
137
|
+
while not self._available.empty():
|
|
138
|
+
try:
|
|
139
|
+
self._available.get_nowait()
|
|
140
|
+
except asyncio.QueueEmpty:
|
|
141
|
+
break
|
|
142
|
+
|
|
143
|
+
async def __aenter__(self) -> Self:
|
|
144
|
+
return self
|
|
145
|
+
|
|
146
|
+
async def __aexit__(
|
|
147
|
+
self,
|
|
148
|
+
exc_type: type[BaseException] | None,
|
|
149
|
+
exc: BaseException | None,
|
|
150
|
+
tb: TracebackType | None,
|
|
151
|
+
) -> None:
|
|
152
|
+
await self.stop()
|
funbrowser/profile.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Persistent browser profiles.
|
|
2
|
+
|
|
3
|
+
A profile is a directory Chrome uses for cookies, localStorage, IndexedDB,
|
|
4
|
+
extensions, history, and login state. Two sessions sharing the same
|
|
5
|
+
profile directory share that state; two sessions with different directories
|
|
6
|
+
are isolated.
|
|
7
|
+
|
|
8
|
+
Lifecycle:
|
|
9
|
+
- ``Profile.ensure("alice")`` returns the path under ``./funbrowser_profiles/alice``,
|
|
10
|
+
creating it if missing.
|
|
11
|
+
- Pass the path to ``funbrowser.start(user_data_dir=...)``.
|
|
12
|
+
- ``Profile.delete("alice")`` wipes the directory (use to log out / reset).
|
|
13
|
+
- ``Profile.list()`` enumerates profiles under the default root.
|
|
14
|
+
|
|
15
|
+
Only one Chrome instance per profile directory can run at a time — Chrome
|
|
16
|
+
holds a lock on the dir. Spawn a second instance against the same profile
|
|
17
|
+
and Chrome will exit early.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import os
|
|
23
|
+
import re
|
|
24
|
+
import shutil
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
_SAFE_NAME = re.compile(r"^[A-Za-z0-9_.\-]+$")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _default_root() -> Path:
|
|
31
|
+
env = os.environ.get("FUNBROWSER_PROFILES")
|
|
32
|
+
if env:
|
|
33
|
+
return Path(env)
|
|
34
|
+
return Path.cwd() / "funbrowser_profiles"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Profile:
|
|
38
|
+
@staticmethod
|
|
39
|
+
def root() -> Path:
|
|
40
|
+
return _default_root()
|
|
41
|
+
|
|
42
|
+
@staticmethod
|
|
43
|
+
def path(name: str, *, root: Path | None = None) -> Path:
|
|
44
|
+
if not _SAFE_NAME.match(name):
|
|
45
|
+
raise ValueError(f"profile name {name!r} must match {_SAFE_NAME.pattern!r}")
|
|
46
|
+
return (root or _default_root()) / name
|
|
47
|
+
|
|
48
|
+
@staticmethod
|
|
49
|
+
def ensure(name: str, *, root: Path | None = None) -> Path:
|
|
50
|
+
"""Return the profile path, creating the directory if needed."""
|
|
51
|
+
p = Profile.path(name, root=root)
|
|
52
|
+
p.mkdir(parents=True, exist_ok=True)
|
|
53
|
+
return p
|
|
54
|
+
|
|
55
|
+
@staticmethod
|
|
56
|
+
def exists(name: str, *, root: Path | None = None) -> bool:
|
|
57
|
+
return Profile.path(name, root=root).is_dir()
|
|
58
|
+
|
|
59
|
+
@staticmethod
|
|
60
|
+
def delete(name: str, *, root: Path | None = None) -> bool:
|
|
61
|
+
"""Remove the profile directory. Returns True if it existed."""
|
|
62
|
+
p = Profile.path(name, root=root)
|
|
63
|
+
if not p.exists():
|
|
64
|
+
return False
|
|
65
|
+
shutil.rmtree(p, ignore_errors=True)
|
|
66
|
+
return True
|
|
67
|
+
|
|
68
|
+
@staticmethod
|
|
69
|
+
def list(*, root: Path | None = None) -> list[str]:
|
|
70
|
+
r = root or _default_root()
|
|
71
|
+
if not r.is_dir():
|
|
72
|
+
return []
|
|
73
|
+
return sorted(p.name for p in r.iterdir() if p.is_dir())
|
funbrowser/proxy.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""Proxy parsing for every format a proxy provider has ever invented.
|
|
2
|
+
|
|
3
|
+
Accepted on the wire — pass any of these to ``funbrowser.start(proxy=...)``:
|
|
4
|
+
|
|
5
|
+
- ``scheme://user:pass@host:port`` — RFC 3986
|
|
6
|
+
- ``scheme://host:port``
|
|
7
|
+
- ``user:pass@host:port`` — same with implicit ``http``
|
|
8
|
+
- ``host:port@user:pass`` — some Bright Data exports
|
|
9
|
+
- ``host:port`` — no auth
|
|
10
|
+
- ``host:port:user:pass`` — IPRoyal, Smartproxy, most lists
|
|
11
|
+
- ``user:pass:host:port`` — some legacy lists
|
|
12
|
+
- ``host:port:user`` — port-then-user (no password)
|
|
13
|
+
|
|
14
|
+
Schemes recognised: ``http``, ``https``, ``socks``, ``socks4``, ``socks5``, ``socks5h``.
|
|
15
|
+
|
|
16
|
+
The format is auto-detected by inspecting which segment contains a valid
|
|
17
|
+
TCP port and which side of an ``@`` looks like a ``host:port`` pair.
|
|
18
|
+
|
|
19
|
+
HTTP/HTTPS authentication is plumbed through CDP automatically. SOCKS
|
|
20
|
+
authentication isn't exposed by Chrome at the HTTP-auth layer; front it
|
|
21
|
+
with a local HTTP proxy that adds credentials upstream.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import logging
|
|
27
|
+
import re
|
|
28
|
+
from dataclasses import dataclass
|
|
29
|
+
from typing import TYPE_CHECKING
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
from .tab import Tab
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
VALID_SCHEMES = frozenset({"http", "https", "socks", "socks4", "socks5", "socks5h"})
|
|
37
|
+
|
|
38
|
+
_IPV4 = re.compile(r"^\d{1,3}(?:\.\d{1,3}){3}$")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ProxyParseError(ValueError):
    """Signals that a proxy string matched none of the supported formats.

    Subclasses ``ValueError`` so callers that catch ``ValueError`` also
    catch parse failures.
    """
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True, slots=True)
|
|
46
|
+
class Proxy:
|
|
47
|
+
scheme: str
|
|
48
|
+
host: str
|
|
49
|
+
port: int
|
|
50
|
+
username: str | None = None
|
|
51
|
+
password: str | None = None
|
|
52
|
+
|
|
53
|
+
@property
|
|
54
|
+
def has_auth(self) -> bool:
|
|
55
|
+
return self.username is not None
|
|
56
|
+
|
|
57
|
+
@property
|
|
58
|
+
def is_socks(self) -> bool:
|
|
59
|
+
return self.scheme.startswith("socks")
|
|
60
|
+
|
|
61
|
+
def chrome_arg(self) -> str:
|
|
62
|
+
"""``--proxy-server=`` value. Auth is excluded — Chrome ignores it in the URL."""
|
|
63
|
+
scheme = "socks5" if self.scheme == "socks5h" else self.scheme
|
|
64
|
+
return f"{scheme}://{self.host}:{self.port}"
|
|
65
|
+
|
|
66
|
+
def url(self) -> str:
|
|
67
|
+
"""Full URL including auth, for libraries that take a single proxy URL."""
|
|
68
|
+
if self.has_auth:
|
|
69
|
+
return f"{self.scheme}://{self.username}:{self.password}@{self.host}:{self.port}"
|
|
70
|
+
return f"{self.scheme}://{self.host}:{self.port}"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def parse(s: str | Proxy) -> Proxy:
    """Turn a proxy string in any supported format into a :class:`Proxy`.

    A ``Proxy`` instance is returned unchanged. For strings, an optional
    ``scheme://`` prefix is split off first (defaulting to ``http``) and the
    rest is handed to the ``@``-aware or the colon-only parser.

    Raises:
        ProxyParseError: For non-string input, an empty string, an unknown
            scheme, or a string neither parser can interpret.
    """
    if isinstance(s, Proxy):
        return s
    if not isinstance(s, str):
        raise ProxyParseError(f"expected str or Proxy, got {type(s).__name__}")

    text = s.strip()
    if not text:
        raise ProxyParseError("empty proxy string")

    scheme_part, separator, remainder = text.partition("://")
    if separator:
        scheme = scheme_part.lower()
        if scheme not in VALID_SCHEMES:
            raise ProxyParseError(f"unknown scheme {scheme!r}")
        text = remainder
    else:
        scheme = "http"

    handler = _parse_with_at if "@" in text else _parse_colon_only
    return handler(scheme, text)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _parse_with_at(scheme: str, s: str) -> Proxy:
    """Parse ``auth@server`` or ``server@auth``, splitting on the last ``@``.

    Whichever side ends in a valid ``:port`` is taken as the server. When
    both sides qualify, the standard reading (auth left, server right) wins.

    Raises:
        ProxyParseError: If neither side looks like ``host:port`` or the
            username is empty.
    """
    left, _, right = s.rpartition("@")
    right_is_server = _looks_like_server(right)
    left_is_server = _looks_like_server(left)

    if right_is_server:
        # Covers the unambiguous case and the "both look like a server" tie.
        auth, server = left, right
    elif left_is_server:
        auth, server = right, left
    else:
        raise ProxyParseError(f"could not locate host:port on either side of '@' in {s!r}")

    user, _, pwd = auth.partition(":")
    if not user:
        raise ProxyParseError(f"empty username in {s!r}")
    host, port = _split_host_port(server)
    return Proxy(scheme, host, port, user, pwd or None)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _credentialed_proxy(
    scheme: str, host: str, port: str, user: str, pwd: str | None, raw: str
) -> Proxy:
    """Build an authenticated Proxy, rejecting empty host or username segments."""
    if not host:
        raise ProxyParseError(f"missing host in {raw!r}")
    if not user:
        raise ProxyParseError(f"empty username in {raw!r}")
    # An empty password becomes None, matching the ``@`` parser.
    return Proxy(scheme, host, int(port), user, pwd or None)


def _parse_colon_only(scheme: str, s: str) -> Proxy:
    """Parse a proxy string that has no ``@``, only colon-separated segments.

    Supported shapes:
        - ``host:port``
        - ``host:port:user`` (no password)
        - ``host:port:user:pass`` or ``user:pass:host:port``; the segment
          holding a valid port decides, and when both candidates are valid
          ports the segment that looks like a host does. If that is still a
          tie, ``host:port:user:pass`` is assumed.

    Raises:
        ProxyParseError: If no interpretation fits, or the chosen host or
            username segment is empty.
    """
    parts = s.split(":")

    if len(parts) == 2:
        host, port = _split_host_port(s)
        return Proxy(scheme, host, port)

    if len(parts) == 3:
        host, port_text, user = parts
        # host:port:user — port-then-user, no password
        if _is_port(port_text) and _looks_like_host(host):
            return _credentialed_proxy(scheme, host, port_text, user, None, s)
        raise ProxyParseError(f"ambiguous 3-segment proxy {s!r}")

    if len(parts) == 4:
        a, b, c, d = parts
        b_is_port = _is_port(b)
        d_is_port = _is_port(d)
        if not (b_is_port or d_is_port):
            raise ProxyParseError(f"could not find a port in 4-segment proxy {s!r}")
        if b_is_port and d_is_port:
            # Both look like ports; disambiguate by the host slot. Only a
            # clear "c is a host, a is not" flips the order; every other
            # outcome falls back to host:port:user:pass (the more common
            # listing convention from proxy providers).
            server_first = _looks_like_host(a) or not _looks_like_host(c)
        else:
            server_first = b_is_port
        if server_first:
            return _credentialed_proxy(scheme, a, b, c, d, s)
        return _credentialed_proxy(scheme, c, d, a, b, s)

    raise ProxyParseError(f"could not parse proxy {s!r}")
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _is_port(s: str) -> bool:
|
|
150
|
+
return s.isdigit() and 1 <= int(s) <= 65535
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _looks_like_host(s: str) -> bool:
|
|
154
|
+
if not s:
|
|
155
|
+
return False
|
|
156
|
+
if s == "localhost":
|
|
157
|
+
return True
|
|
158
|
+
if _IPV4.match(s):
|
|
159
|
+
return True
|
|
160
|
+
if "." in s and any(ch.isalpha() for ch in s):
|
|
161
|
+
return True
|
|
162
|
+
return False
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _looks_like_server(s: str) -> bool:
|
|
166
|
+
if ":" not in s:
|
|
167
|
+
return False
|
|
168
|
+
host, _, port = s.rpartition(":")
|
|
169
|
+
return bool(host) and _is_port(port)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _split_host_port(s: str) -> tuple[str, int]:
    """Split ``host:port`` at the last colon and return ``(host, port)``.

    Raises:
        ProxyParseError: If the host part is empty (including when ``s`` has
            no colon at all) or the port is not a valid TCP port.
    """
    hostname, _sep, port_text = s.rpartition(":")
    if not hostname:
        raise ProxyParseError(f"missing host in {s!r}")
    if _is_port(port_text):
        return hostname, int(port_text)
    raise ProxyParseError(f"invalid port in {s!r}")
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
async def attach_auth(tab: Tab, proxy: Proxy) -> None:
    """Install a per-tab CDP handler that satisfies HTTP/HTTPS proxy auth.

    Does nothing when the proxy has no credentials. For SOCKS proxies it
    only logs a warning and returns.

    SOCKS proxies don't surface their auth as an HTTP challenge — Chrome
    doesn't expose a public hook for it. SOCKS-with-auth callers should
    front the SOCKS server with a local HTTP proxy that adds credentials
    upstream (e.g. ``microsocks``, ``proxychains``, or ``proxy.py``).
    """
    if not proxy.has_auth:
        return
    if proxy.is_socks:
        logger.warning(
            "SOCKS proxy auth (%s) is not wired through CDP; Chrome will "
            "fail the connection. Front with a local HTTP proxy that adds "
            "credentials upstream.",
            proxy.host,
        )
        return

    # NOTE: per the CDP docs, `Fetch.enable` without `patterns` affects every
    # request, not just the ones that need auth. Each request therefore emits
    # `Fetch.requestPaused` and must be resumed by the handler below.
    await tab._send("Fetch.enable", {"handleAuthRequests": True})

    user = proxy.username or ""
    pwd = proxy.password or ""

    async def _on_auth_required(params: dict[str, object]) -> None:
        # Answer the auth challenge with the proxy's credentials.
        try:
            await tab._cdp.send(
                "Fetch.continueWithAuth",
                {
                    "requestId": params["requestId"],
                    "authChallengeResponse": {
                        "response": "ProvideCredentials",
                        "username": user,
                        "password": pwd,
                    },
                },
                session_id=tab.session_id,
            )
        except Exception:
            logger.exception("proxy auth: continueWithAuth failed")

    async def _on_request_paused(params: dict[str, object]) -> None:
        # Resume the paused request unchanged.
        try:
            await tab._cdp.send(
                "Fetch.continueRequest",
                {"requestId": params["requestId"]},
                session_id=tab.session_id,
            )
        except Exception:
            # Best-effort: the request may already have been resolved by the
            # auth callback. Keep a trace instead of failing silently.
            logger.debug("proxy auth: continueRequest failed", exc_info=True)

    tab._cdp.on("Fetch.authRequired", _on_auth_required, session_id=tab.session_id)
    tab._cdp.on("Fetch.requestPaused", _on_request_paused, session_id=tab.session_id)
|
funbrowser/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Solver — auto-solve captchas through the funsolver.com API.
|
|
2
|
+
|
|
3
|
+
Scope (M3): Cloudflare Turnstile. M4 adds the rest of the major captcha
|
|
4
|
+
families.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from .bridge import apply_solver
|
|
10
|
+
from .client import FunSolverClient, FunSolverError
|
|
11
|
+
|
|
12
|
+
__all__ = ["FunSolverClient", "FunSolverError", "apply_solver"]
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Wire funsolver.com calls onto a Tab via a CDP binding.
|
|
2
|
+
|
|
3
|
+
Mechanism:
|
|
4
|
+
- ``Runtime.addBinding`` registers ``window.__funbrowser_solve`` so any JS
|
|
5
|
+
call to it shows up on the CDP side as a ``Runtime.bindingCalled`` event.
|
|
6
|
+
- The bootstrap script gives the page a Promise-based API
|
|
7
|
+
(``window.__funbrowser.solve(req)``) that wraps the binding call.
|
|
8
|
+
- The Python handler reads the payload, talks to funsolver.com via the
|
|
9
|
+
``FunSolverClient``, and pushes the result back with ``Runtime.evaluate``
|
|
10
|
+
calling ``window.__funbrowser_resolve(id, {ok, token|error})``.
|
|
11
|
+
|
|
12
|
+
Note: this requires ``Runtime.enable``, which has a known minor antibot
|
|
13
|
+
tell (CDP frames appear in error stacks). Moving the binding to an
|
|
14
|
+
isolated world via ``Page.createIsolatedWorld`` is planned for a
|
|
15
|
+
follow-up — it keeps the binding off the page's main world entirely.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import logging
|
|
22
|
+
import time
|
|
23
|
+
from importlib.resources import files
|
|
24
|
+
from typing import TYPE_CHECKING, Any
|
|
25
|
+
|
|
26
|
+
from .client import FunSolverClient, FunSolverError
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from ..tab import Tab
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
BINDING_NAME = "__funbrowser_solve"
|
|
34
|
+
SCRIPTS = (
|
|
35
|
+
"_bootstrap.js",
|
|
36
|
+
"turnstile.js",
|
|
37
|
+
"recaptcha_v2.js",
|
|
38
|
+
"recaptcha_v3.js",
|
|
39
|
+
"hcaptcha.js",
|
|
40
|
+
"funcaptcha.js",
|
|
41
|
+
"geetest.js",
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _load_scripts() -> str:
    """Read every file named in ``SCRIPTS`` and join them into one source string.

    Files are read as UTF-8 from the ``funbrowser.solver.scripts`` package,
    in ``SCRIPTS`` order, and separated by a newline.
    """
    scripts_dir = files("funbrowser.solver.scripts")
    sources = [
        scripts_dir.joinpath(script_name).read_text(encoding="utf-8")
        for script_name in SCRIPTS
    ]
    return "\n".join(sources)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
_SOLVER_SOURCE = _load_scripts()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
async def _solve_dispatch(client: FunSolverClient, payload: dict[str, Any]) -> str:
|
|
54
|
+
cap_type = payload.get("type")
|
|
55
|
+
if cap_type == "turnstile":
|
|
56
|
+
return await client.solve_turnstile(
|
|
57
|
+
sitekey=payload["sitekey"],
|
|
58
|
+
page_url=payload["url"],
|
|
59
|
+
action=payload.get("action"),
|
|
60
|
+
cdata=payload.get("cdata"),
|
|
61
|
+
)
|
|
62
|
+
if cap_type == "recaptcha2":
|
|
63
|
+
return await client.solve_recaptcha_v2(
|
|
64
|
+
sitekey=payload["sitekey"],
|
|
65
|
+
page_url=payload["url"],
|
|
66
|
+
invisible=bool(payload.get("invisible", False)),
|
|
67
|
+
data_s=payload.get("dataS"),
|
|
68
|
+
is_enterprise=bool(payload.get("enterprise", False)),
|
|
69
|
+
)
|
|
70
|
+
if cap_type == "recaptcha3":
|
|
71
|
+
return await client.solve_recaptcha_v3(
|
|
72
|
+
sitekey=payload["sitekey"],
|
|
73
|
+
page_url=payload["url"],
|
|
74
|
+
action=payload.get("action", "verify"),
|
|
75
|
+
min_score=float(payload.get("minScore", 0.7)),
|
|
76
|
+
is_enterprise=bool(payload.get("enterprise", False)),
|
|
77
|
+
)
|
|
78
|
+
if cap_type == "hcaptcha":
|
|
79
|
+
return await client.solve_hcaptcha(
|
|
80
|
+
sitekey=payload["sitekey"],
|
|
81
|
+
page_url=payload["url"],
|
|
82
|
+
is_invisible=bool(payload.get("invisible", False)),
|
|
83
|
+
)
|
|
84
|
+
if cap_type == "funcaptcha":
|
|
85
|
+
return await client.solve_funcaptcha(
|
|
86
|
+
public_key=payload["sitekey"],
|
|
87
|
+
page_url=payload["url"],
|
|
88
|
+
surl=payload.get("surl"),
|
|
89
|
+
data=payload.get("blob"),
|
|
90
|
+
)
|
|
91
|
+
if cap_type == "geetest":
|
|
92
|
+
return await client.solve_geetest(
|
|
93
|
+
gt=payload["gt"],
|
|
94
|
+
challenge=payload["challenge"],
|
|
95
|
+
page_url=payload["url"],
|
|
96
|
+
api_server=payload.get("apiServer"),
|
|
97
|
+
version=int(payload.get("version", 3)),
|
|
98
|
+
)
|
|
99
|
+
raise FunSolverError(f"unsupported captcha type: {cap_type!r}")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
async def apply_solver(tab: Tab, client: FunSolverClient) -> None:
    """Attach the funsolver bridge + per-captcha detectors to a Tab.

    Enables the ``Runtime`` domain, registers the ``BINDING_NAME`` binding,
    subscribes a handler to ``Runtime.bindingCalled`` and injects the solver
    scripts into the tab.

    For each binding call the handler solves the captcha through ``client``,
    records a ``captcha`` event on the tab's browser, and reports the outcome
    to the page by evaluating ``window.__funbrowser_resolve(id, result)``.
    """
    await tab._send("Runtime.enable")
    await tab._send("Runtime.addBinding", {"name": BINDING_NAME})

    async def on_binding_called(params: dict[str, Any]) -> None:
        if params.get("name") != BINDING_NAME:
            return
        try:
            payload = json.loads(params["payload"])
        except (KeyError, TypeError, ValueError):
            # TypeError: the payload was present but not a string.
            logger.exception("solver: malformed binding payload")
            return
        if not isinstance(payload, dict):
            # Valid JSON that is not an object (list, string, number, null)
            # has no ``.get`` and would crash the handler below.
            logger.error(
                "solver: binding payload is not a JSON object (got %s)",
                type(payload).__name__,
            )
            return

        task_id = payload.get("id")
        cap_type = str(payload.get("type", "?"))
        t0 = time.monotonic()
        try:
            token = await _solve_dispatch(client, payload)
            ms = (time.monotonic() - t0) * 1000.0
            result: dict[str, Any] = {"ok": True, "token": token}
            tab._browser.record_event(
                kind="captcha",
                captcha=cap_type,
                ok=True,
                ms=ms,
                # Record only a short prefix of the token.
                token_preview=(token[:24] + "…") if len(token) > 24 else token,
                url=payload.get("url"),
            )
        except Exception as exc:
            # Any failure (solver error, missing key, ...) is reported to the
            # page as ``{ok: false}`` rather than raised out of the handler.
            ms = (time.monotonic() - t0) * 1000.0
            logger.warning("solver: solve failed: %s", exc)
            result = {"ok": False, "error": str(exc)}
            tab._browser.record_event(
                kind="captcha",
                captcha=cap_type,
                ok=False,
                ms=ms,
                error=str(exc),
                url=payload.get("url"),
            )

        try:
            await tab._cdp.send(
                "Runtime.evaluate",
                {
                    # json.dumps output is embedded directly as the JS
                    # arguments of the resolve call.
                    "expression": (
                        f"window.__funbrowser_resolve({json.dumps(task_id)}, {json.dumps(result)})"
                    ),
                    "awaitPromise": False,
                },
                session_id=tab.session_id,
            )
        except Exception:
            logger.exception("solver: failed to push result back to page")

    tab._cdp.on(
        "Runtime.bindingCalled",
        on_binding_called,
        session_id=tab.session_id,
    )

    await tab._send(
        "Page.addScriptToEvaluateOnNewDocument",
        {"source": _SOLVER_SOURCE, "runImmediately": True},
    )
|