anti-cf 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anti_cf/__init__.py +5 -0
- anti_cf/_constants.py +9 -0
- anti_cf/_flaresolverr.py +54 -0
- anti_cf/_persistent_session.py +150 -0
- anti_cf-1.0.1.dist-info/METADATA +123 -0
- anti_cf-1.0.1.dist-info/RECORD +7 -0
- anti_cf-1.0.1.dist-info/WHEEL +4 -0
anti_cf/__init__.py
ADDED
anti_cf/_constants.py
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Final
|
5
|
+
|
6
|
+
# Directory under the user's home where the package keeps its on-disk state.
CACHE_PATH: Final[Path] = Path.home() / ".cache/anti_cf"
# Created at import time so that code importing this module can write into it
# without checking for the directory first.
CACHE_PATH.mkdir(exist_ok=True, parents=True)

# Base URL of the FlareSolverr API. The trailing slash matters: callers build
# endpoint URLs by plain string concatenation.
FLARESOLVERR_PROXY: Final[str] = "http://localhost:8191/"

# Timeout in seconds.
DEFAULT_TIMEOUT: Final[int] = 600
|
anti_cf/_flaresolverr.py
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import subprocess
|
4
|
+
import time
|
5
|
+
|
6
|
+
import requests
|
7
|
+
from logprise import logger
|
8
|
+
|
9
|
+
from ._constants import FLARESOLVERR_PROXY
|
10
|
+
|
11
|
+
|
12
|
+
def get_flaresolverr_settings() -> dict | None:
    """Probe the FlareSolverr API and return the JSON it answers with.

    Sends a GET to ``FLARESOLVERR_PROXY`` with a 0.1 second timeout.

    Returns:
        The decoded JSON body when the endpoint answers with a successful
        status, ``None`` when it is unreachable, answers with an error status,
        or does not return valid JSON.
    """
    try:
        resp = requests.get(FLARESOLVERR_PROXY, timeout=0.1)
        resp.raise_for_status()
        return resp.json()
    except (requests.RequestException, ValueError):
        # RequestException covers connection errors, timeouts and HTTP error
        # statuses; ValueError covers a body that is not valid JSON.
        # KeyboardInterrupt / SystemExit are deliberately left to propagate.
        return None
|
20
|
+
|
21
|
+
|
22
|
+
def start_flaresolverr_docker() -> subprocess.Popen | None:
    """Launch the FlareSolverr docker container and wait for its API.

    Runs ``docker run --rm -p 8191:8191 ghcr.io/svaningelgem/flaresolverr:latest``
    and then probes the API up to 10 times, sleeping one second between probes.

    Returns:
        The ``Popen`` handle of the ``docker run`` process. It is returned both
        when the API answered and when it did not answer within the probe
        window (an error is logged in the latter case). ``None`` is returned
        when the process could not be started or exited before the API
        became reachable.
    """
    try:
        logger.info("Starting FlareSolverr docker container...")
        process = subprocess.Popen(
            ["docker", "run", "--rm", "-p", "8191:8191", "ghcr.io/svaningelgem/flaresolverr:latest"],
            # Nothing ever reads the container output. With PIPE the child
            # would block once the OS pipe buffer fills up, so discard it.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # Wait for the container to be ready: up to 10 probes, ~1 second apart.
        for attempt in range(10):
            if attempt > 0:
                time.sleep(1)

            if get_flaresolverr_settings() is not None:
                logger.info("FlareSolverr is ready")
                return process

            # No point in waiting any longer if `docker run` already terminated
            # (docker missing an image, port already bound, daemon not running, ...).
            exit_code = process.poll()
            if exit_code is not None:
                logger.error(f"Failed to start FlareSolverr docker: process exited with code {exit_code}")
                return None

        logger.error("FlareSolverr container started but API not responding")
        return process
    except Exception as e:
        # Best effort: a missing docker binary (or any other launch problem)
        # must not prevent the caller from continuing without FlareSolverr.
        logger.error(f"Failed to start FlareSolverr docker: {e}")
        return None
|
46
|
+
|
47
|
+
|
48
|
+
def ensure_flaresolverr_running() -> subprocess.Popen | None:
    """Start FlareSolverr through docker unless its API already answers.

    Returns:
        ``None`` when the API was already reachable; otherwise whatever
        ``start_flaresolverr_docker()`` returns.
    """
    api_is_up = get_flaresolverr_settings() is not None
    if not api_is_up:
        return start_flaresolverr_docker()

    logger.info("FlareSolverr API is already running")
    return None
|
@@ -0,0 +1,150 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import pickle
|
4
|
+
import tempfile
|
5
|
+
from typing import TYPE_CHECKING, ClassVar
|
6
|
+
|
7
|
+
import fake_useragent
|
8
|
+
from logprise import logger
|
9
|
+
|
10
|
+
from ._constants import CACHE_PATH, DEFAULT_TIMEOUT, FLARESOLVERR_PROXY
|
11
|
+
from ._flaresolverr import ensure_flaresolverr_running, get_flaresolverr_settings
|
12
|
+
|
13
|
+
# HTTPError is caught by PersistentSession.get() no matter which Session
# flavour is in use, so it must be imported unconditionally. Importing it only
# in the fallback branch left the name undefined whenever requests_cache was
# installed.
from requests import HTTPError

try:
    from requests_cache import CachedSession as Session
except ImportError:
    # requests_cache is optional: fall back to the plain requests session.
    from requests import Session

    _HAS_CACHE = False
else:
    _HAS_CACHE = True
    logger.info("Using CachedSession for persistent session")
|
22
|
+
|
23
|
+
if TYPE_CHECKING:
|
24
|
+
from pathlib import Path
|
25
|
+
|
26
|
+
from requests import Response
|
27
|
+
|
28
|
+
|
29
|
+
class PersistentSession(Session):
    """Session that persists cookies and its user agent and can use FlareSolverr.

    The base class is ``requests_cache.CachedSession`` when that package is
    importable and ``requests.Session`` otherwise (see ``_HAS_CACHE``).
    Cookies are pickled to ``_COOKIES_FILE`` after every request and reloaded
    on construction; the active user agent is stored in ``_USER_AGENT_FILE``.
    """

    # Pickled cookie jar, rewritten after every request.
    _COOKIES_FILE: ClassVar[Path] = CACHE_PATH / "cookies.pkl"
    # Plain-text copy of the user agent most recently applied to the session.
    _USER_AGENT_FILE: ClassVar[Path] = CACHE_PATH / "user_agent.txt"

    def __init__(self) -> None:
        """Create the session, restore cookies, ensure FlareSolverr and set the user agent."""
        if _HAS_CACHE:
            super().__init__(
                CACHE_PATH / "url_cache.sqlite",
                backend="sqlite",
                cache_control=False,
                expire_after=2 * 3600,
            )
        else:
            super().__init__()

        # Set directly on the session: a plain `requests.Session()` takes no
        # constructor arguments, so this is the one place that works for both
        # base classes.
        self.headers.update(
            {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.5",
            }
        )

        self._load_cookies()
        ensure_flaresolverr_running()
        self.set_user_agent()

    def _get_user_agent(self) -> str:
        """Pick the user agent to use.

        Preference order: the ``userAgent`` reported by FlareSolverr, then the
        value stored in ``_USER_AGENT_FILE``, then a random Chrome-on-Windows
        user agent from ``fake_useragent``.
        """
        flaresolverr_settings = get_flaresolverr_settings()
        if flaresolverr_settings is not None:
            # `.get` instead of indexing: an answer without the key should fall
            # through to the other sources rather than raise KeyError.
            reported = flaresolverr_settings.get("userAgent")
            if reported:
                return reported

        if self._USER_AGENT_FILE.exists():
            stored = self._USER_AGENT_FILE.read_text(encoding="utf8").strip()
            # An empty file would otherwise yield an empty User-Agent header.
            if stored:
                return stored

        return fake_useragent.UserAgent(os="windows", platforms="pc", browsers="chrome").random

    def set_user_agent(self, user_agent: str | None = None) -> None:
        """Apply ``user_agent`` to the session headers and persist it.

        Args:
            user_agent: Value for the ``User-Agent`` header. When ``None`` the
                value is chosen by ``_get_user_agent()``.
        """
        if user_agent is None:
            user_agent = self._get_user_agent()

        self.headers["User-Agent"] = user_agent
        self._USER_AGENT_FILE.write_text(user_agent, encoding="utf8")

    def _load_cookies(self) -> None:
        """Load cookies from ``_COOKIES_FILE`` if it exists.

        A file that cannot be read or unpickled is logged and deleted.
        """
        if not self._COOKIES_FILE.exists():
            return

        try:
            with self._COOKIES_FILE.open("rb") as fp:
                # NOTE(security): unpickling executes arbitrary code embedded
                # in the file. This is only acceptable while the cache
                # directory is writable by the current user alone.
                self.cookies.update(pickle.load(fp))
        except Exception as e:
            logger.error(f"Failed to load cookies from {self._COOKIES_FILE}: {e}")
            # missing_ok: the file may have vanished in the meantime; cleaning
            # up a broken cache must not itself raise.
            self._COOKIES_FILE.unlink(missing_ok=True)

    def save_cookies(self) -> None:
        """Save current cookies to file."""
        self._COOKIES_FILE.write_bytes(pickle.dumps(self.cookies, protocol=4))

    def request(self, *args: object, **kwargs: object) -> Response:
        """Perform the request via the base class, then persist the cookie jar.

        Cookies are only saved when the request returns; if the base class
        raises, the exception propagates and nothing is written.
        """
        response = super().request(*args, **kwargs)
        self.save_cookies()
        return response

    def get(
        self,
        url: str | bytes,
        *,
        try_with_cloudflare: bool = False,
        _cloudflare_counter: int = 0,
        **kwargs: object,
    ) -> Response | None:
        """GET ``url``, falling back to FlareSolverr on a Cloudflare challenge.

        A direct request is attempted unless ``try_with_cloudflare`` is set and
        no ``cf_clearance`` cookie is stored. If that request fails with an
        HTTP error whose body contains "just a moment", or if the direct
        request was skipped, the URL is fetched through FlareSolverr to collect
        cookies and then requested again directly.

        Args:
            url: Address to fetch.
            try_with_cloudflare: When true and no ``cf_clearance`` cookie is
                present, skip the direct attempt and go to FlareSolverr first.
            _cloudflare_counter: Not used by this method; kept so existing
                callers passing it keep working.
            **kwargs: Forwarded to the base class ``get``.

        Returns:
            The response of the direct request, or of the request repeated
            after FlareSolverr ran (its status is not checked). ``None`` when
            the direct request failed with an HTTP error that does not look
            like a Cloudflare challenge.

        Raises:
            Exception: Whatever the FlareSolverr round trip or the repeated
                request raises, after logging an error.
        """
        if not try_with_cloudflare or "cf_clearance" in self.cookies:
            try:
                resp = super().get(url, **kwargs)
                resp.raise_for_status()
                return resp
            except HTTPError as e:
                if b"just a moment" not in e.response.content.lower():
                    # Keep the body around for inspection; delete=False so the
                    # file survives the `with` block.
                    with tempfile.NamedTemporaryFile(delete=False) as f:
                        f.write(e.response.content)
                    logger.error(f"No cloudflare trigger in response? [exception: {e}] [content: {f.name}]")
                    logger.exception(e)
                    return None

            # Only reached when the direct attempt hit a Cloudflare challenge.
            # Kept inside this branch so that "cookie expired" is not reported
            # when there was no cookie and no direct attempt at all.
            if try_with_cloudflare:
                logger.warning("Cloudflare cookie expired")
            else:
                logger.warning("Cloudflare detected, but `try_with_cloudflare` wasn't set to True!")

        try:
            # The URL ends up in a JSON payload, which cannot carry bytes.
            self._get_url_via_flaresolverr(url.decode() if isinstance(url, bytes) else url)
            # After the url is retrieved from the flaresolverr proxy, it's not necessarily the one we want
            # --> So we'll re-request it here:
            return super().get(url, **kwargs)
        except Exception:
            logger.error("FlareSolverr didn't solve it :(")
            raise

    def _get_url_via_flaresolverr(self, url: str) -> dict:
        """Fetch ``url`` through FlareSolverr and copy its cookies into this session.

        Posts a ``request.get`` command to ``FLARESOLVERR_PROXY + "v1"``, adds
        every cookie listed under ``solution.cookies`` of the answer to the
        session's cookie jar and saves the jar.

        Args:
            url: Address FlareSolverr should load.

        Returns:
            The decoded JSON answer from FlareSolverr.

        Raises:
            requests.HTTPError: If FlareSolverr answers with an error status.
            KeyError: If the answer lacks ``solution``/``cookies`` or a cookie
                lacks ``name``/``value``.
        """
        payload = {
            "cmd": "request.get",
            "url": url,
            # DEFAULT_TIMEOUT is in seconds; the value sent here is 1000x that.
            "maxTimeout": DEFAULT_TIMEOUT * 1_000,
        }
        response = self.post(
            FLARESOLVERR_PROXY + "v1",
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=DEFAULT_TIMEOUT,
        )
        response.raise_for_status()

        answer = response.json()
        for cookie in answer["solution"]["cookies"]:
            self.cookies.set(
                name=cookie["name"],  # required
                value=cookie["value"],  # required
                version=cookie.get("version", 0),
                port=cookie.get("port", None),
                domain=cookie.get("domain", ""),
                path=cookie.get("path", "/"),
                secure=cookie.get("secure", False),
                expires=cookie.get("expires", None),
                discard=cookie.get("discard", True),
                comment=cookie.get("comment", None),
                comment_url=cookie.get("comment_url", None),
                rest=cookie.get("rest", {"HttpOnly": None}),
                rfc2109=cookie.get("rfc2109", False),
            )
        self.save_cookies()

        return answer
|
148
|
+
|
149
|
+
|
150
|
+
# Module-level shared instance. Constructing it runs PersistentSession.__init__,
# so importing this module loads the stored cookies, probes FlareSolverr (and
# may try to start its docker container) and sets the user agent.
session = PersistentSession()
|
@@ -0,0 +1,123 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: anti_cf
|
3
|
+
Version: 1.0.1
|
4
|
+
Summary: Anti-CloudFlare package
|
5
|
+
License: MIT
|
6
|
+
Author: Steven Van Ingelgem
|
7
|
+
Author-email: steven@vaningelgem.be
|
8
|
+
Requires-Python: >=3.11,<4.0
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
14
|
+
Requires-Dist: fake-useragent
|
15
|
+
Requires-Dist: logprise
|
16
|
+
Requires-Dist: requests
|
17
|
+
Description-Content-Type: text/markdown
|
18
|
+
|
19
|
+
# anti-cf
|
20
|
+
|
21
|
+
A Python library for handling Cloudflare-protected websites using FlareSolverr.
|
22
|
+
|
23
|
+
## Overview
|
24
|
+
|
25
|
+
`anti-cf` provides a persistent session wrapper for handling websites protected by Cloudflare's anti-bot measures. It automatically manages cookies, user agents, and integrates with FlareSolverr to bypass Cloudflare challenges.
|
26
|
+
|
27
|
+
## Features
|
28
|
+
|
29
|
+
- Persistent cookie storage
|
30
|
+
- Automatic FlareSolverr management (including Docker startup)
|
31
|
+
- Optional request caching via `requests-cache`
|
32
|
+
- Random user agent generation
|
33
|
+
- Transparent handling of Cloudflare challenges
|
34
|
+
|
35
|
+
## Installation
|
36
|
+
|
37
|
+
```bash
|
38
|
+
pip install anti-cf
|
39
|
+
```
|
40
|
+
|
41
|
+
## Usage
|
42
|
+
|
43
|
+
### Basic Usage
|
44
|
+
|
45
|
+
```python
|
46
|
+
from anti_cf import session
|
47
|
+
|
48
|
+
# The library will automatically check if FlareSolverr is running
|
49
|
+
# and start it if needed using Docker
|
50
|
+
|
51
|
+
# For Cloudflare-protected sites
|
52
|
+
response = session.get("https://cloudflare-protected-site.com", try_with_cloudflare=True)
|
53
|
+
|
54
|
+
# For regular requests
|
55
|
+
response = session.get("https://example.com")
|
56
|
+
```
|
57
|
+
|
58
|
+
### Advanced Usage
|
59
|
+
|
60
|
+
```python
|
61
|
+
from anti_cf import session
|
62
|
+
|
63
|
+
# Set a custom user agent
|
64
|
+
session.set_user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
|
65
|
+
|
66
|
+
# Post requests work as normal
|
67
|
+
response = session.post("https://example.com/api", json={"key": "value"})
|
68
|
+
|
69
|
+
# All cookies are automatically saved between requests
|
70
|
+
```
|
71
|
+
|
72
|
+
### Error Handling
|
73
|
+
|
74
|
+
```python
|
75
|
+
from anti_cf import session
|
76
|
+
from requests import HTTPError
|
77
|
+
|
78
|
+
try:
|
79
|
+
response = session.get("https://cloudflare-protected-site.com", try_with_cloudflare=True)
|
80
|
+
response.raise_for_status()
|
81
|
+
except HTTPError as e:
|
82
|
+
print(f"HTTP error occurred: {e}")
|
83
|
+
```
|
84
|
+
|
85
|
+
## Dependencies
|
86
|
+
|
87
|
+
- Python 3.11+
|
88
|
+
- FlareSolverr
|
89
|
+
- Docker (optional, for automatic FlareSolverr startup)
|
90
|
+
- `requests` (required); `requests-cache` (optional, enables response caching)
|
91
|
+
- `fake-useragent`
|
92
|
+
- `logprise`
|
93
|
+
|
94
|
+
## Configuration
|
95
|
+
|
96
|
+
The library uses the following default settings:
|
97
|
+
- Cache directory: `~/.cache/anti_cf/`
|
98
|
+
- FlareSolverr API: `http://localhost:8191/`
|
99
|
+
- Default timeout: 600 seconds
|
100
|
+
- Cache expiry: 2 hours (when using `requests-cache`)
|
101
|
+
|
102
|
+
## How It Works
|
103
|
+
|
104
|
+
1. When making a request to a Cloudflare-protected site:
|
105
|
+
- First attempts a normal request (skipped when `try_with_cloudflare=True` and no `cf_clearance` cookie is stored yet)
|
106
|
+
- If Cloudflare challenge detected, sends the request through FlareSolverr
|
107
|
+
- Stores the resulting cookies for future requests
|
108
|
+
|
109
|
+
2. On startup:
|
110
|
+
- Checks if FlareSolverr API is reachable
|
111
|
+
- If not available, automatically starts the Docker container
|
112
|
+
|
113
|
+
## Docker
|
114
|
+
|
115
|
+
By default, `anti-cf` will attempt to start the FlareSolverr Docker container:
|
116
|
+
|
117
|
+
```
|
118
|
+
ghcr.io/svaningelgem/flaresolverr:latest
|
119
|
+
```
|
120
|
+
|
121
|
+
## License
|
122
|
+
|
123
|
+
Released under the MIT License. Copyright © Steven Van Ingelgem <steven@vaningelgem.be>
|
@@ -0,0 +1,7 @@
|
|
1
|
+
anti_cf/__init__.py,sha256=ZFXXzfWeiBqKC56vdyod1yXj5_Kg4NXJeTejs2WoEEU,71
|
2
|
+
anti_cf/_constants.py,sha256=xFIXkeBVkuHsTQKQU4p2leJkySHdJ_6xApgIoVrBWHA,262
|
3
|
+
anti_cf/_flaresolverr.py,sha256=vOuut3fH1CstumQrNyIhm7B58ybLCSPUaK5NDTiQwj8,1633
|
4
|
+
anti_cf/_persistent_session.py,sha256=exxEpwqfRXonezFh3BEu4JclZ1lTGShM4l0fSlOIeeI,5634
|
5
|
+
anti_cf-1.0.1.dist-info/METADATA,sha256=9exeaEIAuWMlnuLceXcoo8A9dftwHKDh6NLZ5UIWZgk,3187
|
6
|
+
anti_cf-1.0.1.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
7
|
+
anti_cf-1.0.1.dist-info/RECORD,,
|