anti-cf 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
anti_cf/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from ._persistent_session import session
2
+
3
+ __all__ = [
4
+ "session",
5
+ ]
anti_cf/_constants.py ADDED
@@ -0,0 +1,9 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Final
5
+
6
# Per-user cache directory used by this package.
CACHE_PATH: Final[Path] = Path.home() / ".cache/anti_cf"
# Created eagerly at import time (including missing parents) so that code
# writing into the directory does not have to create it first.
CACHE_PATH.mkdir(exist_ok=True, parents=True)

# Base URL of the FlareSolverr HTTP API; note the trailing slash.
FLARESOLVERR_PROXY: Final[str] = "http://localhost:8191/"

# Default timeout, in seconds.
DEFAULT_TIMEOUT: Final[int] = 600
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess
4
+ import time
5
+
6
+ import requests
7
+ from logprise import logger
8
+
9
+ from ._constants import FLARESOLVERR_PROXY
10
+
11
+
12
def get_flaresolverr_settings() -> dict | None:
    """Probe the FlareSolverr API and return the JSON it answers with.

    A GET request is sent to ``FLARESOLVERR_PROXY`` with a 0.1 second
    timeout, so an unreachable service is detected quickly.

    Returns:
        The decoded JSON body when the endpoint answers with a success
        status, otherwise ``None``.
    """
    try:
        resp = requests.get(FLARESOLVERR_PROXY, timeout=0.1)
        resp.raise_for_status()
        return resp.json()
    except Exception:
        # Best-effort probe: connection errors, timeouts, HTTP error statuses
        # and undecodable bodies all mean "not available". Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return None
20
+
21
+
22
def start_flaresolverr_docker() -> subprocess.Popen | None:
    """Start the FlareSolverr docker container and wait for its API.

    Runs ``docker run --rm -p 8191:8191 <image>`` in the background and
    probes the API up to 10 times, sleeping one second between attempts.

    Returns:
        The ``Popen`` handle of the ``docker run`` process. It is returned
        both when the API became reachable and when it did not (an error is
        logged in the latter case). ``None`` is returned when launching the
        process raised an exception, e.g. the ``docker`` executable is
        missing.
    """
    try:
        logger.info("Starting FlareSolverr docker container...")
        process = subprocess.Popen(
            ["docker", "run", "--rm", "-p", "8191:8191", "ghcr.io/svaningelgem/flaresolverr:latest"],
            # Nothing ever reads the container output. With PIPE the OS pipe
            # buffer would eventually fill up and block the child process,
            # so the output is discarded instead.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # Wait for container to be ready
        for loop in range(10):  # Try for 10 seconds
            if loop > 0:
                time.sleep(1)

            if get_flaresolverr_settings() is not None:
                logger.info("FlareSolverr is ready")
                return process

            if process.poll() is not None:
                # ``docker run`` already terminated; further probing is
                # pointless, so stop waiting early.
                logger.error(f"docker exited prematurely with return code {process.returncode}")
                break

        logger.error("FlareSolverr container started but API not responding")
        return process
    except Exception as e:
        logger.error(f"Failed to start FlareSolverr docker: {e}")
        return None
46
+
47
+
48
def ensure_flaresolverr_running() -> subprocess.Popen | None:
    """Make sure a FlareSolverr API is available, launching docker if not.

    Returns:
        ``None`` when the API already answers; otherwise whatever
        ``start_flaresolverr_docker()`` returns.
    """
    api_reachable = get_flaresolverr_settings() is not None
    if not api_reachable:
        return start_flaresolverr_docker()

    logger.info("FlareSolverr API is already running")
    return None
@@ -0,0 +1,150 @@
1
+ from __future__ import annotations
2
+
3
+ import pickle
4
+ import tempfile
5
+ from typing import TYPE_CHECKING, ClassVar
6
+
7
+ import fake_useragent
8
+ from logprise import logger
9
+
10
+ from ._constants import CACHE_PATH, DEFAULT_TIMEOUT, FLARESOLVERR_PROXY
11
+ from ._flaresolverr import ensure_flaresolverr_running, get_flaresolverr_settings
12
+
13
+ try:
14
+ from requests_cache import CachedSession as Session
15
+
16
+ _HAS_CACHE = True
17
+ logger.info("Using CachedSession for persistent session")
18
+ except ImportError:
19
+ from requests import HTTPError, Session
20
+
21
+ _HAS_CACHE = False
22
+
23
+ if TYPE_CHECKING:
24
+ from pathlib import Path
25
+
26
+ from requests import Response
27
+
28
+
29
class PersistentSession(Session):
    """Session that persists cookies and user agent and can use FlareSolverr.

    ``Session`` is ``requests_cache.CachedSession`` when that package is
    importable and ``requests.Session`` otherwise. Cookies are pickled to
    ``CACHE_PATH / "cookies.pkl"`` after every request, and the user agent is
    stored in ``CACHE_PATH / "user_agent.txt"``.
    """

    _COOKIES_FILE: ClassVar[Path] = CACHE_PATH / "cookies.pkl"
    _USER_AGENT_FILE: ClassVar[Path] = CACHE_PATH / "user_agent.txt"

    def __init__(self) -> None:
        """Create the session, restore cookies and pick a user agent.

        Also makes sure FlareSolverr is running (which may start a docker
        container) before the user agent is chosen.
        """
        if _HAS_CACHE:
            # Responses are cached in a sqlite file for two hours.
            super().__init__(
                CACHE_PATH / "url_cache.sqlite",
                backend="sqlite",
                cache_control=False,
                expire_after=2 * 3600,
                headers={
                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                    "Accept-Language": "en-US,en;q=0.5",
                },
            )
        else:
            super().__init__()

        self._load_cookies()
        ensure_flaresolverr_running()
        self.set_user_agent()

    def _get_user_agent(self) -> str:
        """Return the user agent to use.

        Preference order: the ``userAgent`` reported by FlareSolverr, then
        the one stored in the user-agent file, then a random Chrome/Windows
        user agent from ``fake_useragent``.
        """
        flaresolverr_settings = get_flaresolverr_settings()
        if isinstance(flaresolverr_settings, dict):
            # Fall through instead of raising KeyError when the payload has
            # no (or an empty) "userAgent" entry.
            reported = flaresolverr_settings.get("userAgent")
            if reported:
                return reported

        if self._USER_AGENT_FILE.exists():
            stored = self._USER_AGENT_FILE.read_text(encoding="utf8").strip()
            if stored:  # ignore an empty file rather than sending an empty header
                return stored

        return fake_useragent.UserAgent(os="windows", platforms="pc", browsers="chrome").random

    def set_user_agent(self, user_agent: str | None = None) -> None:
        """Set the ``User-Agent`` header and persist it to the user-agent file.

        Args:
            user_agent: Value to use; when ``None`` it is determined by
                ``_get_user_agent()``.
        """
        if user_agent is None:
            user_agent = self._get_user_agent()

        self.headers["User-Agent"] = user_agent
        self._USER_AGENT_FILE.write_text(user_agent, encoding="utf8")

    def _load_cookies(self) -> None:
        """Load cookies from file if it exists.

        A file that cannot be loaded is logged and deleted.
        """
        if self._COOKIES_FILE.exists():
            try:
                with self._COOKIES_FILE.open("rb") as fp:
                    # NOTE(security): unpickling executes arbitrary code if
                    # the file was tampered with. The file lives in the
                    # user's own cache directory and is written by
                    # ``save_cookies``; it must never come from an untrusted
                    # source.
                    self.cookies.update(pickle.load(fp))
            except Exception as e:
                logger.error(f"Failed to load cookies from {self._COOKIES_FILE}: {e}")
                # missing_ok: the file may have disappeared in the meantime.
                self._COOKIES_FILE.unlink(missing_ok=True)

    def save_cookies(self) -> None:
        """Save current cookies to file."""
        self._COOKIES_FILE.write_bytes(pickle.dumps(self.cookies, protocol=4))

    def request(self, *args: object, **kwargs: object) -> Response:
        """Override request method to save cookies after each request."""
        response = super().request(*args, **kwargs)
        self.save_cookies()
        return response

    def get(self, url: str | bytes, *, try_with_cloudflare: bool = False, _cloudflare_counter: int = 0, **kwargs: object) -> Response | None:
        """GET ``url``, falling back to FlareSolverr on a Cloudflare challenge.

        A direct request is attempted unless ``try_with_cloudflare`` is set
        and no ``cf_clearance`` cookie is present. If that request fails with
        an HTTP error whose body contains "just a moment", the URL is solved
        through FlareSolverr and then requested again.

        Args:
            url: URL to fetch.
            try_with_cloudflare: Whether the caller expects the site to be
                Cloudflare protected.
            _cloudflare_counter: Currently unused.
            **kwargs: Passed through to the parent ``get``.

        Returns:
            The response, or ``None`` when the direct request failed with an
            HTTP error that does not look like a Cloudflare challenge.

        Raises:
            Exception: Whatever the FlareSolverr round trip or the follow-up
                request raises is logged and re-raised.
        """
        # Imported locally: at module level ``HTTPError`` is only imported in
        # the fallback branch taken when ``requests_cache`` is missing, which
        # would make the ``except`` clause below fail with NameError whenever
        # ``requests_cache`` is installed.
        from requests import HTTPError

        if not try_with_cloudflare or "cf_clearance" in self.cookies:
            try:
                resp = super().get(url, **kwargs)
                resp.raise_for_status()
                return resp
            except HTTPError as e:
                if b"just a moment" not in e.response.content.lower():
                    # Keep the body in a temp file so the failure can be inspected.
                    with tempfile.NamedTemporaryFile(delete=False) as f:
                        f.write(e.response.content)
                    logger.error(f"No cloudflare trigger in response? [exception: {e}] [content: {f.name}]")
                    logger.exception(e)
                    return None

            # Only reached after an HTTP error that looks like a challenge.
            if try_with_cloudflare:
                logger.warning("Cloudflare cookie expired")
            else:
                logger.warning("Cloudflare detected, but `try_with_cloudflare` wasn't set to True!")

        try:
            self._get_url_via_flaresolverr(url)
            # After the url is retrieved from the flaresolverr proxy, it's not necessarily the one we want
            # --> So we'll re-request it here:
            return super().get(url, **kwargs)
        except Exception:
            logger.error("FlareSolverr didn't solve it :(")
            raise

    def _get_url_via_flaresolverr(self, url: str) -> dict:
        """Fetch ``url`` through FlareSolverr and adopt the cookies it returns.

        Posts a ``request.get`` command to the FlareSolverr ``v1`` endpoint,
        copies every cookie from ``solution.cookies`` into this session and
        saves the cookie jar.

        Returns:
            The decoded JSON answer from FlareSolverr.

        Raises:
            requests.HTTPError: If FlareSolverr answers with an error status.
        """
        headers = {"Content-Type": "application/json"}
        data = {
            "cmd": "request.get",
            "url": url,
            # DEFAULT_TIMEOUT is in seconds; maxTimeout is sent in milliseconds.
            "maxTimeout": DEFAULT_TIMEOUT * 1_000,
        }
        response = self.post(FLARESOLVERR_PROXY + "v1", headers=headers, json=data, timeout=DEFAULT_TIMEOUT)
        response.raise_for_status()

        dta = response.json()
        for cookie in dta["solution"]["cookies"]:
            self.cookies.set(
                name=cookie["name"],  # required
                value=cookie["value"],  # required
                version=cookie.get("version", 0),
                port=cookie.get("port", None),
                domain=cookie.get("domain", ""),
                path=cookie.get("path", "/"),
                secure=cookie.get("secure", False),
                expires=cookie.get("expires", None),
                discard=cookie.get("discard", True),
                comment=cookie.get("comment", None),
                comment_url=cookie.get("comment_url", None),
                rest=cookie.get("rest", {"HttpOnly": None}),
                rfc2109=cookie.get("rfc2109", False),
            )
        self.save_cookies()

        return dta
148
+
149
+
150
+ session = PersistentSession()
@@ -0,0 +1,123 @@
1
+ Metadata-Version: 2.3
2
+ Name: anti_cf
3
+ Version: 1.0.1
4
+ Summary: Anti-CloudFlare package
5
+ License: MIT
6
+ Author: Steven Van Ingelgem
7
+ Author-email: steven@vaningelgem.be
8
+ Requires-Python: >=3.11,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Requires-Dist: fake-useragent
15
+ Requires-Dist: logprise
16
+ Requires-Dist: requests
17
+ Description-Content-Type: text/markdown
18
+
19
+ # anti-cf
20
+
21
+ A Python library for handling Cloudflare-protected websites using FlareSolverr.
22
+
23
+ ## Overview
24
+
25
+ `anti-cf` provides a persistent session wrapper for handling websites protected by Cloudflare's anti-bot measures. It automatically manages cookies, user agents, and integrates with FlareSolverr to bypass Cloudflare challenges.
26
+
27
+ ## Features
28
+
29
+ - Persistent cookie storage
30
+ - Automatic FlareSolverr management (including Docker startup)
31
+ - Optional request caching via `requests-cache`
32
+ - User agent taken from FlareSolverr when available, otherwise the previously saved one or a randomly generated one
33
+ - Transparent handling of Cloudflare challenges
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install anti-cf
39
+ ```
40
+
41
+ ## Usage
42
+
43
+ ### Basic Usage
44
+
45
+ ```python
46
+ from anti_cf import session
47
+
48
+ # The library will automatically check if FlareSolverr is running
49
+ # and start it if needed using Docker
50
+
51
+ # For Cloudflare-protected sites
52
+ response = session.get("https://cloudflare-protected-site.com", try_with_cloudflare=True)
53
+
54
+ # For regular requests
55
+ response = session.get("https://example.com")
56
+ ```
57
+
58
+ ### Advanced Usage
59
+
60
+ ```python
61
+ from anti_cf import session
62
+
63
+ # Set a custom user agent
64
+ session.set_user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
65
+
66
+ # Post requests work as normal
67
+ response = session.post("https://example.com/api", json={"key": "value"})
68
+
69
+ # All cookies are automatically saved between requests
70
+ ```
71
+
72
+ ### Error Handling
73
+
74
+ ```python
75
+ from anti_cf import session
76
+ from requests import HTTPError
77
+
78
+ try:
79
+ response = session.get("https://cloudflare-protected-site.com", try_with_cloudflare=True)
80
+ response.raise_for_status()
81
+ except HTTPError as e:
82
+ print(f"HTTP error occurred: {e}")
83
+ ```
84
+
85
+ ## Dependencies
86
+
87
+ - Python 3.11+
88
+ - FlareSolverr
89
+ - Docker (optional, for automatic FlareSolverr startup)
90
+ - `requests` (required); `requests-cache` (optional, enables response caching)
91
+ - `fake-useragent`
92
+ - `logprise`
93
+
94
+ ## Configuration
95
+
96
+ The library uses the following default settings:
97
+ - Cache directory: `~/.cache/anti_cf/`
98
+ - FlareSolverr API: `http://localhost:8191/`
99
+ - Default timeout: 600 seconds
100
+ - Cache expiry: 2 hours (when using `requests-cache`)
101
+
102
+ ## How It Works
103
+
104
+ 1. When making a request to a Cloudflare-protected site:
105
+ - First attempts a normal request
106
+ - If Cloudflare challenge detected, sends the request through FlareSolverr
107
+ - Stores the resulting cookies for future requests
108
+
109
+ 2. On startup:
110
+ - Checks if FlareSolverr API is reachable
111
+ - If not available, automatically starts the Docker container
112
+
113
+ ## Docker
114
+
115
+ By default, `anti-cf` will attempt to start the FlareSolverr Docker container:
116
+
117
+ ```
118
+ ghcr.io/svaningelgem/flaresolverr:latest
119
+ ```
120
+
121
+ ## License
122
+
123
+ Copyright © Steven Van Ingelgem <steven@vaningelgem.be>
@@ -0,0 +1,7 @@
1
+ anti_cf/__init__.py,sha256=ZFXXzfWeiBqKC56vdyod1yXj5_Kg4NXJeTejs2WoEEU,71
2
+ anti_cf/_constants.py,sha256=xFIXkeBVkuHsTQKQU4p2leJkySHdJ_6xApgIoVrBWHA,262
3
+ anti_cf/_flaresolverr.py,sha256=vOuut3fH1CstumQrNyIhm7B58ybLCSPUaK5NDTiQwj8,1633
4
+ anti_cf/_persistent_session.py,sha256=exxEpwqfRXonezFh3BEu4JclZ1lTGShM4l0fSlOIeeI,5634
5
+ anti_cf-1.0.1.dist-info/METADATA,sha256=9exeaEIAuWMlnuLceXcoo8A9dftwHKDh6NLZ5UIWZgk,3187
6
+ anti_cf-1.0.1.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
7
+ anti_cf-1.0.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 2.1.2
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any