anti-cf 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
anti_cf-1.0.1/PKG-INFO ADDED
@@ -0,0 +1,123 @@
1
+ Metadata-Version: 2.3
2
+ Name: anti_cf
3
+ Version: 1.0.1
4
+ Summary: Anti-CloudFlare package
5
+ License: MIT
6
+ Author: Steven Van Ingelgem
7
+ Author-email: steven@vaningelgem.be
8
+ Requires-Python: >=3.11,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Requires-Dist: fake-useragent
15
+ Requires-Dist: logprise
16
+ Requires-Dist: requests
17
+ Description-Content-Type: text/markdown
18
+
19
+ # anti-cf
20
+
21
+ A Python library for handling Cloudflare-protected websites using FlareSolverr.
22
+
23
+ ## Overview
24
+
25
+ `anti-cf` provides a persistent session wrapper for handling websites protected by Cloudflare's anti-bot measures. It automatically manages cookies, user agents, and integrates with FlareSolverr to bypass Cloudflare challenges.
26
+
27
+ ## Features
28
+
29
+ - Persistent cookie storage
30
+ - Automatic FlareSolverr management (including Docker startup)
31
+ - Optional request caching via `requests-cache`
32
+ - Random user agent generation
33
+ - Transparent handling of Cloudflare challenges
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install anti-cf
39
+ ```
40
+
41
+ ## Usage
42
+
43
+ ### Basic Usage
44
+
45
+ ```python
46
+ from anti_cf import session
47
+
48
+ # The library will automatically check if FlareSolverr is running
49
+ # and start it if needed using Docker
50
+
51
+ # For Cloudflare-protected sites
52
+ response = session.get("https://cloudflare-protected-site.com", try_with_cloudflare=True)
53
+
54
+ # For regular requests
55
+ response = session.get("https://example.com")
56
+ ```
57
+
58
+ ### Advanced Usage
59
+
60
+ ```python
61
+ from anti_cf import session
62
+
63
+ # Set a custom user agent
64
+ session.set_user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
65
+
66
+ # Post requests work as normal
67
+ response = session.post("https://example.com/api", json={"key": "value"})
68
+
69
+ # All cookies are automatically saved between requests
70
+ ```
71
+
72
+ ### Error Handling
73
+
74
+ ```python
75
+ from anti_cf import session
76
+ from requests import HTTPError
77
+
78
+ try:
79
+ response = session.get("https://cloudflare-protected-site.com", try_with_cloudflare=True)
80
+ response.raise_for_status()
81
+ except HTTPError as e:
82
+ print(f"HTTP error occurred: {e}")
83
+ ```
84
+
85
+ ## Dependencies
86
+
87
+ - Python 3.11+
88
+ - FlareSolverr
89
+ - Docker (optional, for automatic FlareSolverr startup)
90
+ - `requests`, plus optionally `requests-cache` for response caching
91
+ - `fake-useragent`
92
+ - `logprise`
93
+
94
+ ## Configuration
95
+
96
+ The library uses the following default settings:
97
+ - Cache directory: `~/.cache/anti_cf/`
98
+ - FlareSolverr API: `http://localhost:8191/`
99
+ - Default timeout: 600 seconds
100
+ - Cache expiry: 2 hours (when using `requests-cache`)
101
+
102
+ ## How It Works
103
+
104
+ 1. When making a request to a Cloudflare-protected site:
105
+ - First attempts a normal request
106
+ - If Cloudflare challenge detected, sends the request through FlareSolverr
107
+ - Stores the resulting cookies for future requests
108
+
109
+ 2. On startup:
110
+ - Checks if FlareSolverr API is reachable
111
+ - If not available, automatically starts the Docker container
112
+
113
+ ## Docker
114
+
115
+ By default, `anti-cf` will attempt to start the FlareSolverr Docker container:
116
+
117
+ ```
118
+ ghcr.io/svaningelgem/flaresolverr:latest
119
+ ```
120
+
121
+ ## License
122
+
123
+ Copyright © Steven Van Ingelgem <steven@vaningelgem.be>
@@ -0,0 +1,105 @@
1
+ # anti-cf
2
+
3
+ A Python library for handling Cloudflare-protected websites using FlareSolverr.
4
+
5
+ ## Overview
6
+
7
+ `anti-cf` provides a persistent session wrapper for handling websites protected by Cloudflare's anti-bot measures. It automatically manages cookies, user agents, and integrates with FlareSolverr to bypass Cloudflare challenges.
8
+
9
+ ## Features
10
+
11
+ - Persistent cookie storage
12
+ - Automatic FlareSolverr management (including Docker startup)
13
+ - Optional request caching via `requests-cache`
14
+ - Random user agent generation
15
+ - Transparent handling of Cloudflare challenges
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install anti-cf
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ### Basic Usage
26
+
27
+ ```python
28
+ from anti_cf import session
29
+
30
+ # The library will automatically check if FlareSolverr is running
31
+ # and start it if needed using Docker
32
+
33
+ # For Cloudflare-protected sites
34
+ response = session.get("https://cloudflare-protected-site.com", try_with_cloudflare=True)
35
+
36
+ # For regular requests
37
+ response = session.get("https://example.com")
38
+ ```
39
+
40
+ ### Advanced Usage
41
+
42
+ ```python
43
+ from anti_cf import session
44
+
45
+ # Set a custom user agent
46
+ session.set_user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
47
+
48
+ # Post requests work as normal
49
+ response = session.post("https://example.com/api", json={"key": "value"})
50
+
51
+ # All cookies are automatically saved between requests
52
+ ```
53
+
54
+ ### Error Handling
55
+
56
+ ```python
57
+ from anti_cf import session
58
+ from requests import HTTPError
59
+
60
+ try:
61
+ response = session.get("https://cloudflare-protected-site.com", try_with_cloudflare=True)
62
+ response.raise_for_status()
63
+ except HTTPError as e:
64
+ print(f"HTTP error occurred: {e}")
65
+ ```
66
+
67
+ ## Dependencies
68
+
69
+ - Python 3.11+
70
+ - FlareSolverr
71
+ - Docker (optional, for automatic FlareSolverr startup)
72
+ - `requests`, plus optionally `requests-cache` for response caching
73
+ - `fake-useragent`
74
+ - `logprise`
75
+
76
+ ## Configuration
77
+
78
+ The library uses the following default settings:
79
+ - Cache directory: `~/.cache/anti_cf/`
80
+ - FlareSolverr API: `http://localhost:8191/`
81
+ - Default timeout: 600 seconds
82
+ - Cache expiry: 2 hours (when using `requests-cache`)
83
+
84
+ ## How It Works
85
+
86
+ 1. When making a request to a Cloudflare-protected site:
87
+ - First attempts a normal request
88
+ - If Cloudflare challenge detected, sends the request through FlareSolverr
89
+ - Stores the resulting cookies for future requests
90
+
91
+ 2. On startup:
92
+ - Checks if FlareSolverr API is reachable
93
+ - If not available, automatically starts the Docker container
94
+
95
+ ## Docker
96
+
97
+ By default, `anti-cf` will attempt to start the FlareSolverr Docker container:
98
+
99
+ ```
100
+ ghcr.io/svaningelgem/flaresolverr:latest
101
+ ```
102
+
103
+ ## License
104
+
105
+ Copyright © Steven Van Ingelgem <steven@vaningelgem.be>
@@ -0,0 +1,121 @@
1
+ [tool.poetry]
2
+ name = "anti_cf"
3
+ version = "1.0.1"
4
+ description = "Anti-CloudFlare package"
5
+ authors = ["Steven Van Ingelgem <steven@vaningelgem.be>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.11"
11
+ logprise = "*"
12
+ fake-useragent = "*"
13
+ requests = "*"
14
+
15
+ [tool.poetry.group.dev.dependencies]
16
+ pytest-cov = "*"
17
+ pytest-mock = "*"
18
+ ruff = "*"
19
+
20
+ [build-system]
21
+ requires = ["poetry-core"]
22
+ build-backend = "poetry.core.masonry.api"
23
+
24
+
25
+
26
+
27
+ [tool.pytest.ini_options]
28
+ testpaths = [
29
+ "tests",
30
+ ]
31
+ python_files = ['*_tests.py', '*_test.py', "test_*.py", "tests_*.py"]
32
+ pythonpath = [
33
+ "src"
34
+ ]
35
+
36
+ [tool.ruff]
37
+ line-length = 160
38
+ fix = true
39
+ unsafe-fixes = true
40
+
41
+ [tool.ruff.lint]
42
+ select = [
43
+ "ARG", # flake8-unused-arguments
44
+ "E", # pycodestyle
45
+ "W", # pycodestyle
46
+ "F", # Pyflakes
47
+ "B", # flake8-bugbear
48
+ "C4", # flake8-comprehensions
49
+ "D", # flake8-docstrings
50
+ "I", # isort
51
+ "SIM", # flake8-simplify
52
+ "TCH", # flake8-type-checking
53
+ "TID", # flake8-tidy-imports
54
+ "UP", # pyupgrade
55
+ "PT", # flake8-pytest-style
56
+ "RUF", # Ruff-specific rules
57
+ "PTH", # flake8-use-pathlib
58
+ "FA", # flake8-future-annotations
59
+ "PIE", # flake8-pie
60
+ # "TC", # flake8-type-checking
61
+ "TD", # flake8-todos
62
+ "TRY", # tryceratops
63
+ "FBT001", # flake8-boolean-trap
64
+ "RET", # flake8-return
65
+ "SIM", # flake8-simplify
66
+ "ANN", # flake8-annotations
67
+ ]
68
+
69
+ ignore = [
70
+ # Line length regulated by the ruff formatter
71
+ "E501",
72
+ # pydocstyle: http://www.pydocstyle.org/en/stable/error_codes.html
73
+ # numpy convention with a few additional lints
74
+ "D107",
75
+ "D203",
76
+ "D212",
77
+ "D401",
78
+ "D402",
79
+ "D415",
80
+ "D416",
81
+ # flake8-pytest-style:
82
+ "PT011", # pytest.raises({exception}) is too broad, set the match parameter or use a more specific exception
83
+ # flake8-simplify
84
+ "SIM102", # Use a single `if` statement instead of nested `if` statements
85
+ "SIM108", # Use ternary operator
86
+ # ruff
87
+ "RUF005", # unpack-instead-of-concatenating-to-collection-literal
88
+ # pydocstyle
89
+ # TODO: Remove errors below to further improve docstring linting
90
+ # Ordered from most common to least common errors.
91
+ "D105", # Missing docstring in magic method
92
+ "D100", # Missing docstring in public module
93
+ "D104", # Missing docstring in public package
94
+ # flake8-todos
95
+ "TD002", # Missing author in TODO
96
+ "TD003", # Missing issue link on the line following this TODO
97
+ # tryceratops
98
+ "TRY003", # Avoid specifying long messages outside the exception class
99
+ # Lints below are turned off because of conflicts with the ruff formatter
100
+ "D206",
101
+ "W191",
102
+
103
+ "TID252", # Relative imports are banned
104
+ "D101", # Missing docstring in public class
105
+ "D102", # Missing docstring in public method
106
+ "D103", # Missing docstring in public function
107
+
108
+ "TRY300", # Consider moving this statement to an `else` block
109
+ ]
110
+
111
+ [tool.ruff.lint.pycodestyle]
112
+ max-doc-length = 160
113
+
114
+ [tool.ruff.lint.flake8-tidy-imports]
115
+ ban-relative-imports = "all"
116
+
117
+ [tool.ruff.lint.flake8-type-checking]
118
+ strict = true
119
+
120
+ [tool.ruff.lint.per-file-ignores]
121
+ "tests/**/*.py" = ["D100", "D103", "B018", "FBT001"]
@@ -0,0 +1,5 @@
1
+ from ._persistent_session import session
2
+
3
# Public API of the package: only the ready-made session object is exported.
__all__ = ["session"]
@@ -0,0 +1,9 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Final
5
+
6
# Base URL of the FlareSolverr HTTP API.
FLARESOLVERR_PROXY: Final[str] = "http://localhost:8191/"

# Default timeout value (600).
DEFAULT_TIMEOUT: int = 600

# Directory holding the package's persistent files; it is created eagerly
# (including missing parents) as soon as this module is imported.
CACHE_PATH = Path.home() / ".cache/anti_cf"
CACHE_PATH.mkdir(parents=True, exist_ok=True)
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess
4
+ import time
5
+
6
+ import requests
7
+ from logprise import logger
8
+
9
+ from ._constants import FLARESOLVERR_PROXY
10
+
11
+
12
def get_flaresolverr_settings() -> dict | None:
    """Probe the FlareSolverr API and return its JSON answer.

    Sends a GET request to ``FLARESOLVERR_PROXY`` with a 0.1 second timeout.

    Returns:
        The decoded JSON body when the request succeeds with a non-error
        status, otherwise ``None`` (connection failure, timeout, HTTP error
        status, or a body that is not valid JSON).
    """
    try:
        resp = requests.get(FLARESOLVERR_PROXY, timeout=0.1)
        resp.raise_for_status()
        return resp.json()
    except Exception:
        # Best-effort probe: any ordinary failure means "not reachable".
        # ``Exception`` (instead of a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate so the process can still be interrupted.
        return None
20
+
21
+
22
def start_flaresolverr_docker() -> subprocess.Popen | None:
    """Start the FlareSolverr docker container and wait for its API.

    Launches ``docker run --rm -p 8191:8191 ghcr.io/svaningelgem/flaresolverr:latest``
    and then polls ``get_flaresolverr_settings()`` up to 10 times, sleeping
    one second between attempts.

    Returns:
        The ``Popen`` handle of the ``docker run`` process, whether or not the
        API answered within the polling window, or ``None`` when launching
        the process raised an exception.
    """
    try:
        logger.info("Starting FlareSolverr docker container...")
        # The output is never consumed, so it is discarded rather than piped:
        # an unread PIPE blocks the child once the OS pipe buffer is full.
        process = subprocess.Popen(
            ["docker", "run", "--rm", "-p", "8191:8191", "ghcr.io/svaningelgem/flaresolverr:latest"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # Wait for container to be ready
        for attempt in range(10):  # Try for 10 seconds
            if attempt > 0:
                time.sleep(1)

            if get_flaresolverr_settings() is not None:
                logger.info("FlareSolverr is ready")
                return process

        logger.error("FlareSolverr container started but API not responding")
        return process
    except Exception as e:
        logger.error(f"Failed to start FlareSolverr docker: {e}")
        return None
46
+
47
+
48
def ensure_flaresolverr_running() -> subprocess.Popen | None:
    """Make sure a FlareSolverr API is available, launching docker when it is not.

    Returns:
        ``None`` when the API already answers; otherwise whatever
        ``start_flaresolverr_docker()`` returns.
    """
    api_is_up = get_flaresolverr_settings() is not None
    if not api_is_up:
        return start_flaresolverr_docker()

    logger.info("FlareSolverr API is already running")
    return None
@@ -0,0 +1,150 @@
1
+ from __future__ import annotations
2
+
3
+ import pickle
4
+ import tempfile
5
+ from typing import TYPE_CHECKING, ClassVar
6
+
7
+ import fake_useragent
8
+ from logprise import logger
9
+
10
+ from ._constants import CACHE_PATH, DEFAULT_TIMEOUT, FLARESOLVERR_PROXY
11
+ from ._flaresolverr import ensure_flaresolverr_running, get_flaresolverr_settings
12
+
13
+ try:
14
+ from requests_cache import CachedSession as Session
15
+
16
+ _HAS_CACHE = True
17
+ logger.info("Using CachedSession for persistent session")
18
+ except ImportError:
19
+ from requests import HTTPError, Session
20
+
21
+ _HAS_CACHE = False
22
+
23
+ if TYPE_CHECKING:
24
+ from pathlib import Path
25
+
26
+ from requests import Response
27
+
28
+
29
class PersistentSession(Session):
    """Session that persists cookies and user agent on disk and can use FlareSolverr.

    ``Session`` is ``requests_cache.CachedSession`` when that package is
    importable, otherwise ``requests.Session``. Cookies are pickled to
    ``cookies.pkl`` and the user agent is written to ``user_agent.txt``, both
    inside ``CACHE_PATH``.
    """

    _COOKIES_FILE: ClassVar[Path] = CACHE_PATH / "cookies.pkl"
    _USER_AGENT_FILE: ClassVar[Path] = CACHE_PATH / "user_agent.txt"

    def __init__(self) -> None:
        """Set up the session, restore cookies, ensure FlareSolverr runs and pick a user agent."""
        if _HAS_CACHE:
            # Cached variant: sqlite backend under CACHE_PATH, entries expire after 2 hours.
            super().__init__(
                CACHE_PATH / "url_cache.sqlite",
                backend="sqlite",
                cache_control=False,
                expire_after=2 * 3600,
                headers={
                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                    "Accept-Language": "en-US,en;q=0.5",
                },
            )
        else:
            super().__init__()

        self._load_cookies()
        ensure_flaresolverr_running()
        self.set_user_agent()

    def _get_user_agent(self) -> str:
        """Choose a user agent string.

        Order of preference: the ``userAgent`` reported by FlareSolverr, the
        value stored in the user-agent file, and finally a random Chrome on
        Windows user agent from ``fake_useragent``.
        """
        flaresolverr_settings = get_flaresolverr_settings()
        if flaresolverr_settings is not None:
            return flaresolverr_settings["userAgent"]

        if self._USER_AGENT_FILE.exists():
            return self._USER_AGENT_FILE.read_text(encoding="utf8").strip()

        return fake_useragent.UserAgent(os="windows", platforms="pc", browsers="chrome").random

    def set_user_agent(self, user_agent: str | None = None) -> None:
        """Set the ``User-Agent`` header and store it in the user-agent file.

        Args:
            user_agent: Value to use; when ``None`` one is selected by
                ``_get_user_agent()``.
        """
        if user_agent is None:
            user_agent = self._get_user_agent()

        self.headers["User-Agent"] = user_agent
        self._USER_AGENT_FILE.write_text(user_agent, encoding="utf8")

    def _load_cookies(self) -> None:
        """Load cookies from file if it exists.

        A file that cannot be loaded is logged and deleted.
        """
        if self._COOKIES_FILE.exists():
            try:
                with self._COOKIES_FILE.open("rb") as fp:
                    # NOTE(review): unpickling can execute arbitrary code; this relies on
                    # the cookie file being writable only by the current user.
                    self.cookies.update(pickle.load(fp))
            except Exception as e:
                logger.error(f"Failed to load cookies from {self._COOKIES_FILE}: {e}")
                # missing_ok: the file may already have been removed in the meantime.
                self._COOKIES_FILE.unlink(missing_ok=True)

    def save_cookies(self) -> None:
        """Save current cookies to file."""
        self._COOKIES_FILE.write_bytes(pickle.dumps(self.cookies, protocol=4))

    def request(self, *args: object, **kwargs: object) -> Response:
        """Override request method to save cookies after each request."""
        response = super().request(*args, **kwargs)
        self.save_cookies()
        return response

    def get(self, url: str | bytes, *, try_with_cloudflare: bool = False, _cloudflare_counter: int = 0, **kwargs: object) -> Response | None:
        """Fetch ``url``, going through FlareSolverr when a Cloudflare challenge is in the way.

        A direct request is attempted when ``try_with_cloudflare`` is false or
        a ``cf_clearance`` cookie is already present. If that request ends in
        an HTTP error whose body contains ``just a moment``, or if no direct
        request was attempted, the URL is first requested via FlareSolverr
        (which stores the returned cookies) and then requested again directly.

        Args:
            url: Address to fetch.
            try_with_cloudflare: Skip the direct attempt unless a
                ``cf_clearance`` cookie is already stored.
            _cloudflare_counter: Not used by this method; kept so existing
                callers passing it keep working.
            **kwargs: Forwarded to the parent ``get``.

        Returns:
            The response, or ``None`` when the direct request failed with an
            HTTP error whose body does not contain ``just a moment``.

        Raises:
            Exception: Whatever the FlareSolverr round trip or the follow-up
                request raised, after logging it.
        """
        # Imported here because the module-level import of HTTPError only happens in
        # the fallback branch taken when requests_cache is missing; without this the
        # ``except HTTPError`` below raises NameError whenever requests_cache is installed.
        from requests import HTTPError

        if not try_with_cloudflare or "cf_clearance" in self.cookies:
            try:
                resp = super().get(url, **kwargs)
                resp.raise_for_status()
                return resp
            except HTTPError as e:
                if b"just a moment" not in e.response.content.lower():
                    logger.error("No cloudflare trigger in response?")
                    # Keep the unexpected body on disk so it can be inspected afterwards.
                    with tempfile.NamedTemporaryFile(delete=False) as f:
                        f.write(e.response.content)
                    logger.error(f"No cloudflare trigger in response? [exception: {e}] [content: {f.name}]")
                    logger.exception(e)
                    return None

            # Only reached when the direct attempt hit a Cloudflare challenge page.
            if try_with_cloudflare:
                logger.warning("Cloudflare cookie expired")
            else:
                logger.warning("Cloudflare detected, but `try_with_cloudflare` wasn't set to True!")

        try:
            self._get_url_via_flaresolverr(url)
            # After the url is retrieved from the flaresolverr proxy, it's not necessarily the one we want
            # --> So we'll re-request it here:
            return super().get(url, **kwargs)
        except Exception:
            logger.error("FlareSolverr didn't solve it :(")
            raise

    def _get_url_via_flaresolverr(self, url: str) -> dict:
        """Request ``url`` through the FlareSolverr API and adopt the cookies it returns.

        Posts a ``request.get`` command to ``FLARESOLVERR_PROXY + "v1"``, copies
        every cookie listed under ``solution.cookies`` into this session's
        cookie jar, then persists the jar.

        Args:
            url: Address FlareSolverr should fetch.

        Returns:
            The decoded JSON answer from FlareSolverr.

        Raises:
            requests.HTTPError: When FlareSolverr answers with an error status.
        """
        headers = {"Content-Type": "application/json"}
        data = {
            "cmd": "request.get",
            "url": url,
            # DEFAULT_TIMEOUT is scaled by 1000 for FlareSolverr's maxTimeout field.
            "maxTimeout": DEFAULT_TIMEOUT * 1_000,
        }
        response = self.post(FLARESOLVERR_PROXY + "v1", headers=headers, json=data, timeout=DEFAULT_TIMEOUT)
        response.raise_for_status()

        dta = response.json()
        for cookie in dta["solution"]["cookies"]:
            self.cookies.set(
                name=cookie["name"],  # required
                value=cookie["value"],  # required
                version=cookie.get("version", 0),
                port=cookie.get("port", None),
                domain=cookie.get("domain", ""),
                path=cookie.get("path", "/"),
                secure=cookie.get("secure", False),
                expires=cookie.get("expires", None),
                discard=cookie.get("discard", True),
                comment=cookie.get("comment", None),
                comment_url=cookie.get("comment_url", None),
                rest=cookie.get("rest", {"HttpOnly": None}),
                rfc2109=cookie.get("rfc2109", False),
            )
        self.save_cookies()

        return dta
148
+
149
+
150
# Shared module-level instance; it is constructed when this module is imported,
# which runs PersistentSession.__init__ (cookie loading, FlareSolverr check,
# user-agent selection).
session: PersistentSession = PersistentSession()