cfspider 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cfspider/__init__.py +230 -0
- cfspider/api.py +937 -0
- cfspider/async_api.py +418 -0
- cfspider/async_session.py +281 -0
- cfspider/browser.py +335 -0
- cfspider/cli.py +81 -0
- cfspider/impersonate.py +388 -0
- cfspider/ip_map.py +522 -0
- cfspider/mirror.py +682 -0
- cfspider/session.py +239 -0
- cfspider/stealth.py +537 -0
- cfspider/vless_client.py +572 -0
- cfspider-1.7.4.dist-info/METADATA +1390 -0
- cfspider-1.7.4.dist-info/RECORD +18 -0
- cfspider-1.7.4.dist-info/WHEEL +5 -0
- cfspider-1.7.4.dist-info/entry_points.txt +2 -0
- cfspider-1.7.4.dist-info/licenses/LICENSE +201 -0
- cfspider-1.7.4.dist-info/top_level.txt +1 -0
cfspider/__init__.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFspider - Cloudflare 代理 IP 池 Python 库
|
|
3
|
+
|
|
4
|
+
一个基于 Cloudflare Workers 的代理 IP 池库,提供:
|
|
5
|
+
- 同步/异步 HTTP 请求(兼容 requests/httpx)
|
|
6
|
+
- TLS 指纹模拟(基于 curl_cffi,支持 25+ 浏览器指纹)
|
|
7
|
+
- 隐身模式(自动添加完整浏览器请求头,避免反爬检测)
|
|
8
|
+
- 浏览器自动化(基于 Playwright,支持 VLESS 代理)
|
|
9
|
+
- IP 地图可视化(生成 Cyberpunk 风格的地图)
|
|
10
|
+
- 网页镜像(保存网页到本地,自动重写资源链接)
|
|
11
|
+
|
|
12
|
+
快速开始:
|
|
13
|
+
>>> import cfspider
|
|
14
|
+
>>>
|
|
15
|
+
>>> # 基本 GET 请求(无代理)
|
|
16
|
+
>>> response = cfspider.get("https://httpbin.org/ip")
|
|
17
|
+
>>> print(response.json())
|
|
18
|
+
>>>
|
|
19
|
+
>>> # 使用 Cloudflare Workers 代理
|
|
20
|
+
>>> response = cfspider.get(
|
|
21
|
+
... "https://httpbin.org/ip",
|
|
22
|
+
... cf_proxies="https://your-workers.dev"
|
|
23
|
+
... )
|
|
24
|
+
>>> print(response.cf_colo) # Cloudflare 节点代码
|
|
25
|
+
>>>
|
|
26
|
+
>>> # 启用隐身模式(自动添加 15+ 浏览器请求头)
|
|
27
|
+
>>> response = cfspider.get(
|
|
28
|
+
... "https://example.com",
|
|
29
|
+
... stealth=True,
|
|
30
|
+
... stealth_browser='chrome'
|
|
31
|
+
... )
|
|
32
|
+
>>>
|
|
33
|
+
>>> # TLS 指纹模拟
|
|
34
|
+
>>> response = cfspider.get(
|
|
35
|
+
... "https://example.com",
|
|
36
|
+
... impersonate="chrome131"
|
|
37
|
+
... )
|
|
38
|
+
|
|
39
|
+
版本信息:
|
|
40
|
+
- 版本号: 1.7.4
|
|
41
|
+
- 协议: Apache License 2.0
|
|
42
|
+
- 文档: https://spider.violetteam.cloud
|
|
43
|
+
|
|
44
|
+
依赖关系:
|
|
45
|
+
必需:requests
|
|
46
|
+
可选:
|
|
47
|
+
- httpx[http2]: HTTP/2 和异步请求支持
|
|
48
|
+
- curl_cffi: TLS 指纹模拟
|
|
49
|
+
- playwright: 浏览器自动化
|
|
50
|
+
- beautifulsoup4: 网页镜像
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
from .api import (
|
|
54
|
+
get, post, put, delete, head, options, patch, request,
|
|
55
|
+
clear_map_records, get_map_collector
|
|
56
|
+
)
|
|
57
|
+
from .session import Session
|
|
58
|
+
from .cli import install_browser
|
|
59
|
+
|
|
60
|
+
# IP 地图可视化
|
|
61
|
+
from .ip_map import (
|
|
62
|
+
IPMapCollector, generate_map_html, add_ip_record,
|
|
63
|
+
get_collector as get_ip_collector, clear_records as clear_ip_records,
|
|
64
|
+
COLO_COORDINATES
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# 网页镜像
|
|
68
|
+
from .mirror import mirror, MirrorResult, WebMirror
|
|
69
|
+
|
|
70
|
+
# 异步 API(基于 httpx)
|
|
71
|
+
from .async_api import (
|
|
72
|
+
aget, apost, aput, adelete, ahead, aoptions, apatch,
|
|
73
|
+
arequest, astream,
|
|
74
|
+
AsyncCFSpiderResponse, AsyncStreamResponse
|
|
75
|
+
)
|
|
76
|
+
from .async_session import AsyncSession
|
|
77
|
+
|
|
78
|
+
# TLS 指纹模拟 API(基于 curl_cffi)
|
|
79
|
+
from .impersonate import (
|
|
80
|
+
impersonate_get, impersonate_post, impersonate_put,
|
|
81
|
+
impersonate_delete, impersonate_head, impersonate_options,
|
|
82
|
+
impersonate_patch, impersonate_request,
|
|
83
|
+
ImpersonateSession, ImpersonateResponse,
|
|
84
|
+
get_supported_browsers, SUPPORTED_BROWSERS
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# 隐身模式(反爬虫规避)
|
|
88
|
+
from .stealth import (
|
|
89
|
+
StealthSession,
|
|
90
|
+
get_stealth_headers, get_random_browser_headers,
|
|
91
|
+
random_delay, get_referer, update_sec_fetch_headers,
|
|
92
|
+
BROWSER_PROFILES, SUPPORTED_BROWSERS as STEALTH_BROWSERS,
|
|
93
|
+
CHROME_HEADERS, FIREFOX_HEADERS, SAFARI_HEADERS, EDGE_HEADERS, CHROME_MOBILE_HEADERS
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# 延迟导入 Browser,避免强制依赖 playwright
|
|
98
|
+
def Browser(cf_proxies=None, headless=True, timeout=30, vless_uuid=None):
    """Create a browser instance.

    The real implementation lives in ``cfspider.browser`` and is imported
    only when this factory is called, so that importing ``cfspider`` does
    not force a dependency on Playwright.

    Args:
        cf_proxies: Proxy address. Supported formats:
            - VLESS link: "vless://uuid@host:port?path=/xxx#name" (recommended)
            - HTTP proxy: "http://ip:port" or "ip:port"
            - SOCKS5 proxy: "socks5://ip:port"
            - edgetunnel domain: "v2.example.com" (must be combined with
              ``vless_uuid``)
            When omitted, the local network is used directly.
        headless: Whether to run in headless mode. Defaults to True.
        timeout: Request timeout in seconds. Defaults to 30.
        vless_uuid: VLESS UUID. Only needed when ``cf_proxies`` is a bare
            domain; not needed when a full VLESS link is given.

    Returns:
        Browser: The browser instance.

    Example:
        >>> import cfspider
        >>> # Full VLESS link (recommended, no vless_uuid needed)
        >>> browser = cfspider.Browser(
        ...     cf_proxies="vless://uuid@v2.example.com:443?path=/"
        ... )
        >>> html = browser.html("https://example.com")
        >>> browser.close()
        >>>
        >>> # Domain + UUID (legacy style)
        >>> browser = cfspider.Browser(
        ...     cf_proxies="v2.example.com",
        ...     vless_uuid="your-vless-uuid"
        ... )
        >>>
        >>> # Direct use (no proxy)
        >>> browser = cfspider.Browser()
    """
    # Deferred import: keeps Playwright an optional dependency.
    from .browser import Browser as browser_factory

    # Arguments are forwarded positionally, in declaration order.
    instance = browser_factory(cf_proxies, headless, timeout, vless_uuid)
    return instance
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def parse_vless_link(vless_link):
    """Parse a VLESS link.

    Thin wrapper that delegates to ``parse_vless_link`` in
    ``cfspider.browser``; the import is deferred until the first call.

    Args:
        vless_link: VLESS link string, e.g.
            "vless://uuid@host:port?path=/xxx#name".

    Returns:
        dict: A dictionary containing ``uuid``, ``host``, ``port`` and
        ``path``; ``None`` when parsing fails.

    Example:
        >>> import cfspider
        >>> info = cfspider.parse_vless_link("vless://abc123@v2.example.com:443?path=/ws#proxy")
        >>> print(info)
        {'uuid': 'abc123', 'host': 'v2.example.com', 'port': 443, 'path': '/ws'}
    """
    # Deferred import, mirroring the lazy loading used by ``Browser``.
    from .browser import parse_vless_link as link_parser

    parsed = link_parser(vless_link)
    return parsed
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class CFSpiderError(Exception):
    """Base exception class for CFspider.

    All CFspider-specific exceptions inherit from this class, so callers
    can catch it to handle any of them.

    Example:
        >>> try:
        ...     response = cfspider.get("https://invalid-url")
        ... except cfspider.CFSpiderError as e:
        ...     print(f"request failed: {e}")
    """
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class BrowserNotInstalledError(CFSpiderError):
    """Error indicating that the browser is not installed.

    Intended to be raised when browser mode is requested but Chromium
    has not been installed.

    How to fix:
        >>> import cfspider
        >>> cfspider.install_browser()  # installs Chromium automatically

    Or from the command line:
        $ cfspider install
    """
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class PlaywrightNotInstalledError(CFSpiderError):
    """Error indicating that Playwright is not installed.

    Intended to be raised when browser mode is requested but the
    Playwright library is not available.

    How to fix:
        $ pip install playwright
    """
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# Package version; matches the released wheel.
__version__ = "1.7.4"

# Public API of the package, grouped by feature area.
__all__ = [
    # Synchronous API (requests)
    "get",
    "post",
    "put",
    "delete",
    "head",
    "options",
    "patch",
    "request",
    "Session",
    "Browser",
    "install_browser",
    "parse_vless_link",
    "CFSpiderError",
    "BrowserNotInstalledError",
    "PlaywrightNotInstalledError",
    # Asynchronous API (httpx)
    "aget",
    "apost",
    "aput",
    "adelete",
    "ahead",
    "aoptions",
    "apatch",
    "arequest",
    "astream",
    "AsyncSession",
    "AsyncCFSpiderResponse",
    "AsyncStreamResponse",
    # TLS fingerprint impersonation API (curl_cffi)
    "impersonate_get",
    "impersonate_post",
    "impersonate_put",
    "impersonate_delete",
    "impersonate_head",
    "impersonate_options",
    "impersonate_patch",
    "impersonate_request",
    "ImpersonateSession",
    "ImpersonateResponse",
    "get_supported_browsers",
    "SUPPORTED_BROWSERS",
    # Stealth mode (anti-bot evasion)
    "StealthSession",
    "get_stealth_headers",
    "get_random_browser_headers",
    "random_delay",
    "get_referer",
    "update_sec_fetch_headers",
    "BROWSER_PROFILES",
    "STEALTH_BROWSERS",
    "CHROME_HEADERS",
    "FIREFOX_HEADERS",
    "SAFARI_HEADERS",
    "EDGE_HEADERS",
    "CHROME_MOBILE_HEADERS",
    # IP map visualization
    "IPMapCollector",
    "generate_map_html",
    "add_ip_record",
    "get_ip_collector",
    "clear_ip_records",
    "COLO_COORDINATES",
    "clear_map_records",
    "get_map_collector",
    # Web page mirroring
    "mirror",
    "MirrorResult",
    "WebMirror",
]
|