cfspider 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cfspider/__init__.py +230 -0
- cfspider/api.py +937 -0
- cfspider/async_api.py +418 -0
- cfspider/async_session.py +281 -0
- cfspider/browser.py +335 -0
- cfspider/cli.py +81 -0
- cfspider/impersonate.py +388 -0
- cfspider/ip_map.py +522 -0
- cfspider/mirror.py +682 -0
- cfspider/session.py +239 -0
- cfspider/stealth.py +537 -0
- cfspider/vless_client.py +572 -0
- cfspider-1.7.4.dist-info/METADATA +1390 -0
- cfspider-1.7.4.dist-info/RECORD +18 -0
- cfspider-1.7.4.dist-info/WHEEL +5 -0
- cfspider-1.7.4.dist-info/entry_points.txt +2 -0
- cfspider-1.7.4.dist-info/licenses/LICENSE +201 -0
- cfspider-1.7.4.dist-info/top_level.txt +1 -0
cfspider/api.py
ADDED
|
@@ -0,0 +1,937 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFspider 核心 API 模块
|
|
3
|
+
|
|
4
|
+
提供同步 HTTP 请求功能,支持:
|
|
5
|
+
- 通过 Cloudflare Workers 代理请求
|
|
6
|
+
- TLS 指纹模拟 (curl_cffi)
|
|
7
|
+
- HTTP/2 支持 (httpx)
|
|
8
|
+
- 隐身模式(完整浏览器请求头)
|
|
9
|
+
- IP 地图可视化
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import requests
|
|
13
|
+
import time
|
|
14
|
+
from urllib.parse import urlencode, quote
|
|
15
|
+
from typing import Optional, Any
|
|
16
|
+
|
|
17
|
+
# IP map module (imported eagerly at module load; used by _handle_map_output)
|
|
18
|
+
from . import ip_map
|
|
19
|
+
|
|
20
|
+
# httpx is imported lazily: it is only needed when HTTP/2 is requested.
_httpx = None


def _get_httpx():
    """Return the ``httpx`` module, importing and caching it on first use.

    Raises:
        ImportError: If httpx cannot be imported.
    """
    global _httpx
    # Fast path: the module was already loaded by an earlier call.
    if _httpx is not None:
        return _httpx
    try:
        import httpx as loaded_httpx
    except ImportError:
        raise ImportError(
            "httpx is required for HTTP/2 support. "
            "Install it with: pip install httpx[http2]"
        )
    _httpx = loaded_httpx
    return _httpx
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# curl_cffi is imported lazily: it is only needed for TLS fingerprint impersonation.
_curl_cffi = None


def _get_curl_cffi():
    """Return ``curl_cffi.requests``, importing and caching it on first use.

    Raises:
        ImportError: If curl_cffi cannot be imported.
    """
    global _curl_cffi
    # Fast path: the module was already loaded by an earlier call.
    if _curl_cffi is not None:
        return _curl_cffi
    try:
        from curl_cffi import requests as loaded_curl_requests
    except ImportError:
        raise ImportError(
            "curl_cffi is required for TLS fingerprint impersonation. "
            "Install it with: pip install curl_cffi"
        )
    _curl_cffi = loaded_curl_requests
    return _curl_cffi
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class CFSpiderResponse:
    """
    Response wrapper returned by the CFspider request functions.

    Wraps the backend response object (requests, httpx or curl_cffi) and
    delegates the usual ``requests.Response``-style accessors to it, while
    additionally carrying the Cloudflare metadata captured by the caller.

    Attributes:
        cf_colo: Cloudflare data-center code passed in by the caller
            (``None`` when not supplied).
        cf_ray: Cloudflare Ray ID passed in by the caller
            (``None`` when not supplied).

    Example:
        >>> response = cfspider.get("https://httpbin.org/ip", cf_proxies="...")
        >>> print(response.status_code)
        >>> print(response.cf_colo)
        >>> data = response.json()
    """

    def __init__(self, response, cf_colo=None, cf_ray=None):
        """
        Store the wrapped response and its Cloudflare metadata.

        Args:
            response: The underlying requests/httpx/curl_cffi response object.
            cf_colo: Cloudflare data-center code, if known.
            cf_ray: Cloudflare Ray ID, if known.
        """
        self._response = response
        self.cf_colo = cf_colo
        self.cf_ray = cf_ray

    def __repr__(self) -> str:
        return f"<{type(self).__name__} [{self.status_code}]>"

    @property
    def text(self) -> str:
        """Decoded response body, as produced by the wrapped response."""
        return self._response.text

    @property
    def content(self) -> bytes:
        """Raw response body bytes."""
        return self._response.content

    @property
    def status_code(self) -> int:
        """HTTP status code."""
        return self._response.status_code

    @property
    def headers(self):
        """Response headers of the wrapped response."""
        return self._response.headers

    @property
    def cookies(self):
        """Response cookies of the wrapped response."""
        return self._response.cookies

    @property
    def url(self) -> Optional[str]:
        """URL reported by the wrapped response, always as a plain string.

        Some backends expose a URL object rather than ``str``; it is
        converted here so callers get the documented type regardless of
        which backend served the request.
        """
        raw_url = self._response.url
        if raw_url is None or isinstance(raw_url, str):
            return raw_url
        return str(raw_url)

    @property
    def encoding(self) -> Optional[str]:
        """Text encoding of the wrapped response."""
        return self._response.encoding

    @encoding.setter
    def encoding(self, value: str):
        """Override the text encoding on the wrapped response."""
        self._response.encoding = value

    def json(self, **kwargs) -> Any:
        """
        Parse the response body as JSON.

        Args:
            **kwargs: Forwarded unchanged to the wrapped response's ``json()``.

        Returns:
            The decoded JSON value.
        """
        return self._response.json(**kwargs)

    def raise_for_status(self):
        """Delegate to the wrapped response's ``raise_for_status()``.

        The exception type raised on an error status is whatever the
        backend in use raises.
        """
        self._response.raise_for_status()
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def request(method, url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
            map_output=False, map_file="cfspider_map.html",
            stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """
    Send an HTTP request directly, through a regular proxy, or through a CFspider Worker.

    Backend selection (first match wins):
        1. ``impersonate`` set  -> curl_cffi via ``_request_impersonate``.
        2. ``http2`` true       -> httpx via ``_request_httpx``.
        3. otherwise            -> the ``requests`` library.

    If both ``impersonate`` and ``http2`` are given, ``impersonate`` takes
    precedence and ``http2`` is ignored; no error is raised.

    Args:
        method (str): HTTP method name (e.g. "GET", "POST").
        url (str): Target URL.
        cf_proxies (str, optional): Proxy address. Its meaning depends on
            ``cf_workers``:
            - ``cf_workers=True``: base URL of a CFspider Worker; ``https://``
              is prepended when no http/https scheme is present.
            - ``cf_workers=False``: a regular proxy URL; ``http://`` is
              prepended when the value carries no scheme at all.
            - falsy: the target is requested directly.
        cf_workers (bool): Whether ``cf_proxies`` is a Worker address (default True).
        http2 (bool): Use the httpx backend with HTTP/2 (default False).
        impersonate (str, optional): Browser fingerprint name handed to
            curl_cffi (e.g. "chrome131").
        map_output (bool): When true, record the request in the IP map and
            regenerate the map HTML after the request (default False).
        map_file (str): Output file for the map HTML.
        stealth (bool): When true, start from the headers returned by
            ``stealth.get_stealth_headers`` and overlay the caller's headers
            on top (caller-supplied headers win).
        stealth_browser (str): Browser profile passed to ``get_stealth_headers``.
        delay (tuple, optional): ``(low, high)`` passed to
            ``stealth.random_delay`` before the request is sent.
        token (str, optional): Added as ``&token=...`` to the Worker URL in
            Worker mode.
        **kwargs: ``params``, ``headers``, ``data``, ``json``, ``cookies`` and
            ``timeout`` (default 30) are extracted here; everything else is
            forwarded to the backend call unchanged.

    Returns:
        CFSpiderResponse: Wrapped response. In Worker mode ``cf_colo`` and
        ``cf_ray`` are read from the ``X-CF-Colo`` and ``CF-Ray`` response headers.

    Raises:
        ImportError: When the optional backend (httpx / curl_cffi) is needed
            but not installed.

    Notes:
        In Worker mode the request is always POSTed to ``<worker>/proxy`` with
        the real method and target URL in the query string; caller headers are
        forwarded as ``X-CFSpider-Header-<name>`` and cookies as
        ``X-CFSpider-Header-Cookie``.
    """
    # Optional pause before sending anything.
    if delay:
        from .stealth import random_delay
        random_delay(delay[0], delay[1])

    params = kwargs.pop("params", None)
    # Treat an explicit headers=None like "no headers"; otherwise the stealth
    # merge below would fail on dict.update(None).
    headers = kwargs.pop("headers", None) or {}

    if stealth:
        from .stealth import get_stealth_headers
        merged_headers = get_stealth_headers(stealth_browser).copy()
        merged_headers.update(headers)  # caller-supplied headers override stealth defaults
        headers = merged_headers

    data = kwargs.pop("data", None)
    json_data = kwargs.pop("json", None)
    cookies = kwargs.pop("cookies", None)
    timeout = kwargs.pop("timeout", 30)

    # Start of the measured interval used for the IP-map response time.
    start_time = time.time()

    if impersonate:
        # TLS fingerprint impersonation: delegate to the curl_cffi backend.
        response = _request_impersonate(
            method, url, cf_proxies, cf_workers, impersonate,
            params=params, headers=headers, data=data,
            json_data=json_data, cookies=cookies, timeout=timeout,
            token=token, **kwargs
        )
    elif http2:
        # HTTP/2: delegate to the httpx backend.
        response = _request_httpx(
            method, url, cf_proxies, cf_workers,
            params=params, headers=headers, data=data,
            json_data=json_data, cookies=cookies, timeout=timeout,
            token=token, **kwargs
        )
    elif not cf_proxies:
        # No proxy configured: plain direct request.
        resp = requests.request(
            method,
            url,
            params=params,
            headers=headers,
            data=data,
            json=json_data,
            cookies=cookies,
            timeout=timeout,
            **kwargs
        )
        response = CFSpiderResponse(resp)
    elif not cf_workers:
        # Regular proxy. Only add a default scheme when none is present, so
        # schemes such as socks5h:// or socks4:// are passed through intact.
        proxy_url = cf_proxies
        if "://" not in proxy_url:
            proxy_url = f"http://{proxy_url}"

        resp = requests.request(
            method,
            url,
            params=params,
            headers=headers,
            data=data,
            json=json_data,
            cookies=cookies,
            timeout=timeout,
            proxies={"http": proxy_url, "https": proxy_url},
            **kwargs
        )
        response = CFSpiderResponse(resp)
    else:
        # CFspider Worker API.
        workers_base = cf_proxies.rstrip("/")
        if not workers_base.startswith(('http://', 'https://')):
            workers_base = f"https://{workers_base}"

        target_url = url
        if params:
            # doseq=True expands list values into repeated keys.
            query = params if isinstance(params, str) else urlencode(params, doseq=True)
            # Respect a query string that is already part of the URL.
            if "?" not in url:
                joiner = "?"
            elif url.endswith(("?", "&")):
                joiner = ""
            else:
                joiner = "&"
            target_url = f"{url}{joiner}{query}"

        proxy_url = f"{workers_base}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
        if token:
            proxy_url += f"&token={quote(token, safe='')}"

        # Target-bound headers travel under an X-CFSpider-Header- prefix.
        request_headers = {
            f"X-CFSpider-Header-{key}": value for key, value in headers.items()
        }
        if cookies:
            request_headers["X-CFSpider-Header-Cookie"] = "; ".join(
                f"{k}={v}" for k, v in cookies.items()
            )

        resp = requests.post(
            proxy_url,
            headers=request_headers,
            data=data,
            json=json_data,
            timeout=timeout,
            **kwargs
        )
        response = CFSpiderResponse(
            resp,
            cf_colo=resp.headers.get("X-CF-Colo"),
            cf_ray=resp.headers.get("CF-Ray"),
        )

    _handle_map_output(response, url, start_time, map_output, map_file)
    return response
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def _handle_map_output(response, url, start_time, map_output, map_file):
    """Record the finished request in the IP map and regenerate the map HTML.

    Does nothing unless ``map_output`` is truthy.

    Args:
        response: Response object; ``status_code`` is read directly, while
            ``cf_colo`` / ``cf_ray`` fall back to ``None`` when absent.
        url: URL of the request being recorded.
        start_time: ``time.time()`` value taken before the request was sent.
        map_output: Whether map generation is enabled.
        map_file: Path of the HTML file to write.
    """
    if map_output:
        # Elapsed wall-clock time in milliseconds.
        elapsed_ms = (time.time() - start_time) * 1000

        record = {
            "url": url,
            "ip": None,  # the IP is not available here; cf_colo is recorded instead
            "cf_colo": getattr(response, 'cf_colo', None),
            "cf_ray": getattr(response, 'cf_ray', None),
            "status_code": response.status_code,
            "response_time": elapsed_ms,
        }
        ip_map.add_ip_record(**record)

        # Rewrite the map HTML so it includes the new record.
        ip_map.generate_map_html(output_file=map_file)
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def _request_impersonate(method, url, cf_proxies, cf_workers, impersonate,
                         params=None, headers=None, data=None, json_data=None,
                         cookies=None, timeout=30, token=None, **kwargs):
    """
    Send a request with curl_cffi so the TLS fingerprint matches ``impersonate``.

    Args:
        method (str): HTTP method name.
        url (str): Target URL.
        cf_proxies (str or None): Worker base URL, regular proxy URL, or falsy
            for a direct request (interpretation depends on ``cf_workers``).
        cf_workers (bool): True when ``cf_proxies`` is a CFspider Worker address.
        impersonate (str): Fingerprint name forwarded to curl_cffi.
        params: Query parameters for the target URL.
        headers (dict, optional): Headers for the target request.
        data: Request body passed as ``data=``.
        json_data: Request body passed as ``json=``.
        cookies (dict, optional): Cookies for the target request.
        timeout: Timeout forwarded to curl_cffi (default 30).
        token (str, optional): Worker auth token, added as ``&token=...``.
        **kwargs: Forwarded unchanged to the curl_cffi call.

    Returns:
        CFSpiderResponse: Wrapped response; in Worker mode ``cf_colo`` and
        ``cf_ray`` are read from the ``X-CF-Colo`` / ``CF-Ray`` response headers.

    Raises:
        ImportError: If curl_cffi is not installed.
    """
    curl_requests = _get_curl_cffi()

    # No proxy configured: direct request.
    if not cf_proxies:
        response = curl_requests.request(
            method,
            url,
            params=params,
            headers=headers,
            data=data,
            json=json_data,
            cookies=cookies,
            timeout=timeout,
            impersonate=impersonate,
            **kwargs
        )
        return CFSpiderResponse(response)

    # Regular proxy. Only add a default scheme when none is present, so
    # schemes such as socks5h:// are passed through intact.
    if not cf_workers:
        proxy_url = cf_proxies
        if "://" not in proxy_url:
            proxy_url = f"http://{proxy_url}"

        response = curl_requests.request(
            method,
            url,
            params=params,
            headers=headers,
            data=data,
            json=json_data,
            cookies=cookies,
            timeout=timeout,
            impersonate=impersonate,
            proxies={"http": proxy_url, "https": proxy_url},
            **kwargs
        )
        return CFSpiderResponse(response)

    # CFspider Worker API.
    workers_base = cf_proxies.rstrip("/")
    if not workers_base.startswith(('http://', 'https://')):
        workers_base = f"https://{workers_base}"

    target_url = url
    if params:
        # doseq=True expands list values into repeated keys.
        query = params if isinstance(params, str) else urlencode(params, doseq=True)
        # Respect a query string that is already part of the URL.
        if "?" not in url:
            joiner = "?"
        elif url.endswith(("?", "&")):
            joiner = ""
        else:
            joiner = "&"
        target_url = f"{url}{joiner}{query}"

    proxy_url = f"{workers_base}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
    if token:
        proxy_url += f"&token={quote(token, safe='')}"

    # Target-bound headers travel under an X-CFSpider-Header- prefix.
    request_headers = {
        f"X-CFSpider-Header-{key}": value for key, value in (headers or {}).items()
    }
    if cookies:
        request_headers["X-CFSpider-Header-Cookie"] = "; ".join(
            f"{k}={v}" for k, v in cookies.items()
        )

    # The Worker endpoint is always POSTed to; the real method is in the query string.
    response = curl_requests.post(
        proxy_url,
        headers=request_headers,
        data=data,
        json=json_data,
        timeout=timeout,
        impersonate=impersonate,
        **kwargs
    )

    return CFSpiderResponse(
        response,
        cf_colo=response.headers.get("X-CF-Colo"),
        cf_ray=response.headers.get("CF-Ray"),
    )
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def _request_httpx(method, url, cf_proxies, cf_workers, params=None, headers=None,
                   data=None, json_data=None, cookies=None, timeout=30, token=None, **kwargs):
    """
    Send a request with httpx and HTTP/2 enabled.

    requests-style options are translated to their httpx equivalents so that
    callers can use the same keyword arguments as on the ``requests`` path:
    ``allow_redirects`` becomes ``follow_redirects`` (defaulting to True, to
    match the ``requests`` path), and ``verify`` / ``cert`` are applied to
    the ``httpx.Client`` instead of the individual request.

    Args:
        method (str): HTTP method name.
        url (str): Target URL.
        cf_proxies (str or None): Worker base URL, regular proxy URL, or falsy
            for a direct request (interpretation depends on ``cf_workers``).
        cf_workers (bool): True when ``cf_proxies`` is a CFspider Worker address.
        params: Query parameters for the target URL.
        headers (dict, optional): Headers for the target request.
        data: Request body passed as ``data=``.
        json_data: Request body passed as ``json=``.
        cookies (dict, optional): Cookies for the target request.
        timeout: Client timeout (default 30).
        token (str, optional): Worker auth token, added as ``&token=...``.
        **kwargs: Remaining options, forwarded to the httpx request call.

    Returns:
        CFSpiderResponse: Wrapped response; in Worker mode ``cf_colo`` and
        ``cf_ray`` are read from the ``X-CF-Colo`` / ``CF-Ray`` response headers.

    Raises:
        ImportError: If httpx is not installed.
    """
    httpx = _get_httpx()

    # Client-level options: httpx takes verify/cert on the Client, not per request.
    client_options = {"http2": True, "timeout": timeout}
    for option in ("verify", "cert"):
        if option in kwargs:
            client_options[option] = kwargs.pop(option)

    # httpx names the redirect switch follow_redirects and defaults it to False;
    # honour the requests-style name and keep redirects on by default.
    follow = kwargs.pop("allow_redirects", True)
    kwargs.setdefault("follow_redirects", follow)

    # No proxy configured: direct request.
    if not cf_proxies:
        with httpx.Client(**client_options) as client:
            response = client.request(
                method,
                url,
                params=params,
                headers=headers,
                data=data,
                json=json_data,
                cookies=cookies,
                **kwargs
            )
        return CFSpiderResponse(response)

    # Regular proxy. Only add a default scheme when none is present.
    if not cf_workers:
        proxy_url = cf_proxies
        if "://" not in proxy_url:
            proxy_url = f"http://{proxy_url}"

        with httpx.Client(proxy=proxy_url, **client_options) as client:
            response = client.request(
                method,
                url,
                params=params,
                headers=headers,
                data=data,
                json=json_data,
                cookies=cookies,
                **kwargs
            )
        return CFSpiderResponse(response)

    # CFspider Worker API.
    workers_base = cf_proxies.rstrip("/")
    if not workers_base.startswith(('http://', 'https://')):
        workers_base = f"https://{workers_base}"

    target_url = url
    if params:
        # doseq=True expands list values into repeated keys.
        query = params if isinstance(params, str) else urlencode(params, doseq=True)
        # Respect a query string that is already part of the URL.
        if "?" not in url:
            joiner = "?"
        elif url.endswith(("?", "&")):
            joiner = ""
        else:
            joiner = "&"
        target_url = f"{url}{joiner}{query}"

    proxy_url = f"{workers_base}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
    if token:
        proxy_url += f"&token={quote(token, safe='')}"

    # Target-bound headers travel under an X-CFSpider-Header- prefix.
    request_headers = {
        f"X-CFSpider-Header-{key}": value for key, value in (headers or {}).items()
    }
    if cookies:
        request_headers["X-CFSpider-Header-Cookie"] = "; ".join(
            f"{k}={v}" for k, v in cookies.items()
        )

    # The Worker endpoint is always POSTed to; the real method is in the query string.
    with httpx.Client(**client_options) as client:
        response = client.post(
            proxy_url,
            headers=request_headers,
            data=data,
            json=json_data,
            **kwargs
        )

    return CFSpiderResponse(
        response,
        cf_colo=response.headers.get("X-CF-Colo"),
        cf_ray=response.headers.get("CF-Ray"),
    )
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def get(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
        map_output=False, map_file="cfspider_map.html",
        stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """
    Send a GET request.

    Thin wrapper that calls ``request`` with the method fixed to "GET"; every
    argument is passed through unchanged.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Worker address (``cf_workers=True``),
            regular proxy address (``cf_workers=False``), or None for a
            direct request.
        cf_workers (bool): Whether ``cf_proxies`` is a CFspider Worker (default True).
        http2 (bool): Use the HTTP/2 backend (default False).
        impersonate (str, optional): TLS fingerprint name (e.g. "chrome131").
        map_output (bool): Generate the IP map HTML after the request (default False).
        map_file (str): Map output filename (default "cfspider_map.html").
        stealth (bool): Add browser-like default headers (default False).
        stealth_browser (str): Browser profile for stealth headers (default 'chrome').
        delay (tuple, optional): ``(low, high)`` random delay range before the request.
        token (str, optional): Worker API authentication token.
        **kwargs: Further requests-style options such as ``params``,
            ``headers``, ``data``, ``json``, ``cookies`` and ``timeout``.

    Returns:
        CFSpiderResponse: Whatever ``request`` returns.
    """
    options = dict(
        cf_proxies=cf_proxies,
        cf_workers=cf_workers,
        http2=http2,
        impersonate=impersonate,
        map_output=map_output,
        map_file=map_file,
        stealth=stealth,
        stealth_browser=stealth_browser,
        delay=delay,
        token=token,
    )
    return request("GET", url, **options, **kwargs)
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def post(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
         map_output=False, map_file="cfspider_map.html",
         stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """
    Send a POST request.

    Thin wrapper that calls ``request`` with the method fixed to "POST"; every
    argument is passed through unchanged.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Worker address (``cf_workers=True``),
            regular proxy address (``cf_workers=False``), or None for a
            direct request.
        cf_workers (bool): Whether ``cf_proxies`` is a CFspider Worker (default True).
        http2 (bool): Use the HTTP/2 backend (default False).
        impersonate (str, optional): TLS fingerprint name (e.g. "chrome131").
        map_output (bool): Generate the IP map HTML after the request (default False).
        map_file (str): Map output filename (default "cfspider_map.html").
        stealth (bool): Add browser-like default headers (default False).
        stealth_browser (str): Browser profile for stealth headers (default 'chrome').
        delay (tuple, optional): ``(low, high)`` random delay range before the request.
        token (str, optional): Worker API authentication token.
        **kwargs: Further requests-style options such as ``data``, ``json``,
            ``headers``, ``cookies`` and ``timeout``.

    Returns:
        CFSpiderResponse: Whatever ``request`` returns.
    """
    options = dict(
        cf_proxies=cf_proxies,
        cf_workers=cf_workers,
        http2=http2,
        impersonate=impersonate,
        map_output=map_output,
        map_file=map_file,
        stealth=stealth,
        stealth_browser=stealth_browser,
        delay=delay,
        token=token,
    )
    return request("POST", url, **options, **kwargs)
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
def put(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
        map_output=False, map_file="cfspider_map.html",
        stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send a PUT request.

    Thin wrapper that forwards every argument to ``request`` with the HTTP
    method fixed to ``"PUT"``.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Proxy address.
        cf_workers (bool): Whether to use the CFspider Workers API
            (default: True).
        http2 (bool): Whether to enable the HTTP/2 protocol (default: False).
        impersonate (str, optional): TLS fingerprint impersonation.
        map_output (bool): Whether to generate the IP map HTML file
            (default: False).
        map_file (str): Map output filename (default: "cfspider_map.html").
        stealth (bool): Whether to enable stealth mode (default: False).
        stealth_browser (str): Stealth mode browser type (default: 'chrome').
        delay (tuple, optional): Random delay range before the request, in
            seconds, e.g. (1, 3).
        token (str, optional): Workers API authentication token.
        **kwargs: Other parameters, passed through unchanged to ``request``
            (documented by the project as requests-compatible).

    Returns:
        CFSpiderResponse: Response object.
    """
    # Gather the named options once, in the same order they are declared,
    # then hand them to the shared ``request`` entry point.
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("PUT", url, **forwarded, **kwargs)
|
|
776
|
+
|
|
777
|
+
|
|
778
|
+
def delete(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
           map_output=False, map_file="cfspider_map.html",
           stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send a DELETE request.

    Thin wrapper that forwards every argument to ``request`` with the HTTP
    method fixed to ``"DELETE"``.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Proxy address.
        cf_workers (bool): Whether to use the CFspider Workers API
            (default: True).
        http2 (bool): Whether to enable the HTTP/2 protocol (default: False).
        impersonate (str, optional): TLS fingerprint impersonation.
        map_output (bool): Whether to generate the IP map HTML file
            (default: False).
        map_file (str): Map output filename (default: "cfspider_map.html").
        stealth (bool): Whether to enable stealth mode (default: False).
        stealth_browser (str): Stealth mode browser type (default: 'chrome').
        delay (tuple, optional): Random delay range before the request, in
            seconds, e.g. (1, 3).
        token (str, optional): Workers API authentication token.
        **kwargs: Other parameters, passed through unchanged to ``request``
            (documented by the project as requests-compatible).

    Returns:
        CFSpiderResponse: Response object.
    """
    # Gather the named options once, in the same order they are declared,
    # then hand them to the shared ``request`` entry point.
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("DELETE", url, **forwarded, **kwargs)
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
def head(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
         map_output=False, map_file="cfspider_map.html",
         stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send a HEAD request.

    Thin wrapper that forwards every argument to ``request`` with the HTTP
    method fixed to ``"HEAD"``.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Proxy address.
        cf_workers (bool): Whether to use the CFspider Workers API
            (default: True).
        http2 (bool): Whether to enable the HTTP/2 protocol (default: False).
        impersonate (str, optional): TLS fingerprint impersonation.
        map_output (bool): Whether to generate the IP map HTML file
            (default: False).
        map_file (str): Map output filename (default: "cfspider_map.html").
        stealth (bool): Whether to enable stealth mode (default: False).
        stealth_browser (str): Stealth mode browser type (default: 'chrome').
        delay (tuple, optional): Random delay range before the request, in
            seconds, e.g. (1, 3).
        token (str, optional): Workers API authentication token.
        **kwargs: Other parameters, passed through unchanged to ``request``
            (documented by the project as requests-compatible).

    Returns:
        CFSpiderResponse: Response object.
    """
    # Gather the named options once, in the same order they are declared,
    # then hand them to the shared ``request`` entry point.
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("HEAD", url, **forwarded, **kwargs)
|
|
852
|
+
|
|
853
|
+
|
|
854
|
+
def options(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
            map_output=False, map_file="cfspider_map.html",
            stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send an OPTIONS request.

    Thin wrapper that forwards every argument to ``request`` with the HTTP
    method fixed to ``"OPTIONS"``.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Proxy address.
        cf_workers (bool): Whether to use the CFspider Workers API
            (default: True).
        http2 (bool): Whether to enable the HTTP/2 protocol (default: False).
        impersonate (str, optional): TLS fingerprint impersonation.
        map_output (bool): Whether to generate the IP map HTML file
            (default: False).
        map_file (str): Map output filename (default: "cfspider_map.html").
        stealth (bool): Whether to enable stealth mode (default: False).
        stealth_browser (str): Stealth mode browser type (default: 'chrome').
        delay (tuple, optional): Random delay range before the request, in
            seconds, e.g. (1, 3).
        token (str, optional): Workers API authentication token.
        **kwargs: Other parameters, passed through unchanged to ``request``
            (documented by the project as requests-compatible).

    Returns:
        CFSpiderResponse: Response object.
    """
    # Gather the named options once, in the same order they are declared,
    # then hand them to the shared ``request`` entry point. The local is not
    # called ``options`` so it does not shadow this function's own name.
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("OPTIONS", url, **forwarded, **kwargs)
|
|
890
|
+
|
|
891
|
+
|
|
892
|
+
def patch(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
          map_output=False, map_file="cfspider_map.html",
          stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send a PATCH request.

    Thin wrapper that forwards every argument to ``request`` with the HTTP
    method fixed to ``"PATCH"``.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Proxy address.
        cf_workers (bool): Whether to use the CFspider Workers API
            (default: True).
        http2 (bool): Whether to enable the HTTP/2 protocol (default: False).
        impersonate (str, optional): TLS fingerprint impersonation.
        map_output (bool): Whether to generate the IP map HTML file
            (default: False).
        map_file (str): Map output filename (default: "cfspider_map.html").
        stealth (bool): Whether to enable stealth mode (default: False).
        stealth_browser (str): Stealth mode browser type (default: 'chrome').
        delay (tuple, optional): Random delay range before the request, in
            seconds, e.g. (1, 3).
        token (str, optional): Workers API authentication token.
        **kwargs: Other parameters, passed through unchanged to ``request``
            (documented by the project as requests-compatible).

    Returns:
        CFSpiderResponse: Response object.
    """
    # Gather the named options once, in the same order they are declared,
    # then hand them to the shared ``request`` entry point.
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("PATCH", url, **forwarded, **kwargs)
|
|
928
|
+
|
|
929
|
+
|
|
930
|
+
def clear_map_records():
    """Clear the IP map records.

    Delegates to ``ip_map.clear_records()``; nothing is returned.
    """
    ip_map.clear_records()
|
|
933
|
+
|
|
934
|
+
|
|
935
|
+
def get_map_collector():
    """Return the IP map collector.

    Delegates to ``ip_map.get_collector()`` and returns its result as-is.
    """
    collector = ip_map.get_collector()
    return collector
|