cfspider 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cfspider/api.py ADDED
@@ -0,0 +1,937 @@
1
+ """
2
+ CFspider 核心 API 模块
3
+
4
+ 提供同步 HTTP 请求功能,支持:
5
+ - 通过 Cloudflare Workers 代理请求
6
+ - TLS 指纹模拟 (curl_cffi)
7
+ - HTTP/2 支持 (httpx)
8
+ - 隐身模式(完整浏览器请求头)
9
+ - IP 地图可视化
10
+ """
11
+
12
+ import requests
13
+ import time
14
+ from urllib.parse import urlencode, quote
15
+ from typing import Optional, Any
16
+
17
+ # 延迟导入 IP 地图模块
18
+ from . import ip_map
19
+
20
# httpx is imported lazily: it is only needed when HTTP/2 is requested.
_httpx = None


def _get_httpx():
    """Return the httpx module, importing it on first use.

    The imported module is cached in the module-level ``_httpx`` variable so
    that later calls return it without importing again.

    Returns:
        The ``httpx`` module object.

    Raises:
        ImportError: If httpx cannot be imported.
    """
    global _httpx
    if _httpx is not None:
        return _httpx
    try:
        import httpx as loaded_module
    except ImportError:
        raise ImportError(
            "httpx is required for HTTP/2 support. "
            "Install it with: pip install httpx[http2]"
        )
    _httpx = loaded_module
    return _httpx
36
+
37
+
38
# curl_cffi is imported lazily: it is only needed for TLS fingerprinting.
_curl_cffi = None


def _get_curl_cffi():
    """Return ``curl_cffi.requests``, importing it on first use.

    The imported module is cached in the module-level ``_curl_cffi`` variable
    so that later calls return it without importing again.

    Returns:
        The ``curl_cffi.requests`` module object.

    Raises:
        ImportError: If curl_cffi cannot be imported.
    """
    global _curl_cffi
    if _curl_cffi is not None:
        return _curl_cffi
    try:
        from curl_cffi import requests as loaded_module
    except ImportError:
        raise ImportError(
            "curl_cffi is required for TLS fingerprint impersonation. "
            "Install it with: pip install curl_cffi"
        )
    _curl_cffi = loaded_module
    return _curl_cffi
54
+
55
+
56
class CFSpiderResponse:
    """Wrapper around a backend HTTP response.

    The wrapped object may come from requests, httpx or curl_cffi. This class
    exposes a requests.Response-like surface by delegating to it, and adds
    two Cloudflare-specific attributes.

    Attributes:
        cf_colo: Cloudflare data-center code (for example ``NRT``), or
            ``None`` when the caller did not supply one.
        cf_ray: Cloudflare Ray ID of the request, or ``None`` when the caller
            did not supply one.

    Example:
        >>> response = cfspider.get("https://httpbin.org/ip", cf_proxies="...")
        >>> print(response.status_code)
        >>> print(response.cf_colo)
        >>> data = response.json()
    """

    def __init__(self, response, cf_colo=None, cf_ray=None):
        """Store the backend response and the Cloudflare metadata.

        Args:
            response: The raw requests/httpx/curl_cffi response object.
            cf_colo: Cloudflare data-center code taken from the response
                headers, if any.
            cf_ray: Cloudflare Ray ID taken from the response headers, if any.
        """
        self._response = response
        self.cf_colo = cf_colo
        self.cf_ray = cf_ray

    def __repr__(self) -> str:
        return f"<{type(self).__name__} [{self.status_code}]>"

    @property
    def text(self) -> str:
        """Response body as text, as decoded by the backend response."""
        return self._response.text

    @property
    def content(self) -> bytes:
        """Raw response body bytes."""
        return self._response.content

    @property
    def status_code(self) -> int:
        """HTTP status code of the response."""
        return self._response.status_code

    @property
    def headers(self):
        """Response headers, exactly as exposed by the backend response."""
        return self._response.headers

    @property
    def cookies(self):
        """Response cookies, exactly as exposed by the backend response."""
        return self._response.cookies

    @property
    def url(self) -> Optional[str]:
        """URL reported by the backend response, always as a plain string.

        Some backends (httpx) expose a URL object instead of ``str``; it is
        converted here so callers can rely on string operations regardless of
        which backend produced the response. ``None`` is passed through.
        """
        raw_url = self._response.url
        if raw_url is None or isinstance(raw_url, str):
            return raw_url
        return str(raw_url)

    @property
    def encoding(self) -> Optional[str]:
        """Text encoding reported by the backend response."""
        return self._response.encoding

    @encoding.setter
    def encoding(self, value: str):
        """Set the text encoding on the backend response."""
        self._response.encoding = value

    def json(self, **kwargs) -> Any:
        """Parse the response body as JSON.

        Args:
            **kwargs: Forwarded unchanged to the backend response's
                ``json()`` method.

        Returns:
            Whatever the backend's ``json()`` returns.

        Raises:
            Whatever the backend's ``json()`` raises for an invalid body.
        """
        return self._response.json(**kwargs)

    def raise_for_status(self):
        """Delegate to the backend response's ``raise_for_status()``.

        The exception type raised for an error status depends on the backend
        that produced the wrapped response.
        """
        self._response.raise_for_status()
165
+
166
+
167
def request(method, url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
            map_output=False, map_file="cfspider_map.html",
            stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send an HTTP request, optionally through a proxy or a Workers relay.

    Backend selection, in order of precedence:

    1. ``impersonate`` set  -> curl_cffi (``_request_impersonate``).
    2. ``http2`` true       -> httpx (``_request_httpx``).
    3. otherwise            -> requests.

    If both ``impersonate`` and ``http2`` are given, the curl_cffi path is
    taken and ``http2`` is ignored; no error is raised.

    Args:
        method (str): HTTP method name (GET, POST, PUT, DELETE, HEAD,
            OPTIONS, PATCH).
        url (str): Target URL, including the scheme.
        cf_proxies (str, optional): Proxy address. With ``cf_workers=True``
            it is the CFspider Workers base URL; with ``cf_workers=False`` it
            is an ordinary proxy URL. A missing scheme defaults to
            ``https://`` for Workers and ``http://`` for ordinary proxies.
            When falsy, the target is requested directly.
        cf_workers (bool): Whether ``cf_proxies`` is a Workers address
            (default) or an ordinary proxy.
        http2 (bool): Use the httpx backend with HTTP/2 enabled.
        impersonate (str, optional): Browser name passed to curl_cffi for
            TLS fingerprint impersonation (for example ``"chrome131"``).
        map_output (bool): When true, record the request in the IP map and
            regenerate the map HTML after the response arrives.
        map_file (str): Output file name for the map HTML.
        stealth (bool): When true, start from the headers returned by
            ``stealth.get_stealth_headers(stealth_browser)``; headers passed
            by the caller override them.
        stealth_browser (str): Browser profile name for stealth headers.
        delay (tuple, optional): ``(low, high)`` forwarded to
            ``stealth.random_delay`` before the request is sent.
        token (str, optional): Appended to the Workers URL as the ``token``
            query parameter. Only used in Workers mode.
        **kwargs: ``params``, ``headers``, ``data``, ``json``, ``cookies``
            and ``timeout`` (default 30) are consumed here; everything else
            is forwarded unchanged to the selected backend call.

    Returns:
        CFSpiderResponse: Wrapper around the backend response. ``cf_colo``
        and ``cf_ray`` are filled from the ``X-CF-Colo`` and ``CF-Ray``
        response headers in Workers mode and are ``None`` otherwise.

    Raises:
        ImportError: If the optional backend needed for ``http2`` or
            ``impersonate`` is not installed.
        Exception: Any error raised by the selected backend propagates.

    Notes:
        In Workers mode the request is always sent to the worker as a POST to
        ``<worker>/proxy``. The real method and target URL travel as query
        parameters, caller headers are sent as ``X-CFSpider-Header-<name>``
        and cookies as ``X-CFSpider-Header-Cookie``.
    """
    # Optional pause before sending anything.
    if delay:
        from .stealth import random_delay
        random_delay(delay[0], delay[1])

    params = kwargs.pop("params", None)
    # An explicit headers=None must behave like "no headers"; otherwise the
    # stealth merge below would call dict.update(None) and raise TypeError.
    headers = kwargs.pop("headers", None) or {}

    # Stealth mode: browser-like defaults first, caller headers on top.
    if stealth:
        from .stealth import get_stealth_headers
        stealth_headers = get_stealth_headers(stealth_browser)
        final_headers = stealth_headers.copy()
        final_headers.update(headers)
        headers = final_headers
    data = kwargs.pop("data", None)
    json_data = kwargs.pop("json", None)
    cookies = kwargs.pop("cookies", None)
    timeout = kwargs.pop("timeout", 30)

    # Reference point for the response time reported to the IP map.
    start_time = time.time()

    # TLS fingerprint impersonation takes precedence over every other backend.
    if impersonate:
        response = _request_impersonate(
            method, url, cf_proxies, cf_workers, impersonate,
            params=params, headers=headers, data=data,
            json_data=json_data, cookies=cookies, timeout=timeout,
            token=token, **kwargs
        )
        _handle_map_output(response, url, start_time, map_output, map_file)
        return response

    # HTTP/2 goes through httpx.
    if http2:
        response = _request_httpx(
            method, url, cf_proxies, cf_workers,
            params=params, headers=headers, data=data,
            json_data=json_data, cookies=cookies, timeout=timeout,
            token=token, **kwargs
        )
        _handle_map_output(response, url, start_time, map_output, map_file)
        return response

    # No proxy configured: plain requests call straight to the target.
    if not cf_proxies:
        resp = requests.request(
            method,
            url,
            params=params,
            headers=headers,
            data=data,
            json=json_data,
            cookies=cookies,
            timeout=timeout,
            **kwargs
        )
        response = CFSpiderResponse(resp)
        _handle_map_output(response, url, start_time, map_output, map_file)
        return response

    # cf_workers=False: cf_proxies is an ordinary HTTP/SOCKS5 proxy.
    if not cf_workers:
        proxy_url = cf_proxies
        if not proxy_url.startswith(('http://', 'https://', 'socks5://')):
            proxy_url = f"http://{proxy_url}"

        proxies = {
            "http": proxy_url,
            "https": proxy_url
        }

        resp = requests.request(
            method,
            url,
            params=params,
            headers=headers,
            data=data,
            json=json_data,
            cookies=cookies,
            timeout=timeout,
            proxies=proxies,
            **kwargs
        )
        response = CFSpiderResponse(resp)
        _handle_map_output(response, url, start_time, map_output, map_file)
        return response

    # cf_workers=True: relay through the CFspider Workers API.
    cf_proxies_url = cf_proxies.rstrip("/")
    if not cf_proxies_url.startswith(('http://', 'https://')):
        cf_proxies_url = f"https://{cf_proxies_url}"

    # Fold params into the target URL by hand, because the worker receives
    # the whole target as a single quoted query value.
    target_url = url
    if params:
        # doseq=True expands list/tuple values into repeated keys instead of
        # encoding the sequence's repr.
        query = params if isinstance(params, str) else urlencode(params, doseq=True)
        # Respect a query string that is already present on the URL.
        if "?" not in url:
            separator = "?"
        elif url.endswith(("?", "&")):
            separator = ""
        else:
            separator = "&"
        target_url = f"{url}{separator}{query}"

    proxy_url = f"{cf_proxies_url}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
    if token:
        proxy_url += f"&token={quote(token, safe='')}"

    # Caller headers are tunnelled under a prefix.
    request_headers = {}
    if headers:
        for key, value in headers.items():
            request_headers[f"X-CFSpider-Header-{key}"] = value

    if cookies:
        cookie_str = "; ".join([f"{k}={v}" for k, v in cookies.items()])
        request_headers["X-CFSpider-Header-Cookie"] = cookie_str

    resp = requests.post(
        proxy_url,
        headers=request_headers,
        data=data,
        json=json_data,
        timeout=timeout,
        **kwargs
    )

    cf_colo = resp.headers.get("X-CF-Colo")
    cf_ray = resp.headers.get("CF-Ray")

    response = CFSpiderResponse(resp, cf_colo=cf_colo, cf_ray=cf_ray)
    _handle_map_output(response, url, start_time, map_output, map_file)
    return response
436
+
437
+
438
def _handle_map_output(response, url, start_time, map_output, map_file):
    """Record the request in the IP map and regenerate the map HTML file.

    Does nothing unless ``map_output`` is truthy.

    Args:
        response: Response wrapper; ``status_code`` is read directly, while
            ``cf_colo`` and ``cf_ray`` are read with a ``None`` fallback.
        url: The requested target URL, stored with the record.
        start_time: ``time.time()`` value captured before the request.
        map_output: Whether map output was requested.
        map_file: Path passed to the map generator as its output file.
    """
    if map_output:
        # Elapsed wall-clock time since start_time, in milliseconds.
        elapsed_ms = (time.time() - start_time) * 1000

        record = {
            "url": url,
            # The client IP is not available here; only the colo code is.
            "ip": None,
            "cf_colo": getattr(response, 'cf_colo', None),
            "cf_ray": getattr(response, 'cf_ray', None),
            "status_code": response.status_code,
            "response_time": elapsed_ms,
        }
        ip_map.add_ip_record(**record)

        # Rewrite the HTML map so it includes the new record.
        ip_map.generate_map_html(output_file=map_file)
458
+
459
+
460
def _request_impersonate(method, url, cf_proxies, cf_workers, impersonate,
                         params=None, headers=None, data=None, json_data=None,
                         cookies=None, timeout=30, token=None, **kwargs):
    """Send a request with curl_cffi so the TLS fingerprint mimics a browser.

    Args:
        method: HTTP method name.
        url: Target URL.
        cf_proxies: Workers base URL or ordinary proxy URL, depending on
            ``cf_workers``; falsy means a direct request.
        cf_workers: True when ``cf_proxies`` is a CFspider Workers address.
        impersonate: Browser name forwarded to curl_cffi's ``impersonate``.
        params, headers, data, json_data, cookies, timeout: Usual request
            options; ``json_data`` is sent as the ``json`` argument.
        token: Appended to the Workers URL as the ``token`` query parameter
            (Workers mode only).
        **kwargs: Forwarded unchanged to the curl_cffi call.

    Returns:
        CFSpiderResponse: Wrapper around the curl_cffi response. In Workers
        mode ``cf_colo`` / ``cf_ray`` come from the ``X-CF-Colo`` / ``CF-Ray``
        response headers.

    Raises:
        ImportError: If curl_cffi is not installed.
    """
    curl_requests = _get_curl_cffi()

    # Direct request or ordinary proxy: same call, proxies added when needed.
    if not cf_proxies or not cf_workers:
        proxy_options = {}
        if cf_proxies:
            proxy_url = cf_proxies
            if not proxy_url.startswith(('http://', 'https://', 'socks5://')):
                proxy_url = f"http://{proxy_url}"
            proxy_options["proxies"] = {"http": proxy_url, "https": proxy_url}

        response = curl_requests.request(
            method,
            url,
            params=params,
            headers=headers,
            data=data,
            json=json_data,
            cookies=cookies,
            timeout=timeout,
            impersonate=impersonate,
            **proxy_options,
            **kwargs
        )
        return CFSpiderResponse(response)

    # cf_workers=True: relay through the CFspider Workers API.
    cf_proxies = cf_proxies.rstrip("/")
    if not cf_proxies.startswith(('http://', 'https://')):
        cf_proxies = f"https://{cf_proxies}"

    # Fold params into the target URL by hand, because the worker receives
    # the whole target as a single quoted query value.
    target_url = url
    if params:
        # doseq=True expands list/tuple values into repeated keys.
        query = params if isinstance(params, str) else urlencode(params, doseq=True)
        # Respect a query string that is already present on the URL.
        if "?" not in url:
            separator = "?"
        elif url.endswith(("?", "&")):
            separator = ""
        else:
            separator = "&"
        target_url = f"{url}{separator}{query}"

    proxy_url = f"{cf_proxies}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
    if token:
        proxy_url += f"&token={quote(token, safe='')}"

    # Caller headers are tunnelled to the worker under a prefix.
    request_headers = {}
    if headers:
        for key, value in headers.items():
            request_headers[f"X-CFSpider-Header-{key}"] = value

    if cookies:
        cookie_str = "; ".join([f"{k}={v}" for k, v in cookies.items()])
        request_headers["X-CFSpider-Header-Cookie"] = cookie_str

    # The worker is always called with POST; the real method is in the URL.
    response = curl_requests.post(
        proxy_url,
        headers=request_headers,
        data=data,
        json=json_data,
        timeout=timeout,
        impersonate=impersonate,
        **kwargs
    )

    cf_colo = response.headers.get("X-CF-Colo")
    cf_ray = response.headers.get("CF-Ray")

    return CFSpiderResponse(response, cf_colo=cf_colo, cf_ray=cf_ray)
540
+
541
+
542
def _request_httpx(method, url, cf_proxies, cf_workers, params=None, headers=None,
                   data=None, json_data=None, cookies=None, timeout=30, token=None, **kwargs):
    """Send a request with httpx with HTTP/2 enabled.

    Two requests-style options are translated to their httpx equivalents so
    callers can use the same keyword arguments on every backend:

    - ``allow_redirects`` becomes ``follow_redirects``. Redirects are
      followed by default, matching the requests backend.
    - ``verify`` is passed to the ``httpx.Client`` constructor, since httpx
      does not accept it per request.

    Args:
        method: HTTP method name.
        url: Target URL.
        cf_proxies: Workers base URL or ordinary proxy URL, depending on
            ``cf_workers``; falsy means a direct request.
        cf_workers: True when ``cf_proxies`` is a CFspider Workers address.
        params, headers, data, json_data, cookies: Usual request options;
            ``json_data`` is sent as the ``json`` argument.
        timeout: Timeout given to the ``httpx.Client``.
        token: Appended to the Workers URL as the ``token`` query parameter
            (Workers mode only).
        **kwargs: Remaining options, forwarded to the httpx request call
            after the translation described above.

    Returns:
        CFSpiderResponse: Wrapper around the httpx response. In Workers mode
        ``cf_colo`` / ``cf_ray`` come from the ``X-CF-Colo`` / ``CF-Ray``
        response headers.

    Raises:
        ImportError: If httpx is not installed.
    """
    httpx = _get_httpx()

    client_options = {"http2": True, "timeout": timeout}
    # httpx takes certificate verification on the client, not per request.
    if "verify" in kwargs:
        client_options["verify"] = kwargs.pop("verify")
    # requests spells it allow_redirects; httpx spells it follow_redirects.
    # A follow_redirects value given explicitly by the caller wins.
    if "allow_redirects" in kwargs:
        requested = kwargs.pop("allow_redirects")
        kwargs.setdefault("follow_redirects", requested)
    kwargs.setdefault("follow_redirects", True)

    # Direct request or ordinary proxy: same call, proxy added when needed.
    if not cf_proxies or not cf_workers:
        if cf_proxies:
            proxy_url = cf_proxies
            if not proxy_url.startswith(('http://', 'https://', 'socks5://')):
                proxy_url = f"http://{proxy_url}"
            client_options["proxy"] = proxy_url

        with httpx.Client(**client_options) as client:
            response = client.request(
                method,
                url,
                params=params,
                headers=headers,
                data=data,
                json=json_data,
                cookies=cookies,
                **kwargs
            )
        return CFSpiderResponse(response)

    # cf_workers=True: relay through the CFspider Workers API.
    cf_proxies = cf_proxies.rstrip("/")
    if not cf_proxies.startswith(('http://', 'https://')):
        cf_proxies = f"https://{cf_proxies}"

    # Fold params into the target URL by hand, because the worker receives
    # the whole target as a single quoted query value.
    target_url = url
    if params:
        # doseq=True expands list/tuple values into repeated keys.
        query = params if isinstance(params, str) else urlencode(params, doseq=True)
        # Respect a query string that is already present on the URL.
        if "?" not in url:
            separator = "?"
        elif url.endswith(("?", "&")):
            separator = ""
        else:
            separator = "&"
        target_url = f"{url}{separator}{query}"

    proxy_url = f"{cf_proxies}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
    if token:
        proxy_url += f"&token={quote(token, safe='')}"

    # Caller headers are tunnelled to the worker under a prefix.
    request_headers = {}
    if headers:
        for key, value in headers.items():
            request_headers[f"X-CFSpider-Header-{key}"] = value

    if cookies:
        cookie_str = "; ".join([f"{k}={v}" for k, v in cookies.items()])
        request_headers["X-CFSpider-Header-Cookie"] = cookie_str

    # The worker is always called with POST; the real method is in the URL.
    with httpx.Client(**client_options) as client:
        response = client.post(
            proxy_url,
            headers=request_headers,
            data=data,
            json=json_data,
            **kwargs
        )

    cf_colo = response.headers.get("X-CF-Colo")
    cf_ray = response.headers.get("CF-Ray")

    return CFSpiderResponse(response, cf_colo=cf_colo, cf_ray=cf_ray)
617
+
618
+
619
def get(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
        map_output=False, map_file="cfspider_map.html",
        stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send a GET request by delegating to ``request("GET", ...)``.

    Args:
        url (str): Target URL, including the scheme.
        cf_proxies (str, optional): Workers address when ``cf_workers`` is
            true, ordinary HTTP/SOCKS5 proxy otherwise; omit for a direct
            request.
        cf_workers (bool): Whether to use the CFspider Workers API.
        http2 (bool): Whether to use the HTTP/2 (httpx) backend.
        impersonate (str, optional): Browser name for TLS fingerprint
            impersonation (curl_cffi backend), for example ``"chrome131"``.
        map_output (bool): Whether to generate the IP map HTML file.
        map_file (str): Output file name of the map.
        stealth (bool): Whether to add browser-like request headers.
        stealth_browser (str): Browser profile used in stealth mode.
        delay (tuple, optional): Random delay range in seconds, for example
            ``(1, 3)``.
        token (str, optional): Workers API authentication token.
        **kwargs: Further request options (``params``, ``headers``, ``data``,
            ``json``, ``cookies``, ``timeout``, ...), forwarded as given.

    Returns:
        CFSpiderResponse: The response wrapper returned by ``request``.
    """
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("GET", url, **forwarded, **kwargs)
684
+
685
+
686
def post(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
         map_output=False, map_file="cfspider_map.html",
         stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send a POST request by delegating to ``request("POST", ...)``.

    Args:
        url (str): Target URL, including the scheme.
        cf_proxies (str, optional): Workers address when ``cf_workers`` is
            true, ordinary HTTP/SOCKS5 proxy otherwise; omit for a direct
            request.
        cf_workers (bool): Whether to use the CFspider Workers API.
        http2 (bool): Whether to use the HTTP/2 (httpx) backend.
        impersonate (str, optional): Browser name for TLS fingerprint
            impersonation (curl_cffi backend).
        map_output (bool): Whether to generate the IP map HTML file.
        map_file (str): Output file name of the map.
        stealth (bool): Whether to add browser-like request headers.
        stealth_browser (str): Browser profile used in stealth mode.
        delay (tuple, optional): Random delay range in seconds, for example
            ``(1, 3)``.
        token (str, optional): Workers API authentication token.
        **kwargs: Further request options (``data``, ``json``, ``headers``,
            ``cookies``, ``timeout``, ...), forwarded as given.

    Returns:
        CFSpiderResponse: The response wrapper returned by ``request``.
    """
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("POST", url, **forwarded, **kwargs)
738
+
739
+
740
def put(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
        map_output=False, map_file="cfspider_map.html",
        stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send a PUT request by delegating to ``request("PUT", ...)``.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Proxy address.
        cf_workers (bool): Whether to use the CFspider Workers API.
        http2 (bool): Whether to use the HTTP/2 (httpx) backend.
        impersonate (str, optional): TLS fingerprint impersonation target.
        map_output (bool): Whether to generate the IP map HTML file.
        map_file (str): Output file name of the map.
        stealth (bool): Whether to add browser-like request headers.
        stealth_browser (str): Browser profile used in stealth mode.
        delay (tuple, optional): Random delay range in seconds, for example
            ``(1, 3)``.
        token (str, optional): Workers API authentication token.
        **kwargs: Further request options, forwarded as given.

    Returns:
        CFSpiderResponse: The response wrapper returned by ``request``.
    """
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("PUT", url, **forwarded, **kwargs)
776
+
777
+
778
def delete(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
           map_output=False, map_file="cfspider_map.html",
           stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send a DELETE request by delegating to ``request("DELETE", ...)``.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Proxy address.
        cf_workers (bool): Whether to use the CFspider Workers API.
        http2 (bool): Whether to use the HTTP/2 (httpx) backend.
        impersonate (str, optional): TLS fingerprint impersonation target.
        map_output (bool): Whether to generate the IP map HTML file.
        map_file (str): Output file name of the map.
        stealth (bool): Whether to add browser-like request headers.
        stealth_browser (str): Browser profile used in stealth mode.
        delay (tuple, optional): Random delay range in seconds, for example
            ``(1, 3)``.
        token (str, optional): Workers API authentication token.
        **kwargs: Further request options, forwarded as given.

    Returns:
        CFSpiderResponse: The response wrapper returned by ``request``.
    """
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("DELETE", url, **forwarded, **kwargs)
814
+
815
+
816
def head(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
         map_output=False, map_file="cfspider_map.html",
         stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send a HEAD request by delegating to ``request("HEAD", ...)``.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Proxy address.
        cf_workers (bool): Whether to use the CFspider Workers API.
        http2 (bool): Whether to use the HTTP/2 (httpx) backend.
        impersonate (str, optional): TLS fingerprint impersonation target.
        map_output (bool): Whether to generate the IP map HTML file.
        map_file (str): Output file name of the map.
        stealth (bool): Whether to add browser-like request headers.
        stealth_browser (str): Browser profile used in stealth mode.
        delay (tuple, optional): Random delay range in seconds, for example
            ``(1, 3)``.
        token (str, optional): Workers API authentication token.
        **kwargs: Further request options, forwarded as given.

    Returns:
        CFSpiderResponse: The response wrapper returned by ``request``.
    """
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("HEAD", url, **forwarded, **kwargs)
852
+
853
+
854
def options(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
            map_output=False, map_file="cfspider_map.html",
            stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send an OPTIONS request by delegating to ``request("OPTIONS", ...)``.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Proxy address.
        cf_workers (bool): Whether to use the CFspider Workers API.
        http2 (bool): Whether to use the HTTP/2 (httpx) backend.
        impersonate (str, optional): TLS fingerprint impersonation target.
        map_output (bool): Whether to generate the IP map HTML file.
        map_file (str): Output file name of the map.
        stealth (bool): Whether to add browser-like request headers.
        stealth_browser (str): Browser profile used in stealth mode.
        delay (tuple, optional): Random delay range in seconds, for example
            ``(1, 3)``.
        token (str, optional): Workers API authentication token.
        **kwargs: Further request options, forwarded as given.

    Returns:
        CFSpiderResponse: The response wrapper returned by ``request``.
    """
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("OPTIONS", url, **forwarded, **kwargs)
890
+
891
+
892
def patch(url, cf_proxies=None, cf_workers=True, http2=False, impersonate=None,
          map_output=False, map_file="cfspider_map.html",
          stealth=False, stealth_browser='chrome', delay=None, token=None, **kwargs):
    """Send a PATCH request by delegating to ``request("PATCH", ...)``.

    Args:
        url (str): Target URL.
        cf_proxies (str, optional): Proxy address.
        cf_workers (bool): Whether to use the CFspider Workers API.
        http2 (bool): Whether to use the HTTP/2 (httpx) backend.
        impersonate (str, optional): TLS fingerprint impersonation target.
        map_output (bool): Whether to generate the IP map HTML file.
        map_file (str): Output file name of the map.
        stealth (bool): Whether to add browser-like request headers.
        stealth_browser (str): Browser profile used in stealth mode.
        delay (tuple, optional): Random delay range in seconds, for example
            ``(1, 3)``.
        token (str, optional): Workers API authentication token.
        **kwargs: Further request options, forwarded as given.

    Returns:
        CFSpiderResponse: The response wrapper returned by ``request``.
    """
    forwarded = {
        "cf_proxies": cf_proxies,
        "cf_workers": cf_workers,
        "http2": http2,
        "impersonate": impersonate,
        "map_output": map_output,
        "map_file": map_file,
        "stealth": stealth,
        "stealth_browser": stealth_browser,
        "delay": delay,
        "token": token,
    }
    return request("PATCH", url, **forwarded, **kwargs)
928
+
929
+
930
def clear_map_records():
    """Clear the IP map records by delegating to ``ip_map.clear_records()``.

    Returns:
        None. Any value returned by ``ip_map.clear_records()`` is discarded.
    """
    ip_map.clear_records()
    return None
933
+
934
+
935
def get_map_collector():
    """Return the IP map collector obtained from ``ip_map.get_collector()``."""
    collector = ip_map.get_collector()
    return collector