cfspider 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,388 @@
1
+ """
2
+ CFspider TLS 指纹模拟模块
3
+
4
+ 基于 curl_cffi 实现,可模拟各种浏览器的 TLS 指纹,绕过反爬检测。
5
+ """
6
+ from urllib.parse import urlencode, quote
7
+ from typing import Optional, Dict, Any, List
8
+
9
# Cached handle to ``curl_cffi.requests``. It stays None until the first call
# to _get_curl_cffi(), so importing this module never imports curl_cffi.
_curl_cffi = None


def _get_curl_cffi():
    """Return the ``curl_cffi.requests`` module, importing it on first use.

    The imported module is stored in the module-level ``_curl_cffi`` so later
    calls return the cached object without importing again.

    Returns:
        The ``curl_cffi.requests`` module.

    Raises:
        ImportError: If curl_cffi cannot be imported. The original import
            failure is chained as ``__cause__``.
    """
    global _curl_cffi
    if _curl_cffi is None:
        # Keep the try body limited to the import so only that failure is
        # translated into the installation hint below.
        try:
            from curl_cffi import requests as curl_requests
        except ImportError as exc:
            raise ImportError(
                "curl_cffi is required for TLS fingerprint impersonation. "
                "Install it with: pip install curl_cffi"
            ) from exc
        _curl_cffi = curl_requests
    return _curl_cffi
25
+
26
+
27
# Browser fingerprint identifiers accepted as the ``impersonate`` value,
# grouped by browser family.
SUPPORTED_BROWSERS = [
    # Chrome
    "chrome99",
    "chrome100",
    "chrome101",
    "chrome104",
    "chrome107",
    "chrome110",
    "chrome116",
    "chrome119",
    "chrome120",
    "chrome123",
    "chrome124",
    "chrome131",
    # Chrome on Android
    "chrome99_android",
    "chrome131_android",
    # Edge
    "edge99",
    "edge101",
    # Safari
    "safari15_3",
    "safari15_5",
    "safari17_0",
    "safari17_2_ios",
    "safari18_0",
    "safari18_0_ios",
    # Firefox
    "firefox102",
    "firefox109",
    "firefox133",
]
43
+
44
+
45
class ImpersonateResponse:
    """Wrapper around the response object returned by a TLS-impersonated request.

    Attribute access is delegated to the wrapped response. ``cf_colo`` and
    ``cf_ray`` hold whatever values the caller supplied (``None`` by default).
    """

    def __init__(self, response, cf_colo: Optional[str] = None, cf_ray: Optional[str] = None):
        """Store the wrapped response and the optional Cloudflare metadata."""
        self._response = response
        self.cf_colo = cf_colo
        self.cf_ray = cf_ray

    def __repr__(self) -> str:
        """Return a short debugging representation including the status code."""
        return f"<{type(self).__name__} [{self.status_code}]>"

    @property
    def text(self) -> str:
        """Response body as exposed by the wrapped response's ``text``."""
        return self._response.text

    @property
    def content(self) -> bytes:
        """Response body as exposed by the wrapped response's ``content``."""
        return self._response.content

    @property
    def status_code(self) -> int:
        """HTTP status code of the wrapped response."""
        return self._response.status_code

    @property
    def ok(self) -> bool:
        """True when ``status_code`` is below 400."""
        return self.status_code < 400

    @property
    def headers(self) -> Dict:
        """Response headers copied into a new plain ``dict`` on every access."""
        return dict(self._response.headers)

    @property
    def cookies(self) -> Dict:
        """Response cookies copied into a new plain ``dict`` on every access."""
        return dict(self._response.cookies)

    @property
    def url(self) -> str:
        """URL of the wrapped response, converted to ``str``."""
        return str(self._response.url)

    def json(self, **kwargs) -> Any:
        """Return ``json(**kwargs)`` of the wrapped response."""
        return self._response.json(**kwargs)

    def raise_for_status(self) -> None:
        """Delegate to the wrapped response's ``raise_for_status()``."""
        self._response.raise_for_status()
82
+
83
+
84
def impersonate_request(
    method: str,
    url: str,
    impersonate: str = "chrome131",
    cf_proxies: Optional[str] = None,
    cf_workers: bool = True,
    token: Optional[str] = None,
    **kwargs
) -> ImpersonateResponse:
    """Send an HTTP request with an impersonated browser TLS fingerprint.

    Routing depends on ``cf_proxies`` and ``cf_workers``:

    * ``cf_proxies`` empty: the request is sent directly to ``url``.
    * ``cf_proxies`` set, ``cf_workers`` false: ``cf_proxies`` is used as a
      regular proxy for both http and https (``http://`` is prepended when it
      has no ``http://``, ``https://`` or ``socks5://`` scheme).
    * ``cf_proxies`` set, ``cf_workers`` true: the request is POSTed to
      ``{cf_proxies}/proxy`` with the target URL and method in the query
      string; caller headers and cookies are forwarded as
      ``X-CFSpider-Header-*`` request headers.

    Args:
        method: HTTP method.
        url: Target URL.
        impersonate: Browser fingerprint name; must be in SUPPORTED_BROWSERS
            (e.g. chrome131, safari18_0, firefox133).
        cf_proxies: Proxy address or Workers endpoint (optional).
        cf_workers: Whether ``cf_proxies`` is a CFspider Workers API endpoint
            (default True).
        token: Optional token appended to the Workers proxy URL as the
            ``token`` query parameter. Only used in Workers mode.
        **kwargs: ``params``, ``headers``, ``data``, ``json``, ``cookies`` and
            ``timeout`` (default 30) are handled here; anything else is
            passed through to curl_cffi.

    Returns:
        ImpersonateResponse: Wrapped response. In Workers mode ``cf_colo`` and
        ``cf_ray`` are filled from the ``X-CF-Colo`` and ``CF-Ray`` response
        headers.

    Raises:
        ImportError: If curl_cffi is not installed.
        ValueError: If ``impersonate`` is not a supported browser.

    Example:
        >>> response = cfspider.impersonate_get("https://example.com", impersonate="chrome131")
        >>> print(response.text)
    """
    curl_requests = _get_curl_cffi()

    params = kwargs.pop("params", None)
    headers = kwargs.pop("headers", {})
    data = kwargs.pop("data", None)
    json_data = kwargs.pop("json", None)
    cookies = kwargs.pop("cookies", None)
    timeout = kwargs.pop("timeout", 30)

    # Validate the requested browser fingerprint.
    if impersonate not in SUPPORTED_BROWSERS:
        raise ValueError(
            f"Unsupported browser: {impersonate}. "
            f"Supported browsers: {', '.join(SUPPORTED_BROWSERS[:10])}..."
        )

    # Arguments shared by the direct and plain-proxy code paths.
    common = {
        "params": params,
        "headers": headers,
        "data": data,
        "json": json_data,
        "cookies": cookies,
        "timeout": timeout,
        "impersonate": impersonate,
    }

    # No cf_proxies given: send the request directly.
    if not cf_proxies:
        response = curl_requests.request(method, url, **common, **kwargs)
        return ImpersonateResponse(response)

    # cf_workers=False: treat cf_proxies as an ordinary proxy.
    if not cf_workers:
        proxy_url = cf_proxies
        if not proxy_url.startswith(('http://', 'https://', 'socks5://')):
            proxy_url = f"http://{proxy_url}"

        response = curl_requests.request(
            method,
            url,
            proxies={"http": proxy_url, "https": proxy_url},
            **common,
            **kwargs
        )
        return ImpersonateResponse(response)

    # cf_workers=True: go through the CFspider Workers API proxy endpoint.
    proxy_url = _workers_proxy_url(cf_proxies, url, params, method, token)

    # Caller headers are tunnelled under a prefix; the proxy request itself
    # carries no other caller-supplied headers.
    request_headers = {}
    if headers:
        for key, value in headers.items():
            request_headers[f"X-CFSpider-Header-{key}"] = value

    if cookies:
        cookie_str = "; ".join(f"{k}={v}" for k, v in cookies.items())
        request_headers["X-CFSpider-Header-Cookie"] = cookie_str

    response = curl_requests.post(
        proxy_url,
        headers=request_headers,
        data=data,
        json=json_data,
        timeout=timeout,
        impersonate=impersonate,
        **kwargs
    )

    cf_colo = response.headers.get("X-CF-Colo")
    cf_ray = response.headers.get("CF-Ray")

    return ImpersonateResponse(response, cf_colo=cf_colo, cf_ray=cf_ray)


def _workers_proxy_url(cf_proxies, url, params, method, token=None):
    """Build the ``/proxy`` URL used to relay a request through a Workers endpoint.

    ``params`` are merged into ``url`` before it is percent-encoded into the
    ``url`` query parameter. They are joined with ``&`` when ``url`` already
    contains a query string, and sequence values are expanded into repeated
    keys.
    """
    base = cf_proxies.rstrip("/")
    if not base.startswith(('http://', 'https://')):
        base = f"https://{base}"

    target_url = url
    if params:
        # A second "?" would corrupt a URL that already has a query string.
        separator = "&" if "?" in url else "?"
        target_url = f"{url}{separator}{urlencode(params, doseq=True)}"

    proxy_url = f"{base}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
    if token:
        proxy_url += f"&token={quote(token, safe='')}"
    return proxy_url
201
+
202
+
203
+ # 便捷方法
204
def impersonate_get(
    url: str,
    impersonate: str = "chrome131",
    **kwargs
) -> ImpersonateResponse:
    """Send a GET request through ``impersonate_request``.

    All extra keyword arguments are forwarded unchanged.
    """
    return impersonate_request(
        method="GET",
        url=url,
        impersonate=impersonate,
        **kwargs
    )
207
+
208
+
209
def impersonate_post(
    url: str,
    impersonate: str = "chrome131",
    **kwargs
) -> ImpersonateResponse:
    """Send a POST request through ``impersonate_request``.

    All extra keyword arguments are forwarded unchanged.
    """
    return impersonate_request(
        method="POST",
        url=url,
        impersonate=impersonate,
        **kwargs
    )
212
+
213
+
214
def impersonate_put(
    url: str,
    impersonate: str = "chrome131",
    **kwargs
) -> ImpersonateResponse:
    """Send a PUT request through ``impersonate_request``.

    All extra keyword arguments are forwarded unchanged.
    """
    return impersonate_request(
        method="PUT",
        url=url,
        impersonate=impersonate,
        **kwargs
    )
217
+
218
+
219
def impersonate_delete(
    url: str,
    impersonate: str = "chrome131",
    **kwargs
) -> ImpersonateResponse:
    """Send a DELETE request through ``impersonate_request``.

    All extra keyword arguments are forwarded unchanged.
    """
    return impersonate_request(
        method="DELETE",
        url=url,
        impersonate=impersonate,
        **kwargs
    )
222
+
223
+
224
def impersonate_head(
    url: str,
    impersonate: str = "chrome131",
    **kwargs
) -> ImpersonateResponse:
    """Send a HEAD request through ``impersonate_request``.

    All extra keyword arguments are forwarded unchanged.
    """
    return impersonate_request(
        method="HEAD",
        url=url,
        impersonate=impersonate,
        **kwargs
    )
227
+
228
+
229
def impersonate_options(
    url: str,
    impersonate: str = "chrome131",
    **kwargs
) -> ImpersonateResponse:
    """Send an OPTIONS request through ``impersonate_request``.

    All extra keyword arguments are forwarded unchanged.
    """
    return impersonate_request(
        method="OPTIONS",
        url=url,
        impersonate=impersonate,
        **kwargs
    )
232
+
233
+
234
def impersonate_patch(
    url: str,
    impersonate: str = "chrome131",
    **kwargs
) -> ImpersonateResponse:
    """Send a PATCH request through ``impersonate_request``.

    All extra keyword arguments are forwarded unchanged.
    """
    return impersonate_request(
        method="PATCH",
        url=url,
        impersonate=impersonate,
        **kwargs
    )
237
+
238
+
239
class ImpersonateSession:
    """Session that sends requests with an impersonated browser TLS fingerprint.

    Wraps a ``curl_cffi`` session created with the chosen fingerprint and
    applies session-level default headers, cookies and timeout to every
    request. Usable as a context manager; the underlying session is closed
    on exit.

    Example:
        >>> with cfspider.ImpersonateSession(impersonate="chrome131") as session:
        ...     r1 = session.get("https://example.com")
        ...     r2 = session.post("https://api.example.com", json={"key": "value"})
    """

    def __init__(
        self,
        impersonate: str = "chrome131",
        cf_proxies: Optional[str] = None,
        cf_workers: bool = True,
        timeout: float = 30,
        headers: Optional[Dict[str, str]] = None,
        cookies: Optional[Dict[str, str]] = None,
        token: Optional[str] = None,
        **kwargs
    ):
        """Create the session.

        Args:
            impersonate: Browser fingerprint name (default chrome131); must be
                in SUPPORTED_BROWSERS.
            cf_proxies: Proxy address or Workers endpoint (optional).
            cf_workers: Whether ``cf_proxies`` is a CFspider Workers API
                endpoint (default True).
            timeout: Default timeout in seconds.
            headers: Default request headers.
            cookies: Default cookies.
            token: Optional token appended to the Workers proxy URL as the
                ``token`` query parameter. Only used in Workers mode.
            **kwargs: Accepted for backward compatibility; not used.

        Raises:
            ImportError: If curl_cffi is not installed.
            ValueError: If ``impersonate`` is not a supported browser.
        """
        curl_requests = _get_curl_cffi()

        if impersonate not in SUPPORTED_BROWSERS:
            raise ValueError(
                f"Unsupported browser: {impersonate}. "
                f"Supported browsers: {', '.join(SUPPORTED_BROWSERS[:10])}..."
            )

        self.impersonate = impersonate
        self.cf_proxies = cf_proxies
        self.cf_workers = cf_workers
        self.timeout = timeout
        self.headers = headers or {}
        self.cookies = cookies or {}
        self.token = token
        self._session = curl_requests.Session(impersonate=impersonate)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Close the underlying curl_cffi session."""
        if self._session:
            self._session.close()

    @staticmethod
    def _workers_proxy_url(cf_proxies, url, params, method, token=None):
        """Build the ``/proxy`` URL used to relay a request through a Workers endpoint.

        ``params`` are merged into ``url`` before it is percent-encoded into
        the ``url`` query parameter. They are joined with ``&`` when ``url``
        already contains a query string, and sequence values are expanded
        into repeated keys.
        """
        base = cf_proxies.rstrip("/")
        if not base.startswith(('http://', 'https://')):
            base = f"https://{base}"

        target_url = url
        if params:
            # A second "?" would corrupt a URL that already has a query string.
            separator = "&" if "?" in url else "?"
            target_url = f"{url}{separator}{urlencode(params, doseq=True)}"

        proxy_url = f"{base}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
        if token:
            proxy_url += f"&token={quote(token, safe='')}"
        return proxy_url

    def request(self, method: str, url: str, **kwargs) -> "ImpersonateResponse":
        """Send a request, merging session defaults with per-call overrides.

        Per-call ``headers`` and ``cookies`` are layered over the session
        defaults (per-call values win); ``timeout`` falls back to the session
        default. Remaining keyword arguments are passed to curl_cffi.

        Returns:
            ImpersonateResponse: Wrapped response. In Workers mode ``cf_colo``
            and ``cf_ray`` are filled from the ``X-CF-Colo`` and ``CF-Ray``
            response headers.
        """
        # ``or {}`` lets callers pass headers=None / cookies=None explicitly.
        merged_headers = {**self.headers, **(kwargs.pop("headers", None) or {})}
        merged_cookies = {**self.cookies, **(kwargs.pop("cookies", None) or {})}
        timeout = kwargs.pop("timeout", self.timeout)

        # No cf_proxies, or Workers mode disabled: send the request directly,
        # optionally through cf_proxies as an ordinary proxy.
        if not self.cf_proxies or not self.cf_workers:
            proxies = None
            if self.cf_proxies and not self.cf_workers:
                proxy_url = self.cf_proxies
                if not proxy_url.startswith(('http://', 'https://', 'socks5://')):
                    proxy_url = f"http://{proxy_url}"
                proxies = {"http": proxy_url, "https": proxy_url}

            response = self._session.request(
                method,
                url,
                headers=merged_headers,
                cookies=merged_cookies,
                timeout=timeout,
                proxies=proxies,
                **kwargs
            )
            return ImpersonateResponse(response)

        # Workers mode: relay through the CFspider Workers API proxy endpoint.
        params = kwargs.pop("params", None)
        proxy_url = self._workers_proxy_url(
            self.cf_proxies, url, params, method, self.token
        )

        # Caller headers are tunnelled under a prefix.
        request_headers = {
            f"X-CFSpider-Header-{key}": value
            for key, value in merged_headers.items()
        }

        if merged_cookies:
            cookie_str = "; ".join(f"{k}={v}" for k, v in merged_cookies.items())
            request_headers["X-CFSpider-Header-Cookie"] = cookie_str

        response = self._session.post(
            proxy_url,
            headers=request_headers,
            timeout=timeout,
            **kwargs
        )

        cf_colo = response.headers.get("X-CF-Colo")
        cf_ray = response.headers.get("CF-Ray")

        return ImpersonateResponse(response, cf_colo=cf_colo, cf_ray=cf_ray)

    def get(self, url: str, **kwargs) -> "ImpersonateResponse":
        """Send a GET request."""
        return self.request("GET", url, **kwargs)

    def post(self, url: str, **kwargs) -> "ImpersonateResponse":
        """Send a POST request."""
        return self.request("POST", url, **kwargs)

    def put(self, url: str, **kwargs) -> "ImpersonateResponse":
        """Send a PUT request."""
        return self.request("PUT", url, **kwargs)

    def delete(self, url: str, **kwargs) -> "ImpersonateResponse":
        """Send a DELETE request."""
        return self.request("DELETE", url, **kwargs)

    def head(self, url: str, **kwargs) -> "ImpersonateResponse":
        """Send a HEAD request."""
        return self.request("HEAD", url, **kwargs)

    def options(self, url: str, **kwargs) -> "ImpersonateResponse":
        """Send an OPTIONS request."""
        return self.request("OPTIONS", url, **kwargs)

    def patch(self, url: str, **kwargs) -> "ImpersonateResponse":
        """Send a PATCH request."""
        return self.request("PATCH", url, **kwargs)
383
+
384
+
385
def get_supported_browsers() -> List[str]:
    """Return a new list containing the supported browser fingerprint names.

    The result is a shallow copy, so callers can modify it without affecting
    ``SUPPORTED_BROWSERS``.
    """
    return list(SUPPORTED_BROWSERS)
388
+