cfspider 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cfspider/__init__.py +230 -0
- cfspider/api.py +937 -0
- cfspider/async_api.py +418 -0
- cfspider/async_session.py +281 -0
- cfspider/browser.py +335 -0
- cfspider/cli.py +81 -0
- cfspider/impersonate.py +388 -0
- cfspider/ip_map.py +522 -0
- cfspider/mirror.py +682 -0
- cfspider/session.py +239 -0
- cfspider/stealth.py +537 -0
- cfspider/vless_client.py +572 -0
- cfspider-1.7.4.dist-info/METADATA +1390 -0
- cfspider-1.7.4.dist-info/RECORD +18 -0
- cfspider-1.7.4.dist-info/WHEEL +5 -0
- cfspider-1.7.4.dist-info/entry_points.txt +2 -0
- cfspider-1.7.4.dist-info/licenses/LICENSE +201 -0
- cfspider-1.7.4.dist-info/top_level.txt +1 -0
cfspider/async_api.py
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFspider 异步 API 模块
|
|
3
|
+
|
|
4
|
+
基于 httpx 实现,提供:
|
|
5
|
+
- 异步 HTTP 请求(async/await)
|
|
6
|
+
- HTTP/2 协议支持
|
|
7
|
+
- 流式响应(大文件下载)
|
|
8
|
+
- 并发请求控制
|
|
9
|
+
|
|
10
|
+
使用前需要安装 httpx:
|
|
11
|
+
pip install httpx[http2]
|
|
12
|
+
|
|
13
|
+
快速开始:
|
|
14
|
+
>>> import cfspider
|
|
15
|
+
>>> import asyncio
|
|
16
|
+
>>>
|
|
17
|
+
>>> async def main():
|
|
18
|
+
... # 异步 GET 请求
|
|
19
|
+
... response = await cfspider.aget("https://httpbin.org/ip")
|
|
20
|
+
... print(response.json())
|
|
21
|
+
...
|
|
22
|
+
... # 并发请求
|
|
23
|
+
... urls = ["https://httpbin.org/ip"] * 5
|
|
24
|
+
... tasks = [cfspider.aget(url, cf_proxies="...") for url in urls]
|
|
25
|
+
... responses = await asyncio.gather(*tasks)
|
|
26
|
+
>>>
|
|
27
|
+
>>> asyncio.run(main())
|
|
28
|
+
|
|
29
|
+
性能对比:
|
|
30
|
+
- 同步请求 10 个 URL:约 10 秒(串行)
|
|
31
|
+
- 异步请求 10 个 URL:约 1 秒(并发)
|
|
32
|
+
"""
|
|
33
|
+
import httpx
|
|
34
|
+
from urllib.parse import urlencode, quote
|
|
35
|
+
from typing import Optional, Dict, Any, AsyncIterator
|
|
36
|
+
from contextlib import asynccontextmanager
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class AsyncCFSpiderResponse:
    """
    Asynchronous response wrapper.

    Wraps an ``httpx.Response`` and exposes the same interface as the
    synchronous CFSpiderResponse, plus async iteration helpers for
    streaming consumption.

    Attributes:
        cf_colo (str): Cloudflare data-center code (when proxied).
        cf_ray (str): Cloudflare Ray ID (when proxied).
        text (str): Decoded response body.
        content (bytes): Raw response body.
        status_code (int): HTTP status code.
        headers: Response headers.
        http_version (str): Protocol version (HTTP/1.1 or HTTP/2).

    Methods:
        json(): Parse the body as JSON.
        aiter_bytes(): Asynchronously iterate raw chunks.
        aiter_text(): Asynchronously iterate decoded chunks.
        aiter_lines(): Asynchronously iterate lines.

    Example:
        >>> response = await cfspider.aget("https://httpbin.org/ip")
        >>> print(response.http_version)  # HTTP/2
        >>> data = response.json()
    """

    def __init__(self, response: httpx.Response, cf_colo: Optional[str] = None, cf_ray: Optional[str] = None):
        # All accessors below delegate to this underlying httpx response.
        self._response = response
        self.cf_colo = cf_colo
        self.cf_ray = cf_ray

    # --- simple delegating accessors -------------------------------------

    @property
    def status_code(self) -> int:
        """HTTP status code of the response."""
        return self._response.status_code

    @property
    def headers(self) -> httpx.Headers:
        """Response headers."""
        return self._response.headers

    @property
    def cookies(self) -> httpx.Cookies:
        """Cookies set by the response."""
        return self._response.cookies

    @property
    def url(self) -> httpx.URL:
        """Final URL of the response."""
        return self._response.url

    @property
    def text(self) -> str:
        """Response body decoded to text."""
        return self._response.text

    @property
    def content(self) -> bytes:
        """Raw response body."""
        return self._response.content

    @property
    def encoding(self) -> Optional[str]:
        """Character encoding used to decode the body, if known."""
        return self._response.encoding

    @property
    def http_version(self) -> str:
        """HTTP protocol version (e.g. HTTP/1.1 or HTTP/2)."""
        return self._response.http_version

    # --- body helpers ----------------------------------------------------

    def json(self, **kwargs) -> Any:
        """Parse the response body as JSON."""
        return self._response.json(**kwargs)

    def raise_for_status(self) -> None:
        """Raise ``httpx.HTTPStatusError`` for 4xx/5xx responses."""
        self._response.raise_for_status()

    # --- async streaming helpers -----------------------------------------

    async def aiter_bytes(self, chunk_size: Optional[int] = None) -> AsyncIterator[bytes]:
        """Asynchronously iterate over the response body as bytes."""
        async for piece in self._response.aiter_bytes(chunk_size):
            yield piece

    async def aiter_text(self, chunk_size: Optional[int] = None) -> AsyncIterator[str]:
        """Asynchronously iterate over the response body as text."""
        async for piece in self._response.aiter_text(chunk_size):
            yield piece

    async def aiter_lines(self) -> AsyncIterator[str]:
        """Asynchronously iterate over the response body line by line."""
        async for row in self._response.aiter_lines():
            yield row
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class AsyncStreamResponse:
    """Streaming response wrapper, intended for large downloads."""

    def __init__(self, response: httpx.Response, cf_colo: Optional[str] = None, cf_ray: Optional[str] = None):
        # Underlying httpx response; the body is consumed lazily.
        self._response = response
        self.cf_colo = cf_colo
        self.cf_ray = cf_ray

    @property
    def status_code(self) -> int:
        """HTTP status code of the response."""
        return self._response.status_code

    @property
    def headers(self) -> httpx.Headers:
        """Response headers."""
        return self._response.headers

    @property
    def http_version(self) -> str:
        """HTTP protocol version (e.g. HTTP/1.1 or HTTP/2)."""
        return self._response.http_version

    async def aiter_bytes(self, chunk_size: Optional[int] = None) -> AsyncIterator[bytes]:
        """Asynchronously iterate over the response body as bytes."""
        async for piece in self._response.aiter_bytes(chunk_size):
            yield piece

    async def aiter_text(self, chunk_size: Optional[int] = None) -> AsyncIterator[str]:
        """Asynchronously iterate over the response body as text."""
        async for piece in self._response.aiter_text(chunk_size):
            yield piece

    async def aiter_lines(self) -> AsyncIterator[str]:
        """Asynchronously iterate over the response body line by line."""
        async for row in self._response.aiter_lines():
            yield row

    async def aread(self) -> bytes:
        """Read and return the entire response body."""
        return await self._response.aread()

    async def aclose(self) -> None:
        """Close the response and release the connection."""
        await self._response.aclose()
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
async def arequest(
    method: str,
    url: str,
    cf_proxies: Optional[str] = None,
    cf_workers: bool = True,
    http2: bool = True,
    token: Optional[str] = None,
    **kwargs
) -> AsyncCFSpiderResponse:
    """
    Send an asynchronous HTTP request.

    Args:
        method: HTTP method.
        url: Target URL.
        cf_proxies: Proxy address (optional).
            - With cf_workers=True, a CFspider Workers address.
            - With cf_workers=False, an ordinary proxy address.
        cf_workers: Whether to use the CFspider Workers API (default True).
        http2: Whether to enable HTTP/2 (default True).
        token: Access token appended to the Workers proxy URL (optional).
        **kwargs: Additional request options (params, headers, data,
            json, cookies, timeout, ...).

    Returns:
        AsyncCFSpiderResponse: Asynchronous response object.
    """
    params = kwargs.pop("params", None)
    headers = kwargs.pop("headers", {})
    data = kwargs.pop("data", None)
    json_data = kwargs.pop("json", None)
    cookies = kwargs.pop("cookies", None)
    timeout = kwargs.pop("timeout", 30)

    # Options shared by the direct and plain-proxy request paths.
    passthrough = dict(
        params=params,
        headers=headers,
        data=data,
        json=json_data,
        cookies=cookies,
    )

    # No proxy configured: request the target directly.
    if not cf_proxies:
        async with httpx.AsyncClient(http2=http2, timeout=timeout) as client:
            resp = await client.request(method, url, **passthrough, **kwargs)
        return AsyncCFSpiderResponse(resp)

    # cf_workers=False: route through an ordinary proxy.
    if not cf_workers:
        proxy_url = cf_proxies
        if not proxy_url.startswith(('http://', 'https://', 'socks5://')):
            proxy_url = f"http://{proxy_url}"

        async with httpx.AsyncClient(http2=http2, timeout=timeout, proxy=proxy_url) as client:
            resp = await client.request(method, url, **passthrough, **kwargs)
        return AsyncCFSpiderResponse(resp)

    # cf_workers=True: wrap the target URL in the CFspider Workers /proxy
    # endpoint; the original method travels as a query parameter.
    base = cf_proxies.rstrip("/")
    if not base.startswith(('http://', 'https://')):
        base = f"https://{base}"

    target_url = f"{url}?{urlencode(params)}" if params else url

    proxy_url = f"{base}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
    if token:
        proxy_url += f"&token={quote(token, safe='')}"

    # Headers/cookies are forwarded via X-CFSpider-Header-* so the Worker
    # can replay them against the target.
    request_headers = {f"X-CFSpider-Header-{key}": value
                       for key, value in (headers or {}).items()}
    if cookies:
        request_headers["X-CFSpider-Header-Cookie"] = "; ".join(
            f"{k}={v}" for k, v in cookies.items()
        )

    async with httpx.AsyncClient(http2=http2, timeout=timeout) as client:
        resp = await client.post(
            proxy_url,
            headers=request_headers,
            data=data,
            json=json_data,
            **kwargs
        )

    return AsyncCFSpiderResponse(
        resp,
        cf_colo=resp.headers.get("X-CF-Colo"),
        cf_ray=resp.headers.get("CF-Ray"),
    )
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
@asynccontextmanager
async def astream(
    method: str,
    url: str,
    cf_proxies: Optional[str] = None,
    cf_workers: bool = True,
    http2: bool = True,
    token: Optional[str] = None,
    **kwargs
) -> AsyncIterator[AsyncStreamResponse]:
    """
    Context manager for streaming requests.

    Args:
        method: HTTP method.
        url: Target URL.
        cf_proxies: Proxy address (optional).
        cf_workers: Whether to use the CFspider Workers API (default True).
        http2: Whether to enable HTTP/2 (default True).
        token: Access token appended to the Workers proxy URL (optional).
        **kwargs: Additional request options.

    Yields:
        AsyncStreamResponse: Streaming response object.

    Example:
        async with cfspider.astream("GET", url) as response:
            async for chunk in response.aiter_bytes():
                process(chunk)
    """
    params = kwargs.pop("params", None)
    headers = kwargs.pop("headers", {})
    data = kwargs.pop("data", None)
    json_data = kwargs.pop("json", None)
    cookies = kwargs.pop("cookies", None)
    timeout = kwargs.pop("timeout", 30)

    # Options shared by the direct and plain-proxy streaming paths.
    passthrough = dict(
        params=params,
        headers=headers,
        data=data,
        json=json_data,
        cookies=cookies,
    )

    # No proxy configured: stream straight from the origin.
    if not cf_proxies:
        async with httpx.AsyncClient(http2=http2, timeout=timeout) as client:
            async with client.stream(method, url, **passthrough, **kwargs) as resp:
                yield AsyncStreamResponse(resp)
        return

    # cf_workers=False: stream through an ordinary proxy.
    if not cf_workers:
        proxy_url = cf_proxies
        if not proxy_url.startswith(('http://', 'https://', 'socks5://')):
            proxy_url = f"http://{proxy_url}"

        async with httpx.AsyncClient(http2=http2, timeout=timeout, proxy=proxy_url) as client:
            async with client.stream(method, url, **passthrough, **kwargs) as resp:
                yield AsyncStreamResponse(resp)
        return

    # cf_workers=True: wrap the target URL in the CFspider Workers /proxy
    # endpoint; the original method travels as a query parameter.
    base = cf_proxies.rstrip("/")
    if not base.startswith(('http://', 'https://')):
        base = f"https://{base}"

    target_url = f"{url}?{urlencode(params)}" if params else url

    proxy_endpoint = f"{base}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
    if token:
        proxy_endpoint += f"&token={quote(token, safe='')}"

    # Headers/cookies are forwarded via X-CFSpider-Header-* so the Worker
    # can replay them against the target.
    request_headers = {f"X-CFSpider-Header-{key}": value
                       for key, value in (headers or {}).items()}
    if cookies:
        request_headers["X-CFSpider-Header-Cookie"] = "; ".join(
            f"{k}={v}" for k, v in cookies.items()
        )

    async with httpx.AsyncClient(http2=http2, timeout=timeout) as client:
        async with client.stream(
            "POST",
            proxy_endpoint,
            headers=request_headers,
            data=data,
            json=json_data,
            **kwargs
        ) as resp:
            yield AsyncStreamResponse(
                resp,
                cf_colo=resp.headers.get("X-CF-Colo"),
                cf_ray=resp.headers.get("CF-Ray"),
            )
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
# 便捷方法
|
|
385
|
+
async def aget(url: str, cf_proxies: Optional[str] = None, cf_workers: bool = True, http2: bool = True, **kwargs) -> AsyncCFSpiderResponse:
    """Issue an asynchronous GET request via :func:`arequest`."""
    return await arequest(
        "GET", url,
        cf_proxies=cf_proxies, cf_workers=cf_workers, http2=http2,
        **kwargs,
    )
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
async def apost(url: str, cf_proxies: Optional[str] = None, cf_workers: bool = True, http2: bool = True, **kwargs) -> AsyncCFSpiderResponse:
    """Issue an asynchronous POST request via :func:`arequest`."""
    return await arequest(
        "POST", url,
        cf_proxies=cf_proxies, cf_workers=cf_workers, http2=http2,
        **kwargs,
    )
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
async def aput(url: str, cf_proxies: Optional[str] = None, cf_workers: bool = True, http2: bool = True, **kwargs) -> AsyncCFSpiderResponse:
    """Issue an asynchronous PUT request via :func:`arequest`."""
    return await arequest(
        "PUT", url,
        cf_proxies=cf_proxies, cf_workers=cf_workers, http2=http2,
        **kwargs,
    )
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
async def adelete(url: str, cf_proxies: Optional[str] = None, cf_workers: bool = True, http2: bool = True, **kwargs) -> AsyncCFSpiderResponse:
    """Issue an asynchronous DELETE request via :func:`arequest`."""
    return await arequest(
        "DELETE", url,
        cf_proxies=cf_proxies, cf_workers=cf_workers, http2=http2,
        **kwargs,
    )
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
async def ahead(url: str, cf_proxies: Optional[str] = None, cf_workers: bool = True, http2: bool = True, **kwargs) -> AsyncCFSpiderResponse:
    """Issue an asynchronous HEAD request via :func:`arequest`."""
    return await arequest(
        "HEAD", url,
        cf_proxies=cf_proxies, cf_workers=cf_workers, http2=http2,
        **kwargs,
    )
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
async def aoptions(url: str, cf_proxies: Optional[str] = None, cf_workers: bool = True, http2: bool = True, **kwargs) -> AsyncCFSpiderResponse:
    """Issue an asynchronous OPTIONS request via :func:`arequest`."""
    return await arequest(
        "OPTIONS", url,
        cf_proxies=cf_proxies, cf_workers=cf_workers, http2=http2,
        **kwargs,
    )
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
async def apatch(url: str, cf_proxies: Optional[str] = None, cf_workers: bool = True, http2: bool = True, **kwargs) -> AsyncCFSpiderResponse:
    """Issue an asynchronous PATCH request via :func:`arequest`."""
    return await arequest(
        "PATCH", url,
        cf_proxies=cf_proxies, cf_workers=cf_workers, http2=http2,
        **kwargs,
    )
|
|
418
|
+
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFspider 异步会话模块
|
|
3
|
+
|
|
4
|
+
基于 httpx 实现,提供可复用的异步 HTTP 客户端,支持 HTTP/2 和连接池。
|
|
5
|
+
"""
|
|
6
|
+
import httpx
|
|
7
|
+
from urllib.parse import urlencode, quote
|
|
8
|
+
from typing import Optional, Dict, Any, AsyncIterator
|
|
9
|
+
from contextlib import asynccontextmanager
|
|
10
|
+
|
|
11
|
+
from .async_api import AsyncCFSpiderResponse, AsyncStreamResponse
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AsyncSession:
    """
    Asynchronous session.

    Provides a reusable httpx.AsyncClient with HTTP/2 support and
    connection pooling.

    Example:
        async with cfspider.AsyncSession(cf_proxies="workers.dev") as session:
            r1 = await session.get("https://example.com")
            r2 = await session.post("https://example.com", json={"key": "value"})
    """

    def __init__(
        self,
        cf_proxies: Optional[str] = None,
        cf_workers: bool = True,
        http2: bool = True,
        timeout: float = 30,
        headers: Optional[Dict[str, str]] = None,
        cookies: Optional[Dict[str, str]] = None,
        token: Optional[str] = None,
        **kwargs
    ):
        """
        Initialize the asynchronous session.

        Args:
            cf_proxies: Proxy address (optional).
            cf_workers: Whether to use the CFspider Workers API (default True).
            http2: Whether to enable HTTP/2 (default True).
            timeout: Default timeout in seconds.
            headers: Default request headers.
            cookies: Default cookies.
            token: Access token appended to Workers proxy URLs (optional).
            **kwargs: Extra arguments passed through to httpx.AsyncClient.
        """
        self.cf_proxies = cf_proxies
        self.cf_workers = cf_workers
        self.http2 = http2
        self.timeout = timeout
        self.headers = headers or {}
        self.cookies = cookies or {}
        # BUG FIX: `token` was assigned here but was never a parameter, so
        # every AsyncSession(...) construction raised NameError. It is now
        # an explicit keyword argument (backward-compatible default None),
        # matching its use in request()/stream() below.
        self.token = token
        self._client_kwargs = kwargs
        self._client: Optional[httpx.AsyncClient] = None

    async def __aenter__(self) -> "AsyncSession":
        """Enter the async context, creating the underlying client."""
        await self._ensure_client()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Exit the async context, closing the underlying client."""
        await self.close()

    async def _ensure_client(self) -> None:
        """Lazily create the httpx.AsyncClient on first use."""
        if self._client is None:
            # A plain (non-Workers) proxy is configured on the client itself;
            # the Workers API path instead rewrites URLs per request.
            proxy = None
            if self.cf_proxies and not self.cf_workers:
                proxy = self.cf_proxies
                if not proxy.startswith(('http://', 'https://', 'socks5://')):
                    proxy = f"http://{proxy}"

            self._client = httpx.AsyncClient(
                http2=self.http2,
                timeout=self.timeout,
                proxy=proxy,
                headers=self.headers,
                cookies=self.cookies,
                **self._client_kwargs
            )

    async def close(self) -> None:
        """Close the session and release pooled connections."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None

    async def request(
        self,
        method: str,
        url: str,
        **kwargs
    ) -> AsyncCFSpiderResponse:
        """
        Send a request.

        Args:
            method: HTTP method.
            url: Target URL.
            **kwargs: Request options (params, headers, data, json,
                cookies, timeout, ...).

        Returns:
            AsyncCFSpiderResponse: Asynchronous response object.
        """
        await self._ensure_client()

        params = kwargs.pop("params", None)
        headers = kwargs.pop("headers", {})
        data = kwargs.pop("data", None)
        json_data = kwargs.pop("json", None)
        cookies = kwargs.pop("cookies", None)
        # BUG FIX: the previous default was None, and httpx treats an
        # explicit timeout=None as "disable timeouts", which silently
        # bypassed the session-level timeout. USE_CLIENT_DEFAULT keeps the
        # client's configured timeout when the caller does not supply one.
        timeout = kwargs.pop("timeout", httpx.USE_CLIENT_DEFAULT)

        # Merge session-level and per-request headers (request wins).
        merged_headers = {**self.headers, **headers}

        # No cf_proxies, or a plain proxy already configured on the client:
        # send the request directly.
        if not self.cf_proxies or not self.cf_workers:
            response = await self._client.request(
                method,
                url,
                params=params,
                headers=merged_headers,
                data=data,
                json=json_data,
                cookies=cookies,
                timeout=timeout,
                **kwargs
            )
            return AsyncCFSpiderResponse(response)

        # CFspider Workers API: wrap the target URL in the /proxy endpoint;
        # the original method travels as a query parameter.
        cf_proxies_url = self.cf_proxies.rstrip("/")

        if not cf_proxies_url.startswith(('http://', 'https://')):
            cf_proxies_url = f"https://{cf_proxies_url}"

        target_url = url
        if params:
            target_url = f"{url}?{urlencode(params)}"

        proxy_url = f"{cf_proxies_url}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
        if self.token:
            proxy_url += f"&token={quote(self.token, safe='')}"

        # Forward headers/cookies via X-CFSpider-Header-* so the Worker can
        # replay them against the target.
        request_headers = {}
        for key, value in merged_headers.items():
            request_headers[f"X-CFSpider-Header-{key}"] = value

        all_cookies = {**self.cookies, **(cookies or {})}
        if all_cookies:
            cookie_str = "; ".join([f"{k}={v}" for k, v in all_cookies.items()])
            request_headers["X-CFSpider-Header-Cookie"] = cookie_str

        response = await self._client.post(
            proxy_url,
            headers=request_headers,
            data=data,
            json=json_data,
            timeout=timeout,
            **kwargs
        )

        cf_colo = response.headers.get("X-CF-Colo")
        cf_ray = response.headers.get("CF-Ray")

        return AsyncCFSpiderResponse(response, cf_colo=cf_colo, cf_ray=cf_ray)

    @asynccontextmanager
    async def stream(
        self,
        method: str,
        url: str,
        **kwargs
    ) -> AsyncIterator[AsyncStreamResponse]:
        """
        Streaming request.

        Args:
            method: HTTP method.
            url: Target URL.
            **kwargs: Request options.

        Yields:
            AsyncStreamResponse: Streaming response object.
        """
        await self._ensure_client()

        params = kwargs.pop("params", None)
        headers = kwargs.pop("headers", {})
        data = kwargs.pop("data", None)
        json_data = kwargs.pop("json", None)
        cookies = kwargs.pop("cookies", None)
        # Same timeout fix as request(): fall back to the client default
        # rather than disabling timeouts with an explicit None.
        timeout = kwargs.pop("timeout", httpx.USE_CLIENT_DEFAULT)

        merged_headers = {**self.headers, **headers}

        # No cf_proxies, or a plain proxy configured on the client:
        # stream directly.
        if not self.cf_proxies or not self.cf_workers:
            async with self._client.stream(
                method,
                url,
                params=params,
                headers=merged_headers,
                data=data,
                json=json_data,
                cookies=cookies,
                timeout=timeout,
                **kwargs
            ) as response:
                yield AsyncStreamResponse(response)
            return

        # CFspider Workers API proxy path.
        cf_proxies_url = self.cf_proxies.rstrip("/")

        if not cf_proxies_url.startswith(('http://', 'https://')):
            cf_proxies_url = f"https://{cf_proxies_url}"

        target_url = url
        if params:
            target_url = f"{url}?{urlencode(params)}"

        proxy_url = f"{cf_proxies_url}/proxy?url={quote(target_url, safe='')}&method={method.upper()}"
        if self.token:
            proxy_url += f"&token={quote(self.token, safe='')}"

        request_headers = {}
        for key, value in merged_headers.items():
            request_headers[f"X-CFSpider-Header-{key}"] = value

        all_cookies = {**self.cookies, **(cookies or {})}
        if all_cookies:
            cookie_str = "; ".join([f"{k}={v}" for k, v in all_cookies.items()])
            request_headers["X-CFSpider-Header-Cookie"] = cookie_str

        async with self._client.stream(
            "POST",
            proxy_url,
            headers=request_headers,
            data=data,
            json=json_data,
            timeout=timeout,
            **kwargs
        ) as response:
            cf_colo = response.headers.get("X-CF-Colo")
            cf_ray = response.headers.get("CF-Ray")
            yield AsyncStreamResponse(response, cf_colo=cf_colo, cf_ray=cf_ray)

    async def get(self, url: str, **kwargs) -> AsyncCFSpiderResponse:
        """Asynchronous GET request."""
        return await self.request("GET", url, **kwargs)

    async def post(self, url: str, **kwargs) -> AsyncCFSpiderResponse:
        """Asynchronous POST request."""
        return await self.request("POST", url, **kwargs)

    async def put(self, url: str, **kwargs) -> AsyncCFSpiderResponse:
        """Asynchronous PUT request."""
        return await self.request("PUT", url, **kwargs)

    async def delete(self, url: str, **kwargs) -> AsyncCFSpiderResponse:
        """Asynchronous DELETE request."""
        return await self.request("DELETE", url, **kwargs)

    async def head(self, url: str, **kwargs) -> AsyncCFSpiderResponse:
        """Asynchronous HEAD request."""
        return await self.request("HEAD", url, **kwargs)

    async def options(self, url: str, **kwargs) -> AsyncCFSpiderResponse:
        """Asynchronous OPTIONS request."""
        return await self.request("OPTIONS", url, **kwargs)

    async def patch(self, url: str, **kwargs) -> AsyncCFSpiderResponse:
        """Asynchronous PATCH request."""
        return await self.request("PATCH", url, **kwargs)
|
|
281
|
+
|