cfspider 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cfspider/__init__.py +230 -0
- cfspider/api.py +937 -0
- cfspider/async_api.py +418 -0
- cfspider/async_session.py +281 -0
- cfspider/browser.py +335 -0
- cfspider/cli.py +81 -0
- cfspider/impersonate.py +388 -0
- cfspider/ip_map.py +522 -0
- cfspider/mirror.py +682 -0
- cfspider/session.py +239 -0
- cfspider/stealth.py +537 -0
- cfspider/vless_client.py +572 -0
- cfspider-1.7.4.dist-info/METADATA +1390 -0
- cfspider-1.7.4.dist-info/RECORD +18 -0
- cfspider-1.7.4.dist-info/WHEEL +5 -0
- cfspider-1.7.4.dist-info/entry_points.txt +2 -0
- cfspider-1.7.4.dist-info/licenses/LICENSE +201 -0
- cfspider-1.7.4.dist-info/top_level.txt +1 -0
cfspider/browser.py
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFspider 浏览器模块
|
|
3
|
+
基于 Playwright 封装,支持通过 Cloudflare Workers 代理浏览器流量
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from urllib.parse import urlparse, parse_qs, unquote
|
|
7
|
+
from .vless_client import LocalVlessProxy
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def parse_vless_link(vless_link):
    """
    Parse a VLESS share link.

    Supported formats:
        vless://uuid@host:port?type=ws&path=/xxx#name
        vless://uuid@host:port?path=%2Fxxx
        vless://uuid@host:port
        vless://uuid@host            (port defaults to 443)
        vless://uuid@[ipv6]          (bracketed IPv6 literal, port defaults to 443)

    Args:
        vless_link: The VLESS link string.

    Returns:
        dict: ``{'uuid', 'host', 'port', 'path'}`` on success. ``path``
        defaults to ``"/"`` when the query string has no ``path`` parameter.
        Returns ``None`` when the link is empty, does not start with
        ``vless://``, has no ``uuid@host`` part, or has an invalid port.
    """
    prefix = 'vless://'
    if not vless_link or not vless_link.startswith(prefix):
        return None

    try:
        # Strip the scheme, then drop the "#name" fragment.
        remainder = vless_link[len(prefix):].split('#', 1)[0]

        # Separate "uuid@host:port" from the query string.
        authority, _, query_str = remainder.partition('?')
        # Tolerate a trailing "/" before the query, e.g. "host:443/?path=...".
        authority = authority.rstrip('/')

        uuid, at_sign, host_port = authority.partition('@')
        if not at_sign or not uuid or not host_port:
            return None

        # A value ending in "]" is a bracketed IPv6 literal without a port;
        # splitting it on the last ":" would cut the address in half.
        if ':' in host_port and not host_port.endswith(']'):
            host, port_text = host_port.rsplit(':', 1)
            port = int(port_text)
        else:
            host = host_port
            port = 443

        if not host or not 0 < port < 65536:
            return None

        path = "/"
        if query_str:
            params = parse_qs(query_str)
            if 'path' in params:
                # parse_qs has already percent-decoded the value; the extra
                # unquote is kept so double-encoded paths decode as before.
                path = unquote(params['path'][0])

        return {
            'uuid': uuid,
            'host': host,
            'port': port,
            'path': path
        }
    except ValueError:
        # Non-numeric port or a query string parse_qs rejects.
        return None
|
|
70
|
+
|
|
71
|
+
# Playwright is an optional dependency. Record whether it imported so that
# Browser.__init__ can raise PlaywrightNotInstalledError instead of this
# module failing at import time.
try:
    from playwright.sync_api import sync_playwright, Page, Browser as PlaywrightBrowser
    PLAYWRIGHT_AVAILABLE = True
except ImportError:
    PLAYWRIGHT_AVAILABLE = False
    # Placeholders so these names still exist at module level without Playwright.
    # sync_playwright gets no placeholder; it is only called after the
    # PLAYWRIGHT_AVAILABLE check in Browser.__init__.
    Page = None
    PlaywrightBrowser = None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class BrowserNotInstalledError(Exception):
    """Raised when the Chromium browser executable is not installed."""
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class PlaywrightNotInstalledError(Exception):
    """Raised when the Playwright package is not installed."""
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class Browser:
    """
    CFspider browser.

    Wraps Playwright's synchronous Chromium API and can send the browser's
    traffic through a proxy, including a local VLESS bridge (LocalVlessProxy)
    pointed at a Cloudflare Workers (edgetunnel) endpoint.

    Example:
        >>> import cfspider
        >>> # Through an edgetunnel Workers endpoint (domain + UUID)
        >>> browser = cfspider.Browser(cf_proxies="v2.example.com", vless_uuid="your-uuid")
        >>> html = browser.html("https://example.com")
        >>> browser.close()
        >>>
        >>> # Direct connection (no proxy)
        >>> browser = cfspider.Browser()
        >>> html = browser.html("https://example.com")
        >>> browser.close()
    """

    def __init__(self, cf_proxies=None, headless=True, timeout=30, vless_uuid=None):
        """
        Start Playwright, launch Chromium and create the default context.

        Args:
            cf_proxies: Proxy address (optional). Accepted forms, checked in
                this order:
                - VLESS link: "vless://uuid@host:port?path=/xxx#name" (recommended)
                - HTTP/SOCKS5 proxy URL: "http://ip:port", "https://ip:port"
                  or "socks5://ip:port"
                - Bare "ip:port" (treated as an HTTP proxy)
                - edgetunnel domain: "v2.example.com" (requires vless_uuid)
                - anything else is treated as an HTTP proxy host
                When omitted, the local network is used directly.
            headless: Run the browser headless. Defaults to True.
            timeout: Default timeout in seconds. Defaults to 30.
            vless_uuid: VLESS UUID (optional); needed only with the domain
                form. Not needed when a full VLESS link is given.

        Raises:
            PlaywrightNotInstalledError: Playwright could not be imported.
            BrowserNotInstalledError: The Chromium executable is missing.

        Examples:
            # Full VLESS link (recommended, no vless_uuid needed)
            browser = Browser(cf_proxies="vless://uuid@v2.example.com:443?path=/")

            # Domain + UUID (legacy form)
            browser = Browser(cf_proxies="v2.example.com", vless_uuid="your-uuid")

            # HTTP proxy
            browser = Browser(cf_proxies="127.0.0.1:8080")

            # SOCKS5 proxy
            browser = Browser(cf_proxies="socks5://127.0.0.1:1080")
        """
        if not PLAYWRIGHT_AVAILABLE:
            raise PlaywrightNotInstalledError(
                "Playwright 未安装,请运行: pip install cfspider[browser]"
            )

        self.cf_proxies = cf_proxies
        self.headless = headless
        self.timeout = timeout

        # Pre-declare every resource so close() can run after a partial start.
        self._vless_proxy = None
        self._playwright = None
        self._browser = None
        self._context = None

        try:
            proxy_url = self._resolve_proxy(cf_proxies, vless_uuid)

            self._playwright = sync_playwright().start()

            launch_options = {"headless": headless}
            if proxy_url:
                launch_options["proxy"] = {"server": proxy_url}

            try:
                self._browser = self._playwright.chromium.launch(**launch_options)
            except Exception as e:
                if "Executable doesn't exist" in str(e):
                    raise BrowserNotInstalledError(
                        "Chromium 浏览器未安装,请运行: cfspider install"
                    ) from e
                raise

            # Default context shared by every page this object opens.
            self._context = self._browser.new_context(
                ignore_https_errors=True
            )
            # The context timeout is set in milliseconds; the argument is seconds.
            self._context.set_default_timeout(timeout * 1000)
        except BaseException:
            # Any failure after a resource was started must not leak the
            # local proxy, the Playwright driver or the browser process.
            self.close()
            raise

    def _start_vless_proxy(self, ws_url, uuid):
        """Start a local VLESS bridge for *ws_url* and return its HTTP proxy URL."""
        self._vless_proxy = LocalVlessProxy(ws_url, uuid)
        port = self._vless_proxy.start()
        return f"http://127.0.0.1:{port}"

    def _resolve_proxy(self, cf_proxies, vless_uuid):
        """Turn *cf_proxies* into the proxy server URL for Playwright, or None."""
        if not cf_proxies:
            return None

        # 1. Full VLESS link.
        vless_info = parse_vless_link(cf_proxies)
        if vless_info:
            # NOTE(review): the parsed port is not used here; the URL relies on
            # the wss default port. Confirm whether non-443 ports must work.
            ws_url = f"wss://{vless_info['host']}{vless_info['path']}"
            return self._start_vless_proxy(ws_url, vless_info['uuid'])

        # 2. Explicit HTTP/SOCKS5 proxy URL.
        if cf_proxies.startswith(('http://', 'https://', 'socks5://')):
            return cf_proxies

        # 3. Bare IP:PORT (only digits, dots and colons).
        if ':' in cf_proxies and cf_proxies.replace('.', '').replace(':', '').isdigit():
            return f"http://{cf_proxies}"

        # 4. Domain + UUID (legacy form).
        if vless_uuid:
            hostname = cf_proxies
            for scheme in ('https://', 'http://', 'wss://', 'ws://'):
                hostname = hostname.replace(scheme, '')
            hostname = hostname.split('/')[0]
            return self._start_vless_proxy(f'wss://{hostname}/{vless_uuid}', vless_uuid)

        # 5. Fallback: treat the value as an HTTP proxy host.
        return f"http://{cf_proxies}"

    def _visit(self, url, wait_until, action):
        """Open *url* in a temporary page, return ``action(page)``, always close the page."""
        page = self._context.new_page()
        try:
            page.goto(url, wait_until=wait_until)
            return action(page)
        finally:
            page.close()

    def get(self, url):
        """
        Open a page and return the Page object.

        Args:
            url: Target URL.

        Returns:
            Page: Playwright Page object, usable for automation. The caller
            is responsible for closing it.
        """
        page = self._context.new_page()
        try:
            page.goto(url, wait_until="networkidle")
        except Exception:
            # The caller never receives the page on failure, so close it here.
            page.close()
            raise
        return page

    def html(self, url, wait_until="domcontentloaded"):
        """
        Return the rendered HTML of a page.

        Args:
            url: Target URL.
            wait_until: Wait strategy: "load", "domcontentloaded" or "networkidle".

        Returns:
            str: Rendered HTML content.
        """
        return self._visit(url, wait_until, lambda page: page.content())

    def screenshot(self, url, path=None, full_page=False):
        """
        Take a screenshot of a page.

        Args:
            url: Target URL.
            path: Output path, e.g. "screenshot.png".
            full_page: Capture the whole page. Defaults to False.

        Returns:
            bytes: Screenshot binary data.
        """
        return self._visit(
            url,
            "networkidle",
            lambda page: page.screenshot(path=path, full_page=full_page),
        )

    def pdf(self, url, path=None):
        """
        Render a page to PDF.

        Args:
            url: Target URL.
            path: Output path, e.g. "page.pdf".

        Returns:
            bytes: PDF binary data.

        Raises:
            ValueError: The browser was not started in headless mode.

        Note:
            PDF generation is only available in headless mode.
        """
        if not self.headless:
            raise ValueError("PDF 生成仅在无头模式 (headless=True) 下可用")

        return self._visit(url, "networkidle", lambda page: page.pdf(path=path))

    def execute_script(self, url, script):
        """
        Run JavaScript in a page.

        Args:
            url: Target URL.
            script: JavaScript code to run.

        Returns:
            The result of the evaluation.
        """
        return self._visit(url, "networkidle", lambda page: page.evaluate(script))

    def new_page(self):
        """
        Create a new page.

        Returns:
            Page: New Playwright Page object.
        """
        return self._context.new_page()

    def close(self):
        """Close the context, browser, Playwright driver and local proxy (best effort)."""
        shutdown_steps = (
            ('_context', 'close'),
            ('_browser', 'close'),
            ('_playwright', 'stop'),
            ('_vless_proxy', 'stop'),
        )
        for attr, method in shutdown_steps:
            resource = getattr(self, attr, None)
            if not resource:
                continue
            try:
                getattr(resource, method)()
            except Exception:
                # Best-effort teardown: one failing step must not stop the
                # others. KeyboardInterrupt and SystemExit still propagate.
                pass

    def __enter__(self):
        """Support the with statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close on leaving the with block; exceptions are not suppressed."""
        self.close()
        return False
|
|
335
|
+
|
cfspider/cli.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFspider 命令行工具
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
import subprocess
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def install_browser():
    """
    Install the Chromium browser via Playwright's command line.

    Runs ``playwright install chromium`` with the current interpreter.

    Returns:
        bool: True when the command exits with code 0; False when it exits
        non-zero or could not be run.

    Example:
        >>> import cfspider
        >>> cfspider.install_browser()
    """
    command = [sys.executable, '-m', 'playwright', 'install', 'chromium']
    try:
        # Output is not captured, so it goes to the caller's stdout/stderr.
        completed = subprocess.run(command, capture_output=False)
    except Exception as e:
        print(f"安装失败: {e}")
        return False
    return completed.returncode == 0
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def main():
    """Command-line entry point: dispatch on the first argument."""
    args = sys.argv[1:]
    if not args:
        print_help()
        return

    command = args[0].lower()

    if command in ('help', '-h', '--help'):
        print_help()
    elif command == 'version':
        from . import __version__
        print(f"cfspider {__version__}")
    elif command == 'install':
        print("正在安装 Chromium 浏览器...")
        if not install_browser():
            print("安装失败,请检查网络连接或手动安装")
            sys.exit(1)
        print("安装完成!")
    else:
        # Unknown command: report it, show usage and exit with status 1.
        print(f"未知命令: {command}")
        print_help()
        sys.exit(1)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def print_help():
    """Print the command-line usage text to stdout."""
    # NOTE(review): the help text below is reproduced as it appears in the
    # reviewed rendering, which drops leading whitespace; confirm indentation
    # and column alignment against the packaged file.
    print("""
CFspider - Cloudflare 代理 IP 池

用法:
cfspider <command>

命令:
install 安装 Chromium 浏览器(用于 Browser 功能)
version 显示版本号
help 显示帮助信息

示例:
cfspider install # 安装浏览器
cfspider version # 显示版本

更多信息请访问: https://github.com/violettoolssite/CFspider
""")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# Run the CLI when this module is executed as a script.
if __name__ == '__main__':
    main()
|
|
81
|
+
|