cfspider 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cfspider/browser.py ADDED
@@ -0,0 +1,335 @@
1
+ """
2
+ CFspider 浏览器模块
3
+ 基于 Playwright 封装,支持通过 Cloudflare Workers 代理浏览器流量
4
+ """
5
+
6
+ from urllib.parse import urlparse, parse_qs, unquote
7
+ from .vless_client import LocalVlessProxy
8
+
9
+
10
def parse_vless_link(vless_link):
    """
    Parse a VLESS share link into its connection parameters.

    Supported forms:
        vless://uuid@host:port?type=ws&path=/xxx#name
        vless://uuid@host:port?path=%2Fxxx
        vless://uuid@host:port

    Args:
        vless_link: The VLESS link string.

    Returns:
        dict: A dictionary with the keys ``uuid``, ``host``, ``port`` and
        ``path``, or None when the input is not a usable VLESS link
        (wrong type or scheme, missing ``@``, empty uuid or host,
        non-numeric or out-of-range port).
    """
    scheme = 'vless://'
    # Reject non-string input up front so callers always get None on failure
    # instead of an AttributeError/TypeError from str methods.
    if not isinstance(vless_link, str) or not vless_link.startswith(scheme):
        return None

    # Drop the scheme, then the "#name" fragment, then split off the query.
    remainder = vless_link[len(scheme):].split('#', 1)[0]
    authority, _, query_str = remainder.partition('?')

    if '@' not in authority:
        return None
    uuid, host_port = authority.split('@', 1)

    try:
        if ':' in host_port:
            host, port_text = host_port.rsplit(':', 1)
            port = int(port_text)
        else:
            host = host_port
            port = 443

        path = "/"
        if query_str:
            params = parse_qs(query_str)
            if 'path' in params:
                # parse_qs has already percent-decoded the value; decoding it
                # a second time would corrupt paths that contain a literal
                # "%XX" sequence.
                path = params['path'][0]
    except ValueError:
        # Raised by int() for a non-numeric port.
        return None

    if not uuid or not host or not 0 < port < 65536:
        return None

    return {
        'uuid': uuid,
        'host': host,
        'port': port,
        'path': path
    }
70
+
71
# Playwright is an optional dependency. Record whether it could be imported so
# that Browser.__init__ can raise PlaywrightNotInstalledError with an install
# hint instead of this module failing at import time.
try:
    from playwright.sync_api import sync_playwright, Page, Browser as PlaywrightBrowser
    PLAYWRIGHT_AVAILABLE = True
except ImportError:
    PLAYWRIGHT_AVAILABLE = False
    # Keep these module-level names defined when Playwright is missing.
    # sync_playwright is left undefined in that case.
    Page = None
    PlaywrightBrowser = None
78
+
79
+
80
class BrowserNotInstalledError(Exception):
    """Error indicating that the browser is not installed."""
83
+
84
+
85
class PlaywrightNotInstalledError(Exception):
    """Error indicating that Playwright is not installed."""
88
+
89
+
90
class Browser:
    """
    CFspider browser wrapper built on Playwright's synchronous API.

    Launches Chromium and, optionally, routes its traffic through a proxy:
    either a Cloudflare Workers (edgetunnel) VLESS endpoint reached through
    a local proxy started by this class, or a plain HTTP / SOCKS5 proxy.

    Instances can be used in a ``with`` statement; leaving the block calls
    ``close()``.

    Example:
        >>> import cfspider
        >>> # Through an edgetunnel Workers endpoint
        >>> browser = cfspider.Browser(cf_proxies="vless://uuid@v2.example.com:443?path=/")
        >>> html = browser.html("https://example.com")
        >>> browser.close()
        >>>
        >>> # Direct connection (no proxy)
        >>> browser = cfspider.Browser()
        >>> html = browser.html("https://example.com")
        >>> browser.close()
    """

    def __init__(self, cf_proxies=None, headless=True, timeout=30, vless_uuid=None):
        """
        Start the optional local proxy, Playwright, Chromium and a context.

        Args:
            cf_proxies: Proxy address (optional). Supported forms:
                - VLESS link: "vless://uuid@host:port?path=/xxx#name"
                  (recommended)
                - HTTP proxy: "http://ip:port" or "ip:port"
                - SOCKS5 proxy: "socks5://ip:port"
                - edgetunnel domain: "v2.example.com" (requires vless_uuid)
                When omitted, the local network is used directly.
            headless: Whether to run headless. Defaults to True.
            timeout: Default timeout in seconds. Defaults to 30.
            vless_uuid: VLESS UUID (optional). Needed only with the bare
                domain form; a full VLESS link already carries it.

        Raises:
            PlaywrightNotInstalledError: Playwright could not be imported.
            BrowserNotInstalledError: The Chromium executable is missing.

        Examples:
            # Full VLESS link (recommended, no vless_uuid needed)
            browser = Browser(cf_proxies="vless://uuid@v2.example.com:443?path=/")

            # Domain + UUID (legacy form)
            browser = Browser(cf_proxies="v2.example.com", vless_uuid="your-uuid")

            # HTTP proxy
            browser = Browser(cf_proxies="127.0.0.1:8080")

            # SOCKS5 proxy
            browser = Browser(cf_proxies="socks5://127.0.0.1:1080")
        """
        if not PLAYWRIGHT_AVAILABLE:
            raise PlaywrightNotInstalledError(
                "Playwright 未安装,请运行: pip install cfspider[browser]"
            )

        self.cf_proxies = cf_proxies
        self.headless = headless
        self.timeout = timeout

        # Pre-declare every resource so close() can run after a partial start.
        self._vless_proxy = None
        self._playwright = None
        self._browser = None
        self._context = None

        try:
            proxy_url = self._resolve_proxy(cf_proxies, vless_uuid)

            self._playwright = sync_playwright().start()

            launch_options = {"headless": headless}
            if proxy_url:
                launch_options["proxy"] = {"server": proxy_url}

            try:
                self._browser = self._playwright.chromium.launch(**launch_options)
            except Exception as e:
                if "Executable doesn't exist" in str(e):
                    raise BrowserNotInstalledError(
                        "Chromium 浏览器未安装,请运行: cfspider install"
                    ) from e
                raise

            self._context = self._browser.new_context(
                ignore_https_errors=True
            )
            # The public timeout is in seconds; convert to milliseconds.
            self._context.set_default_timeout(timeout * 1000)
        except BaseException:
            # Do not leak the local proxy, the Playwright driver or the
            # browser when any start-up step fails.
            self.close()
            raise

    @staticmethod
    def _vless_ws_url(vless_info):
        """Build the wss:// URL for a parsed VLESS link (host, port, path)."""
        path = vless_info['path']
        if not path.startswith('/'):
            # Without the separator the path would be glued onto the host.
            path = '/' + path
        host = vless_info['host']
        port = vless_info['port']
        # 443 is the wss default; any other port must be spelled out,
        # otherwise the port given in the link is silently ignored.
        authority = host if port == 443 else f"{host}:{port}"
        return f"wss://{authority}{path}"

    def _start_vless_proxy(self, ws_url, uuid):
        """Start the local VLESS proxy and return its HTTP proxy URL."""
        self._vless_proxy = LocalVlessProxy(ws_url, uuid)
        port = self._vless_proxy.start()
        return f"http://127.0.0.1:{port}"

    def _resolve_proxy(self, cf_proxies, vless_uuid):
        """Turn cf_proxies into a proxy server URL, or None for no proxy."""
        if not cf_proxies:
            return None

        # 1. Full VLESS link.
        vless_info = parse_vless_link(cf_proxies)
        if vless_info:
            return self._start_vless_proxy(
                self._vless_ws_url(vless_info), vless_info['uuid']
            )

        # 2. Explicit HTTP / SOCKS5 proxy URL.
        if cf_proxies.startswith(('http://', 'https://', 'socks5://')):
            return cf_proxies

        # 3. Bare IP:PORT (only digits once dots and colons are removed).
        if ':' in cf_proxies and cf_proxies.replace('.', '').replace(':', '').isdigit():
            return f"http://{cf_proxies}"

        # 4. Domain + UUID (legacy form).
        if vless_uuid:
            hostname = cf_proxies.replace('https://', '').replace('http://', '').replace('wss://', '').replace('ws://', '').split('/')[0]
            return self._start_vless_proxy(f'wss://{hostname}/{vless_uuid}', vless_uuid)

        # 5. Anything else is treated as an HTTP proxy.
        return f"http://{cf_proxies}"

    def _visit(self, url, wait_until, action):
        """Open a page, navigate to url, return action(page), close the page."""
        page = self._context.new_page()
        try:
            page.goto(url, wait_until=wait_until)
            return action(page)
        finally:
            page.close()

    def get(self, url):
        """
        Open a page and return the Page object.

        The page is left open for the caller, who is responsible for closing
        it. If navigation fails the page is closed before the error
        propagates.

        Args:
            url: Target URL.

        Returns:
            Page: Playwright Page object, usable for further automation.
        """
        page = self._context.new_page()
        try:
            page.goto(url, wait_until="networkidle")
        except BaseException:
            # The caller never receives the page, so it must be closed here.
            page.close()
            raise
        return page

    def html(self, url, wait_until="domcontentloaded"):
        """
        Fetch the rendered HTML of a page.

        Args:
            url: Target URL.
            wait_until: Wait strategy: "load", "domcontentloaded" or
                "networkidle".

        Returns:
            str: The rendered HTML content.
        """
        return self._visit(url, wait_until, lambda page: page.content())

    def screenshot(self, url, path=None, full_page=False):
        """
        Take a screenshot of a page.

        Args:
            url: Target URL.
            path: Output path, e.g. "screenshot.png".
            full_page: Whether to capture the whole page. Defaults to False.

        Returns:
            bytes: The screenshot as binary data.
        """
        return self._visit(
            url,
            "networkidle",
            lambda page: page.screenshot(path=path, full_page=full_page),
        )

    def pdf(self, url, path=None):
        """
        Render a page to PDF.

        Args:
            url: Target URL.
            path: Output path, e.g. "page.pdf".

        Returns:
            bytes: The PDF as binary data.

        Raises:
            ValueError: The browser was started with headless=False.

        Note:
            PDF generation is only available in headless mode.
        """
        if not self.headless:
            raise ValueError("PDF 生成仅在无头模式 (headless=True) 下可用")

        return self._visit(url, "networkidle", lambda page: page.pdf(path=path))

    def execute_script(self, url, script):
        """
        Run JavaScript in a page.

        Args:
            url: Target URL.
            script: JavaScript code to evaluate.

        Returns:
            The evaluation result.
        """
        return self._visit(url, "networkidle", lambda page: page.evaluate(script))

    def new_page(self):
        """
        Create a new page.

        Returns:
            Page: A new Playwright Page object.
        """
        return self._context.new_page()

    def close(self):
        """Close the context, browser, Playwright driver and local proxy."""
        teardown = (
            ("_context", "close"),
            ("_browser", "close"),
            ("_playwright", "stop"),
            ("_vless_proxy", "stop"),
        )
        for attr, method in teardown:
            resource = getattr(self, attr, None)
            if resource is None:
                continue
            try:
                getattr(resource, method)()
            except Exception:
                # Best-effort teardown: one failing resource must not stop
                # the remaining ones from being released.
                pass

    def __enter__(self):
        """Support the with statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Support the with statement; never suppresses exceptions."""
        self.close()
        return False
335
+
cfspider/cli.py ADDED
@@ -0,0 +1,81 @@
1
+ """
2
+ CFspider 命令行工具
3
+ """
4
+
5
+ import sys
6
+ import subprocess
7
+
8
+
9
def install_browser():
    """
    Install the Chromium browser through Playwright's command-line installer.

    Runs ``playwright install chromium`` with the current interpreter; the
    installer's output is not captured.

    Example:
        >>> import cfspider
        >>> cfspider.install_browser()

    Returns:
        bool: True when the installer exits with status 0; False when it
        exits with another status or could not be run.
    """
    install_cmd = [sys.executable, '-m', 'playwright', 'install', 'chromium']
    try:
        completed = subprocess.run(install_cmd, capture_output=False)
    except Exception as e:
        print(f"安装失败: {e}")
        return False
    return completed.returncode == 0
27
+
28
+
29
def main():
    """Command-line entry point: dispatch on the first argument."""
    argv = sys.argv
    if len(argv) < 2:
        print_help()
        return

    command = argv[1].lower()

    if command in ('help', '-h', '--help'):
        print_help()
    elif command == 'install':
        print("正在安装 Chromium 浏览器...")
        if not install_browser():
            print("安装失败,请检查网络连接或手动安装")
            sys.exit(1)
        print("安装完成!")
    elif command == 'version':
        from . import __version__
        print(f"cfspider {__version__}")
    else:
        # Unknown command: report it, show usage and exit non-zero.
        print(f"未知命令: {command}")
        print_help()
        sys.exit(1)
56
+
57
+
58
def print_help():
    """Print the command-line usage text to standard output."""
    # NOTE(review): the whitespace inside this literal is part of the output.
    # It is reproduced as seen in the reviewed copy, where leading indentation
    # and column alignment were not visible - confirm against the packaged file.
    print("""
CFspider - Cloudflare 代理 IP 池

用法:
cfspider <command>

命令:
install 安装 Chromium 浏览器(用于 Browser 功能)
version 显示版本号
help 显示帮助信息

示例:
cfspider install # 安装浏览器
cfspider version # 显示版本

更多信息请访问: https://github.com/violettoolssite/CFspider
""")
77
+
78
+
79
# Run the command-line entry point when this module is executed as a script.
if __name__ == '__main__':
    main()
81
+