mdbq 4.0.71__tar.gz → 4.0.73__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. {mdbq-4.0.71 → mdbq-4.0.73}/PKG-INFO +1 -1
  2. mdbq-4.0.73/mdbq/__version__.py +1 -0
  3. mdbq-4.0.73/mdbq/selenium/get_driver.py +467 -0
  4. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq.egg-info/PKG-INFO +1 -1
  5. mdbq-4.0.71/mdbq/__version__.py +0 -1
  6. mdbq-4.0.71/mdbq/selenium/get_driver.py +0 -267
  7. {mdbq-4.0.71 → mdbq-4.0.73}/README.txt +0 -0
  8. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/__init__.py +0 -0
  9. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/log/__init__.py +0 -0
  10. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/log/mylogger.py +0 -0
  11. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/myconf/__init__.py +0 -0
  12. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/myconf/myconf.py +0 -0
  13. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/__init__.py +0 -0
  14. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/deduplicator.py +0 -0
  15. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/mysql.py +0 -0
  16. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/s_query.py +0 -0
  17. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/unique_.py +0 -0
  18. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/uploader.py +0 -0
  19. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/__init__.py +0 -0
  20. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/download_sku_picture.py +0 -0
  21. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/error_handler.py +0 -0
  22. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/otk.py +0 -0
  23. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/pov_city.py +0 -0
  24. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/ua_sj.py +0 -0
  25. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/pbix/__init__.py +0 -0
  26. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/pbix/pbix_refresh.py +0 -0
  27. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/pbix/refresh_all.py +0 -0
  28. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/redis/__init__.py +0 -0
  29. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/redis/getredis.py +0 -0
  30. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/selenium/__init__.py +0 -0
  31. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/spider/__init__.py +0 -0
  32. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq.egg-info/SOURCES.txt +0 -0
  33. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq.egg-info/dependency_links.txt +0 -0
  34. {mdbq-4.0.71 → mdbq-4.0.73}/mdbq.egg-info/top_level.txt +0 -0
  35. {mdbq-4.0.71 → mdbq-4.0.73}/setup.cfg +0 -0
  36. {mdbq-4.0.71 → mdbq-4.0.73}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.71
3
+ Version: 4.0.73
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -0,0 +1 @@
1
+ VERSION = '4.0.73'
@@ -0,0 +1,467 @@
1
+ # -*- coding:utf-8 -*-
2
+ import os
3
+ import platform
4
+ import getpass
5
+ from selenium import webdriver
6
+ from selenium.webdriver.chrome.service import Service
7
+ import re
8
+ import socket
9
+ import tempfile
10
+ import shutil
11
+ import uuid
12
+ import subprocess
13
+ import json
14
+
15
+ dir_path = os.path.expanduser("~")
16
+
17
+
18
class GetDriverException(Exception):
    """Custom exception type for GetDriver-related errors."""
21
+
22
+
23
class GetDriver:
    """
    Selenium ChromeDriver manager.

    Builds a Chrome WebDriver with proxy, headless mode, download directory and
    User-Agent configuration, probing platform-specific default install paths
    when none are given. Usable as a context manager (``with GetDriver() as
    driver``), in which case the browser is quit and the temporary profile
    directories are removed on exit.
    """

    # Fallback User-Agent strings; one is picked at random when the caller
    # does not supply ``user_agent``.
    _DEFAULT_USER_AGENTS = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
    )

    # Accepted proxy syntax: <scheme>://<dotted IPv4>:<port>. Named groups are
    # used so that callers never depend on positional group counts.
    _PROXY_RE = re.compile(
        r'^(?P<scheme>socks5|http|https)://(?P<host>\d{1,3}(?:\.\d{1,3}){3}):(?P<port>\d+)$'
    )

    # Scripts registered via CDP so they run on every new document, before the
    # page's own scripts, to hide common Selenium/automation fingerprints.
    _STEALTH_SCRIPTS = (
        # Hide the webdriver property.
        "Object.defineProperty(navigator, 'webdriver', {get: () => undefined, configurable: true});",
        # Pretend some browser plugins are installed.
        "Object.defineProperty(navigator, 'plugins', {get: () => [1,2,3,4,5], configurable: true});",
        # Report a language list.
        "Object.defineProperty(navigator, 'languages', {get: () => ['zh-CN', 'zh', 'en'], configurable: true});",
        # Provide a window.chrome runtime object.
        "window.chrome = {runtime: {}, loadTimes: function(){}, csi: function(){}, app: {}};",
        # Remove webdriver from the prototype chain.
        "delete window.navigator.__proto__.webdriver;",
        # Remove window properties whose names start with three or more $ or _.
        r"for (let key in window) {if (key.match(/^[\$\_]{3,}/)) {try {delete window[key];} catch(e){}}}",
        # Make permission queries resolve as granted.
        "Object.defineProperty(navigator, 'permissions', {get: () => ({query: () => Promise.resolve({state: 'granted'})}), configurable: true});",
        # Report fixed hardware characteristics.
        "Object.defineProperty(navigator, 'hardwareConcurrency', {get: () => 8, configurable: true});",
        "Object.defineProperty(navigator, 'deviceMemory', {get: () => 8, configurable: true});",
        "Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 0, configurable: true});",
        # Remove the cdc_* globals.
        "delete window.cdc_adoQpoasnfa76pfcZLmcfl_Array;",
        "delete window.cdc_adoQpoasnfa76pfcZLmcfl_Promise;",
        "delete window.cdc_adoQpoasnfa76pfcZLmcfl_Symbol;",
    )

    def __init__(self, url=None, headless=False, proxy=None, user_agent=None, download_dir=None, chrome_path=None, chromedriver_path=None, maximize_window=True):
        """
        Store the configuration; no browser is started here.

        :param url: origin passed to --unsafely-treat-insecure-origin-as-secure
        :param headless: run Chrome with --headless when true
        :param proxy: proxy URL (http, https or socks5), e.g. socks5://127.0.0.1:1080
        :param user_agent: custom User-Agent; a built-in one is picked at random when empty
        :param download_dir: download directory ("~" is expanded); defaults to ~/Downloads
        :param chrome_path: path of the Chrome binary; auto-detected when empty
        :param chromedriver_path: path of the chromedriver binary; auto-detected when empty
        :param maximize_window: maximize the browser window after start-up when true
        """
        self.url = url
        self.headless = headless
        self.proxy = proxy
        self.user_agent = user_agent
        self.download_dir = os.path.expanduser(download_dir if download_dir else '~/Downloads')
        self.chrome_path = chrome_path
        self.chromedriver_path = chromedriver_path
        self.temp_dirs = []  # temporary profile directories to remove on close()/quit()
        self.driver = None
        if not self.user_agent:
            import random
            self.user_agent = random.choice(self._DEFAULT_USER_AGENTS)
        self.maximize_window = maximize_window

    def check_proxy(self):
        """
        Validate the proxy string and check that its host/port accepts a TCP connection.

        :return: True when no proxy is configured or the proxy is reachable,
                 False when the format is wrong or the connection fails
        """
        if not self.proxy:
            return True
        match = self._PROXY_RE.match(self.proxy)
        if match is None:
            return False
        try:
            # The context manager closes the probe socket on every path.
            with socket.create_connection((match.group('host'), int(match.group('port'))), timeout=5):
                return True
        except (OSError, OverflowError, ValueError):
            # Unreachable host, refused connection, timeout or out-of-range port.
            return False

    @staticmethod
    def _probe_version(command, pattern, timeout=10, use_shell=False):
        """
        Run ``command`` and extract a version from its standard output.

        :param command: argument list handed to subprocess.run
        :param pattern: regular expression whose first group is the version
        :param timeout: seconds before the subprocess is abandoned
        :param use_shell: forwarded to subprocess.run as ``shell``
        :return: the captured version string, or None when the command exits
                 non-zero or the output does not match
        :raises: whatever subprocess.run raises (missing binary, timeout, ...)
        """
        result = subprocess.run(command, capture_output=True, text=True, timeout=timeout, shell=use_shell)
        if result.returncode != 0:
            return None
        found = re.search(pattern, result.stdout)
        return found.group(1) if found else None

    def _get_chrome_version_windows(self, chrome_path):
        """
        Try several Windows-specific ways of reading the Chrome version.

        Every attempt is best-effort: a failure of one method simply moves on
        to the next one.

        :param chrome_path: path of the Chrome executable
        :return: version string, or None when every method fails
        """
        # Method 1: ask the binary itself. shell=True is kept from the
        # original Windows code path.
        try:
            version = self._probe_version(
                [chrome_path, '--version'], r'Chrome\s+(\d+\.\d+\.\d+\.\d+)', use_shell=True)
            if version:
                return version
        except Exception:
            pass

        # Method 2: read the version recorded in the registry.
        # NOTE(review): this key is not tied to chrome_path, so it presumably
        # describes the installed Chrome rather than the given binary - confirm.
        try:
            import winreg
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER, r"SOFTWARE\Google\Chrome\BLBeacon") as key:
                return winreg.QueryValueEx(key, "version")[0]
        except Exception:
            pass

        # Method 3: read the file version through wmic. The path is prepared
        # outside the f-string because a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12; backslashes are
        # doubled because the WQL string literal treats "\" as an escape.
        try:
            wmic_path = chrome_path.replace("/", "\\").replace("\\", "\\\\")
            version = self._probe_version(
                ['wmic', 'datafile', 'where', f'name="{wmic_path}"', 'get', 'version', '/value'],
                r'Version=(\d+\.\d+\.\d+\.\d+)', use_shell=True)
            if version:
                return version
        except Exception:
            pass

        # Method 4: start Chrome headless and dump about:version.
        try:
            version = self._probe_version(
                [chrome_path, '--headless', '--disable-gpu', '--dump-dom', 'about:version'],
                r'Chrome/(\d+\.\d+\.\d+\.\d+)', timeout=15, use_shell=True)
            if version:
                return version
        except Exception:
            pass
        return None

    def _get_chrome_version(self, chrome_path):
        """
        Return the version of the Chrome binary.

        :param chrome_path: path of the Chrome executable
        :return: version string such as "120.0.6099.109", or None when it cannot be determined
        """
        try:
            if platform.system().lower() == 'windows':
                return self._get_chrome_version_windows(chrome_path)
            # macOS / Linux print e.g. "Google Chrome 120.0.6099.109".
            return self._probe_version([chrome_path, '--version'], r'Chrome\s+(\d+\.\d+\.\d+\.\d+)')
        except Exception as e:
            print(f"获取Chrome版本失败: {e}")
        return None

    def _get_chromedriver_version(self, chromedriver_path):
        """
        Return the version of the chromedriver binary.

        :param chromedriver_path: path of the chromedriver executable
        :return: version string such as "120.0.6099.109", or None when it cannot be determined
        """
        try:
            # Output looks like "ChromeDriver 120.0.6099.109 ...". The shell is
            # only used on Windows, as in the original implementation.
            return self._probe_version(
                [chromedriver_path, '--version'],
                r'ChromeDriver\s+(\d+\.\d+\.\d+\.\d+)',
                use_shell=platform.system().lower() == 'windows')
        except Exception as e:
            print(f"获取Chromedriver版本失败: {e}")
        return None

    def _check_version_compatibility(self, chrome_path, chromedriver_path):
        """
        Compare the major versions of Chrome and chromedriver.

        :param chrome_path: path of the Chrome executable
        :param chromedriver_path: path of the chromedriver executable
        :return: (is_compatible, chrome_version, chromedriver_version); when
                 either version is unknown the pair is reported as compatible
                 so that a start-up attempt is still made
        """
        chrome_version = self._get_chrome_version(chrome_path)
        chromedriver_version = self._get_chromedriver_version(chromedriver_path)

        if not chrome_version or not chromedriver_version:
            print(f"警告: 无法获取版本信息 - Chrome: {chrome_version}, Chromedriver: {chromedriver_version}")
            return True, chrome_version, chromedriver_version

        same_major = chrome_version.split('.')[0] == chromedriver_version.split('.')[0]
        return same_major, chrome_version, chromedriver_version

    def _try_create_driver(self, chrome_path, chromedriver_path, option, temp_dir):
        """
        Try to start Chrome and register the anti-detection scripts.

        :param chrome_path: path of the Chrome executable
        :param chromedriver_path: path of the chromedriver executable
        :param option: ChromeOptions instance to launch with
        :param temp_dir: temporary profile directory (unused here; kept for interface compatibility)
        :return: the WebDriver instance, or None when start-up fails
        """
        driver = None
        try:
            option.binary_location = chrome_path
            service = Service(chromedriver_path)
            driver = webdriver.Chrome(service=service, options=option)
            if self.maximize_window:
                driver.maximize_window()
            for js in self._STEALTH_SCRIPTS:
                driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})
            return driver
        except Exception as e:
            print(f"创建Chrome WebDriver失败: {e}")
            if driver is not None:
                # The browser did start but post-start setup failed: shut it
                # down so the process is not left running.
                try:
                    driver.quit()
                except Exception:
                    pass
            return None

    def _build_options(self, temp_dir):
        """
        Assemble the ChromeOptions used for every launch attempt.

        :param temp_dir: directory passed to Chrome as --user-data-dir
        :return: configured ChromeOptions instance
        """
        option = webdriver.ChromeOptions()
        if self.headless:
            option.add_argument("--headless")
        # NOTE(review): --disable-features appears three times below; Chrome
        # may honour only one occurrence of a repeated switch - confirm and
        # merge the feature lists if so.
        switches = [
            "--window-size=1920,1080",
            "--disable-gpu",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--disable-blink-features=AutomationControlled",
            "--disable-features=VizDisplayCompositor",
            "--disable-background-timer-throttling",
            "--disable-backgrounding-occluded-windows",
            "--disable-renderer-backgrounding",
            "--disable-features=TranslateUI",
            "--disable-ipc-flooding-protection",
            # A unique profile directory avoids clashes between Chrome instances.
            f'--user-data-dir={temp_dir}',
            '--no-first-run',
            '--no-default-browser-check',
            # Switches relaxing safe-browsing / insecure-content checks.
            '--allow-insecure-localhost',
            '--allow-running-insecure-content',
            '--disable-features=BlockInsecurePrivateNetworkRequests,SafeBrowsing,DownloadBubble,SafeBrowsingEnhancedProtection,DownloadWarning',
            '--safebrowsing-disable-download-protection',
            '--disable-client-side-phishing-detection',
            '--disable-popup-blocking',
            '--ignore-certificate-errors',
        ]
        if self.url:
            switches.append(f"--unsafely-treat-insecure-origin-as-secure={self.url}")
        switches.append(f'--user-agent={self.user_agent}')
        for switch in switches:
            option.add_argument(switch)

        option.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
        option.add_experimental_option("useAutomationExtension", False)
        if self.proxy:
            option.add_argument(f'--proxy-server={self.proxy}')

        prefs = {
            "download.default_directory": self.download_dir,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": False,
            "safebrowsing.disable_download_protection": True,
            "profile.content_settings.exceptions.automatic_downloads.*.setting": 1,
            "profile.default_content_settings.popups": 0,
            "profile.default_content_setting_values.automatic_downloads": 1,
            "profile.default_content_setting_values.notifications": 2,
            "credentials_enable_service": False,
            "profile.password_manager_enabled": False,
            "download_restrictions": 0,
        }
        option.add_experimental_option("prefs", prefs)
        return option

    @staticmethod
    def _default_path_candidates(sys_platform):
        """
        Return the default install locations probed on a platform.

        :param sys_platform: lower-cased platform.system() value
        :return: (chrome_candidates, chromedriver_candidates); within the
                 Chrome list the regular release comes before the testing build
        :raises: GetDriverException for an unsupported platform
        """
        if sys_platform == 'windows':
            user_home = f'C:\\Users\\{getpass.getuser()}'
            return (
                [
                    'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
                    os.path.join(user_home, 'chrome\\chrome_win64\\chrome.exe'),
                ],
                [
                    os.path.join(user_home, 'chrome\\chromedriver.exe'),
                    os.path.join(user_home, 'chrome\\chrome_win64\\chromedriver.exe'),
                ],
            )
        if sys_platform == 'linux':
            return (
                ['/usr/bin/google-chrome', '/usr/bin/chrome/chrome'],
                ['/usr/local/bin/chromedriver', '/usr/bin/chromedriver'],
            )
        if sys_platform == 'darwin':
            return (
                [
                    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
                    '/usr/local/chrome/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing',
                ],
                [
                    '/usr/local/chrome/chromedriver',
                    '/usr/local/bin/chromedriver',
                    '/opt/homebrew/bin/chromedriver',
                ],
            )
        raise GetDriverException(f"不支持的平台: {sys_platform}")

    def _launch(self, option, temp_dir):
        """
        Resolve the binaries to use and start the first combination that works.

        :param option: ChromeOptions instance to launch with
        :param temp_dir: temporary profile directory referenced by ``option``
        :return: the started WebDriver instance
        :raises: GetDriverException when no browser could be started
        """
        default_chrome, default_chromedriver = self._default_path_candidates(platform.system().lower())
        chrome_path = self.chrome_path
        chromedriver_path = self.chromedriver_path

        # Both paths given explicitly: use them as they are, a single attempt.
        if chrome_path and chromedriver_path:
            driver = self._try_create_driver(chrome_path, chromedriver_path, option, temp_dir)
            if driver is None:
                raise GetDriverException(f"指定的Chrome路径无法启动: {chrome_path}")
            return driver

        # A path given by the caller is used as the only candidate for that
        # binary; otherwise the existing default locations are probed. (This
        # also covers the case where only one of the two paths was supplied.)
        chrome_paths = [chrome_path] if chrome_path else [p for p in default_chrome if os.path.exists(p)]
        chromedriver_paths = (
            [chromedriver_path] if chromedriver_path
            else [p for p in default_chromedriver if os.path.exists(p)]
        )
        if not chrome_paths:
            raise GetDriverException("未找到Chrome浏览器,请手动指定chrome_path")
        if not chromedriver_paths:
            raise GetDriverException("未找到Chromedriver,请手动指定chromedriver_path")

        for chrome_candidate in chrome_paths:
            for chromedriver_candidate in chromedriver_paths:
                is_compatible, chrome_version, chromedriver_version = self._check_version_compatibility(
                    chrome_candidate, chromedriver_candidate)
                if not is_compatible:
                    print(f"版本不兼容: Chrome {chrome_version}, Chromedriver {chromedriver_version}")
                # A mismatching pair is still attempted: it sometimes works.
                driver = self._try_create_driver(chrome_candidate, chromedriver_candidate, option, temp_dir)
                if driver is not None:
                    if not is_compatible:
                        print("警告:版本不兼容但启动成功,建议更新Chromedriver")
                    return driver

        raise GetDriverException("所有Chrome和Chromedriver组合都无法启动,请检查版本兼容性")

    def getdriver(self):
        """
        Create and return a Chrome WebDriver with the anti-detection scripts registered.

        Explicitly supplied paths are used as given; missing ones are filled in
        from the platform's default install locations, and every existing
        Chrome/chromedriver combination is attempted in order.

        :return: selenium.webdriver.Chrome instance
        :raises: GetDriverException
        """
        if not self.check_proxy():
            raise GetDriverException(f"代理不可用或格式错误: {self.proxy}")

        temp_dir = tempfile.mkdtemp(prefix=f'chrome_automation_{uuid.uuid4().hex[:8]}_')
        try:
            option = self._build_options(temp_dir)
            driver = self._launch(option, temp_dir)
        except Exception as e:
            # Nothing was started, so the profile directory is of no use.
            shutil.rmtree(temp_dir, ignore_errors=True)
            if isinstance(e, GetDriverException):
                raise
            raise GetDriverException(f"启动ChromeDriver失败: {e}") from e

        self.temp_dirs.append(temp_dir)
        self.driver = driver
        return driver

    def _cleanup_temp_dirs(self):
        """
        Remove every temporary profile directory created by this instance (best effort).
        """
        for temp_dir in self.temp_dirs:
            shutil.rmtree(temp_dir, ignore_errors=True)
        self.temp_dirs = []

    def __enter__(self):
        """
        Start the browser for use in a ``with`` statement.

        :return: selenium.webdriver.Chrome instance
        """
        self.driver = self.getdriver()
        return self.driver

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Quit the browser and clean up when the ``with`` block ends.
        """
        self.quit()

    def close(self):
        """
        Close the current browser window and remove the temporary directories.
        """
        if self.driver:
            try:
                self.driver.close()
            except Exception:
                # Best effort: the window may already be gone.
                pass
        self._cleanup_temp_dirs()

    def quit(self):
        """
        Quit the browser entirely and remove the temporary directories.
        """
        if self.driver:
            try:
                self.driver.quit()
            except Exception:
                # Best effort: the session may already be gone.
                pass
        self._cleanup_temp_dirs()
448
+
449
+
450
if __name__ == '__main__':
    # Manual smoke test: open a page in a visible browser window and keep it
    # open for inspection. The context manager guarantees that the browser is
    # quit and the temporary profile directory is removed even when the page
    # load fails or the run is interrupted.
    # Other constructor options: proxy (e.g. 'socks5://127.0.0.1:1080'),
    # user_agent, download_dir, chrome_path, chromedriver_path.
    import time

    with GetDriver(headless=False) as driver:
        driver.get('https://www.baidu.com')
        print(driver.title)
        time.sleep(1000)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.71
3
+ Version: 4.0.73
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1 +0,0 @@
1
- VERSION = '4.0.71'
@@ -1,267 +0,0 @@
1
- # -*- coding:utf-8 -*-
2
- import os
3
- import platform
4
- import getpass
5
- from selenium import webdriver
6
- from selenium.webdriver.chrome.service import Service
7
- import re
8
- import socket
9
- import tempfile
10
- import shutil
11
- import uuid
12
-
13
- dir_path = os.path.expanduser("~")
14
-
15
-
16
- class GetDriverException(Exception):
17
- """自定义异常:GetDriver相关错误"""
18
- pass
19
-
20
-
21
- class GetDriver:
22
- """
23
- Selenium ChromeDriver 管理器,支持多平台、代理、无头模式、下载目录、User-Agent等高级配置。
24
- 支持上下文管理器(with语法),自动资源清理。
25
- """
26
- def __init__(self, url=None, headless=False, proxy=None, user_agent=None, download_dir=None, chrome_path=None, chromedriver_path=None, maximize_window=True):
27
- """
28
- 初始化GetDriver
29
- :param url: 允许的安全站点(用于insecure origin as secure)
30
- :param headless: 是否无头模式
31
- :param proxy: 代理(支持http、https、socks5,格式如socks5://127.0.0.1:1080)
32
- :param user_agent: 自定义User-Agent
33
- :param download_dir: 下载目录
34
- :param chrome_path: Chrome浏览器路径
35
- :param chromedriver_path: Chromedriver路径
36
- """
37
- self.url = url
38
- self.headless = headless
39
- self.proxy = proxy
40
- self.user_agent = user_agent
41
- self.download_dir = os.path.expanduser(download_dir) if download_dir else os.path.expanduser('~/Downloads')
42
- self.chrome_path = chrome_path
43
- self.chromedriver_path = chromedriver_path
44
- self.temp_dirs = [] # 存储临时目录路径,用于清理
45
- self.driver = None
46
- if not self.user_agent:
47
- user_agents = [
48
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
49
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
50
- ]
51
- import random
52
- self.user_agent = user_agents[random.randint(0, len(user_agents) - 1)]
53
- self.maximize_window = maximize_window
54
-
55
- def __enter__(self):
56
- """
57
- 支持with语法自动获取driver
58
- :return: selenium.webdriver.Chrome实例
59
- """
60
- self.driver = self.getdriver()
61
- return self.driver
62
-
63
- def __exit__(self, exc_type, exc_val, exc_tb):
64
- """
65
- 支持with语法自动清理资源
66
- """
67
- self.quit()
68
-
69
- def close(self):
70
- """
71
- 关闭浏览器窗口并清理临时目录
72
- """
73
- if self.driver:
74
- try:
75
- self.driver.close()
76
- except:
77
- pass
78
- self._cleanup_temp_dirs()
79
-
80
- def quit(self):
81
- """
82
- 彻底退出浏览器并清理临时目录
83
- """
84
- if self.driver:
85
- try:
86
- self.driver.quit()
87
- except:
88
- pass
89
- self._cleanup_temp_dirs()
90
-
91
- def _cleanup_temp_dirs(self):
92
- """
93
- 清理所有创建的临时目录
94
- """
95
- for temp_dir in self.temp_dirs:
96
- try:
97
- if os.path.exists(temp_dir):
98
- shutil.rmtree(temp_dir)
99
- except:
100
- pass
101
- self.temp_dirs = []
102
-
103
- def check_proxy(self):
104
- """
105
- 校验代理格式和连通性,支持http/https/socks5
106
- :return: True/False
107
- """
108
- if not self.proxy:
109
- return True
110
- # 支持协议前缀
111
- proxy_pattern = r'^(socks5|http|https)://(\d{1,3}(\.\d{1,3}){3}):(\d+)$'
112
- if not re.match(proxy_pattern, self.proxy):
113
- return False
114
- proto, ip, _, _, port = re.match(proxy_pattern, self.proxy).groups()
115
- try:
116
- sock = socket.create_connection((ip, int(port)), timeout=5)
117
- sock.close()
118
- return True
119
- except:
120
- return False
121
-
122
- def getdriver(self):
123
- """
124
- 创建并返回Chrome WebDriver实例,自动注入反检测JS,异常时抛出GetDriverException
125
- :return: selenium.webdriver.Chrome实例
126
- :raises: GetDriverException
127
- """
128
- if not self.check_proxy():
129
- raise GetDriverException(f"代理不可用或格式错误: {self.proxy}")
130
- option = webdriver.ChromeOptions() # 浏览器启动选项
131
- if self.headless:
132
- option.add_argument("--headless") # 设置无界面模式
133
- option.add_argument("--window-size=1920,1080")
134
- option.add_argument("--disable-gpu")
135
- option.add_argument("--no-sandbox")
136
- option.add_argument("--disable-dev-shm-usage")
137
- # 添加唯一的用户数据目录,避免Chrome实例冲突
138
- temp_dir = tempfile.mkdtemp(prefix=f'chrome_automation_{uuid.uuid4().hex[:8]}_')
139
- option.add_argument(f'--user-data-dir={temp_dir}')
140
- option.add_argument('--no-first-run')
141
- option.add_argument('--no-default-browser-check')
142
- option.add_argument('--disable-background-timer-throttling')
143
- option.add_argument('--disable-backgrounding-occluded-windows')
144
- option.add_argument('--disable-renderer-backgrounding')
145
- option.add_argument('--disable-features=TranslateUI')
146
- option.add_argument('--disable-ipc-flooding-protection')
147
- # 关键安全浏览禁用参数
148
- option.add_argument('--allow-insecure-localhost')
149
- option.add_argument('--allow-running-insecure-content')
150
- option.add_argument('--disable-features=BlockInsecurePrivateNetworkRequests,SafeBrowsing,DownloadBubble,SafeBrowsingEnhancedProtection,DownloadWarning')
151
- option.add_argument('--safebrowsing-disable-download-protection')
152
- option.add_argument('--disable-client-side-phishing-detection')
153
- option.add_argument('--disable-popup-blocking')
154
- option.add_argument('--ignore-certificate-errors')
155
- if self.url:
156
- option.add_argument(f"--unsafely-treat-insecure-origin-as-secure={self.url}")
157
- # User-Agent
158
- option.add_argument(f'--user-agent={self.user_agent}')
159
- # 自动化相关设置
160
- option.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
161
- option.add_experimental_option("useAutomationExtension", False)
162
- # 代理设置
163
- if self.proxy:
164
- option.add_argument(f'--proxy-server={self.proxy}')
165
- # 下载配置
166
- prefs = {
167
- "download.default_directory": self.download_dir,
168
- "download.prompt_for_download": False,
169
- "download.directory_upgrade": True,
170
- "safebrowsing.enabled": False,
171
- "safebrowsing.disable_download_protection": True,
172
- "profile.content_settings.exceptions.automatic_downloads.*.setting": 1,
173
- "profile.default_content_settings.popups": 0,
174
- "profile.default_content_setting_values.automatic_downloads": 1,
175
- "profile.default_content_setting_values.notifications": 2,
176
- "credentials_enable_service": False,
177
- "profile.password_manager_enabled": False,
178
- "download_restrictions": 0,
179
- }
180
- # 平台与路径自动检测
181
- sys_platform = platform.system().lower()
182
- chrome_path = self.chrome_path
183
- chromedriver_path = self.chromedriver_path
184
- try:
185
- if sys_platform == 'windows':
186
- if not chrome_path:
187
- chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
188
- if not chromedriver_path:
189
- chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
190
- option.binary_location = chrome_path
191
- service = Service(chromedriver_path)
192
- elif sys_platform == 'linux':
193
- if not chrome_path:
194
- chrome_path = '/usr/bin/chrome/chrome'
195
- """
196
- # sudo mv /usr/bin/google-chrome /usr/bin/google-chrome.bak # 备份原有
197
- # sudo ln -s /usr/bin/chrome /usr/bin/google-chrome # 创建软链接
198
- """
199
- if not chromedriver_path:
200
- chromedriver_path = '/usr/local/bin/chromedriver'
201
- option.binary_location = chrome_path
202
- service = Service(chromedriver_path)
203
- elif sys_platform == 'darwin':
204
- if not chrome_path:
205
- # 优先使用用户指定的默认路径
206
- chrome_path_candidates = [
207
- '/usr/local/chrome/Google Chrome for Testing.app/Contents/MacOS/Google Chrome',
208
- '/usr/local/chrome/Google Chrome for Testing.app',
209
- '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
210
- '/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome',
211
- ]
212
- chrome_path = next((p for p in chrome_path_candidates if os.path.exists(p)), None)
213
- if not chromedriver_path:
214
- chromedriver_path_candidates = [
215
- '/usr/local/chrome/chromedriver',
216
- '/usr/local/bin/chromedriver',
217
- '/opt/homebrew/bin/chromedriver',
218
- ]
219
- chromedriver_path = next((p for p in chromedriver_path_candidates if os.path.exists(p)), None)
220
- if not chrome_path or not chromedriver_path:
221
- raise GetDriverException("未找到Chrome或Chromedriver,请手动指定chrome_path和chromedriver_path")
222
- # option.binary_location = chrome_path # macOS 设置此参数报错
223
- service = Service(chromedriver_path)
224
- else:
225
- raise GetDriverException(f"不支持的平台: {sys_platform}")
226
- except Exception as e:
227
- raise GetDriverException(f"浏览器路径配置异常: {e}")
228
- option.add_experimental_option("prefs", prefs)
229
- try:
230
- driver = webdriver.Chrome(service=service, options=option)
231
- if self.maximize_window:
232
- driver.maximize_window()
233
- # --- 防反爬:注入多段JS隐藏Selenium特征 ---
234
- js_hide_features = [
235
- "Object.defineProperty(navigator, 'webdriver', {get: () => false});",
236
- "Object.defineProperty(navigator, 'plugins', {get: () => [1,2,3,4,5]});",
237
- "Object.defineProperty(navigator, 'languages', {get: () => ['zh-CN', 'zh', 'en']});",
238
- "window.chrome = {runtime: {}};",
239
- "delete window.navigator.__proto__.webdriver;",
240
- r"for (let key in window) {if (key.match(/^[\$\_]{3,}/)) {try {delete window[key];} catch(e){}}}"
241
- ]
242
- for js in js_hide_features:
243
- pass
244
- driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})
245
- self.temp_dirs.append(temp_dir)
246
- self.driver = driver
247
- return driver
248
- except Exception as e:
249
- try:
250
- if os.path.exists(temp_dir):
251
- shutil.rmtree(temp_dir)
252
- except Exception as e:
253
- pass
254
- raise GetDriverException(f"启动ChromeDriver失败: {e}")
255
-
256
-
257
- if __name__ == '__main__':
258
- with GetDriver(
259
- headless=True,
260
- proxy=None, # 代理('socks5://127.0.0.1:1080')
261
- user_agent=None,
262
- download_dir=None,
263
- chrome_path=None,
264
- chromedriver_path=None,
265
- ) as driver:
266
- driver.get('https://www.baidu.com')
267
- print(driver.title)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes