mdbq 4.0.71__tar.gz → 4.0.73__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-4.0.71 → mdbq-4.0.73}/PKG-INFO +1 -1
- mdbq-4.0.73/mdbq/__version__.py +1 -0
- mdbq-4.0.73/mdbq/selenium/get_driver.py +467 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq.egg-info/PKG-INFO +1 -1
- mdbq-4.0.71/mdbq/__version__.py +0 -1
- mdbq-4.0.71/mdbq/selenium/get_driver.py +0 -267
- {mdbq-4.0.71 → mdbq-4.0.73}/README.txt +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/__init__.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/log/__init__.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/log/mylogger.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/myconf/__init__.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/myconf/myconf.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/__init__.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/deduplicator.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/mysql.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/s_query.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/unique_.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/mysql/uploader.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/__init__.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/download_sku_picture.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/error_handler.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/otk.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/pov_city.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/other/ua_sj.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/pbix/__init__.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/redis/__init__.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/redis/getredis.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/selenium/__init__.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq/spider/__init__.py +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/setup.cfg +0 -0
- {mdbq-4.0.71 → mdbq-4.0.73}/setup.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
# Package version string for this release.
VERSION = '4.0.73'
|
@@ -0,0 +1,467 @@
|
|
1
|
+
# -*- coding:utf-8 -*-
|
2
|
+
import os
|
3
|
+
import platform
|
4
|
+
import getpass
|
5
|
+
from selenium import webdriver
|
6
|
+
from selenium.webdriver.chrome.service import Service
|
7
|
+
import re
|
8
|
+
import socket
|
9
|
+
import tempfile
|
10
|
+
import shutil
|
11
|
+
import uuid
|
12
|
+
import subprocess
|
13
|
+
import json
|
14
|
+
|
15
|
+
# Home directory of the current user ("~" expanded once at import time).
dir_path = os.path.expanduser("~")
|
16
|
+
|
17
|
+
|
18
|
+
class GetDriverException(Exception):
    """Custom exception for GetDriver-related errors."""
|
21
|
+
|
22
|
+
|
23
|
+
class GetDriver:
    """
    Selenium ChromeDriver manager.

    Supports Windows/Linux/macOS path discovery, proxies, headless mode, a
    custom download directory and a custom User-Agent. Can be used as a
    context manager (``with GetDriver(...) as driver:``), in which case the
    browser is quit and the temporary profile directory removed on exit.
    """

    # Fallback User-Agent strings; one is picked at random when none is given.
    _DEFAULT_USER_AGENTS = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
    )

    # scheme://host:port where host is an IPv4 literal or a host name.
    _PROXY_PATTERN = re.compile(r'^(socks5|http|https)://([A-Za-z0-9.\-]+):(\d+)$')

    # Matches "Google Chrome 120.0.6099.109", "Chromium 120.0.6099.109" and
    # "Google Chrome for Testing 120.0.6099.109".
    _CHROME_VERSION_PATTERN = r'(?:Chrome|Chromium)[^\d\n]*?(\d+\.\d+\.\d+\.\d+)'
    # Matches "ChromeDriver 120.0.6099.109 (...)".
    _CHROMEDRIVER_VERSION_PATTERN = r'ChromeDriver\s+(\d+\.\d+\.\d+\.\d+)'

    # Passed as ONE --disable-features switch so no entry depends on how
    # Chrome treats a repeated switch.
    _DISABLED_FEATURES = (
        'VizDisplayCompositor',
        'TranslateUI',
        'BlockInsecurePrivateNetworkRequests',
        'SafeBrowsing',
        'DownloadBubble',
        'SafeBrowsingEnhancedProtection',
        'DownloadWarning',
    )

    # Anti-detection scripts registered via CDP so they run on every new document.
    _STEALTH_SCRIPTS = (
        # Hide the webdriver property
        "Object.defineProperty(navigator, 'webdriver', {get: () => undefined, configurable: true});",
        # Pretend real browser plugins exist
        "Object.defineProperty(navigator, 'plugins', {get: () => [1,2,3,4,5], configurable: true});",
        # Languages
        "Object.defineProperty(navigator, 'languages', {get: () => ['zh-CN', 'zh', 'en'], configurable: true});",
        # Fake the Chrome runtime object
        "window.chrome = {runtime: {}, loadTimes: function(){}, csi: function(){}, app: {}};",
        # Remove webdriver from the prototype chain
        "delete window.navigator.__proto__.webdriver;",
        # Remove Selenium-related globals
        r"for (let key in window) {if (key.match(/^[\$\_]{3,}/)) {try {delete window[key];} catch(e){}}}",
        # Hide automation-related permission behaviour
        "Object.defineProperty(navigator, 'permissions', {get: () => ({query: () => Promise.resolve({state: 'granted'})}), configurable: true});",
        # Realistic navigator properties
        "Object.defineProperty(navigator, 'hardwareConcurrency', {get: () => 8, configurable: true});",
        "Object.defineProperty(navigator, 'deviceMemory', {get: () => 8, configurable: true});",
        # Avoid touch-based automation heuristics
        "Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 0, configurable: true});",
        # Hide CDP-related globals
        "delete window.cdc_adoQpoasnfa76pfcZLmcfl_Array;",
        "delete window.cdc_adoQpoasnfa76pfcZLmcfl_Promise;",
        "delete window.cdc_adoQpoasnfa76pfcZLmcfl_Symbol;",
    )

    def __init__(self, url=None, headless=False, proxy=None, user_agent=None, download_dir=None, chrome_path=None, chromedriver_path=None, maximize_window=True):
        """
        Initialise the manager; no browser is started until getdriver() is called.

        :param url: origin to pass to --unsafely-treat-insecure-origin-as-secure
        :param headless: run Chrome in headless mode
        :param proxy: proxy URL (http, https or socks5), e.g. socks5://127.0.0.1:1080
        :param user_agent: custom User-Agent; a built-in one is chosen at random if empty
        :param download_dir: download directory ("~" is expanded); defaults to ~/Downloads
        :param chrome_path: path to the Chrome binary (auto-detected if empty)
        :param chromedriver_path: path to chromedriver (auto-detected if empty)
        :param maximize_window: maximise the browser window after start-up
        """
        self.url = url
        self.headless = headless
        self.proxy = proxy
        self.download_dir = os.path.expanduser(download_dir) if download_dir else os.path.expanduser('~/Downloads')
        self.chrome_path = chrome_path
        self.chromedriver_path = chromedriver_path
        self.temp_dirs = []  # temporary profile directories to remove on close()/quit()
        self.driver = None
        if not user_agent:
            import random
            user_agent = random.choice(self._DEFAULT_USER_AGENTS)
        self.user_agent = user_agent
        self.maximize_window = maximize_window

    def check_proxy(self):
        """
        Validate the proxy format and check that the proxy port accepts TCP connections.

        :return: True if no proxy is configured or the proxy is reachable, else False
        """
        if not self.proxy:
            return True
        match = self._PROXY_PATTERN.match(self.proxy)
        if not match:
            return False
        _scheme, host, port_text = match.groups()
        port = int(port_text)
        if not 0 < port <= 65535:
            return False
        try:
            # The context manager closes the probe socket even on error paths.
            with socket.create_connection((host, port), timeout=5):
                return True
        except (OSError, OverflowError, ValueError):
            # Refused, timed out, unresolvable or malformed host.
            return False

    @staticmethod
    def _run_version_command(cmd, pattern, timeout=10, shell=False):
        """
        Run *cmd* and extract the first regex group of *pattern* from its stdout.

        :param cmd: argument list for subprocess.run
        :param pattern: regular expression with one capturing group (the version)
        :param timeout: seconds before the command is abandoned
        :param shell: forwarded to subprocess.run
        :return: captured version string, or None on non-zero exit / no match
        """
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, shell=shell)
        if result.returncode != 0:
            return None
        match = re.search(pattern, result.stdout)
        return match.group(1) if match else None

    def _probe_chrome_cli(self, chrome_path):
        """Windows probe: ask the binary itself via --version."""
        return self._run_version_command([chrome_path, '--version'], self._CHROME_VERSION_PATTERN, shell=True)

    def _probe_chrome_wmic(self, chrome_path):
        """Windows probe: read the file version of the binary through wmic."""
        # WQL string literals need every backslash doubled. The path is built
        # outside the f-string because a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12.
        wql_path = chrome_path.replace('/', '\\').replace('\\', '\\\\')
        cmd = ['wmic', 'datafile', 'where', f'name="{wql_path}"', 'get', 'version', '/value']
        return self._run_version_command(cmd, r'Version=(\d+\.\d+\.\d+\.\d+)', shell=True)

    def _probe_chrome_registry(self, chrome_path):
        """Windows probe: read the version recorded in the current user's registry."""
        import winreg
        key_path = r"SOFTWARE\Google\Chrome\BLBeacon"
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, key_path) as key:
            return winreg.QueryValueEx(key, "version")[0]

    def _probe_chrome_headless(self, chrome_path):
        """Windows probe: start Chrome headless and parse the about:version dump."""
        cmd = [chrome_path, '--headless', '--disable-gpu', '--dump-dom', 'about:version']
        return self._run_version_command(cmd, r'Chrome/(\d+\.\d+\.\d+\.\d+)', timeout=15, shell=True)

    def _get_chrome_version(self, chrome_path):
        """
        Return the Chrome version.

        :param chrome_path: path to the Chrome executable
        :return: version string such as "120.0.6099.109", or None if it cannot be determined
        """
        try:
            if platform.system().lower() != 'windows':
                # macOS and Linux: --version prints e.g. "Google Chrome 120.0.6099.109"
                return self._run_version_command([chrome_path, '--version'], self._CHROME_VERSION_PATTERN)
            # Windows: try several strategies in turn. The wmic probe runs
            # before the registry probe because it inspects the binary at
            # chrome_path, whereas the registry key does not depend on it.
            probes = (
                self._probe_chrome_cli,
                self._probe_chrome_wmic,
                self._probe_chrome_registry,
                self._probe_chrome_headless,
            )
            for probe in probes:
                try:
                    version = probe(chrome_path)
                except Exception:
                    # A failing probe is expected; fall through to the next one.
                    continue
                if version:
                    return version
        except Exception as e:
            print(f"获取Chrome版本失败: {e}")
        return None

    def _get_chromedriver_version(self, chromedriver_path):
        """
        Return the chromedriver version.

        :param chromedriver_path: path to the chromedriver executable
        :return: version string such as "120.0.6099.109", or None if it cannot be determined
        """
        try:
            # The original code runs the command through the shell on Windows only.
            use_shell = platform.system().lower() == 'windows'
            return self._run_version_command(
                [chromedriver_path, '--version'],
                self._CHROMEDRIVER_VERSION_PATTERN,
                shell=use_shell,
            )
        except Exception as e:
            print(f"获取Chromedriver版本失败: {e}")
        return None

    def _check_version_compatibility(self, chrome_path, chromedriver_path):
        """
        Compare the major versions of Chrome and chromedriver.

        :param chrome_path: path to the Chrome executable
        :param chromedriver_path: path to the chromedriver executable
        :return: (is_compatible, chrome_version, chromedriver_version)
        """
        chrome_version = self._get_chrome_version(chrome_path)
        chromedriver_version = self._get_chromedriver_version(chromedriver_path)

        # Unknown versions are treated as compatible so a start-up is still attempted.
        if not chrome_version or not chromedriver_version:
            print(f"警告: 无法获取版本信息 - Chrome: {chrome_version}, Chromedriver: {chromedriver_version}")
            return True, chrome_version, chromedriver_version

        is_compatible = chrome_version.split('.')[0] == chromedriver_version.split('.')[0]
        return is_compatible, chrome_version, chromedriver_version

    def _try_create_driver(self, chrome_path, chromedriver_path, option, temp_dir):
        """
        Try to create a Chrome WebDriver instance and inject the anti-detection scripts.

        :param chrome_path: path to the Chrome executable
        :param chromedriver_path: path to the chromedriver executable
        :param option: ChromeOptions instance
        :param temp_dir: temporary profile directory (unused here, kept for the interface)
        :return: Chrome WebDriver instance, or None if start-up failed
        """
        driver = None
        try:
            option.binary_location = chrome_path
            service = Service(chromedriver_path)
            driver = webdriver.Chrome(service=service, options=option)
            if self.maximize_window:
                driver.maximize_window()
            for script in self._STEALTH_SCRIPTS:
                driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": script})
            return driver
        except Exception as e:
            print(f"创建Chrome WebDriver失败: {e}")
            if driver is not None:
                # The browser did start but a later step failed: shut it down
                # so a half-initialised session is not left running.
                try:
                    driver.quit()
                except Exception:
                    pass  # best effort; the failure was already reported above
            return None

    def _build_options(self, temp_dir):
        """Build the ChromeOptions for this configuration, using *temp_dir* as profile directory."""
        option = webdriver.ChromeOptions()
        if self.headless:
            # NOTE(review): the source listing lost its indentation; these five
            # flags are assumed to be the headless-only group - confirm.
            option.add_argument("--headless")
            option.add_argument("--window-size=1920,1080")
            option.add_argument("--disable-gpu")
            option.add_argument("--no-sandbox")
            option.add_argument("--disable-dev-shm-usage")
        # Hide the automation hints
        option.add_argument("--disable-blink-features=AutomationControlled")
        option.add_argument("--disable-background-timer-throttling")
        option.add_argument("--disable-backgrounding-occluded-windows")
        option.add_argument("--disable-renderer-backgrounding")
        option.add_argument("--disable-ipc-flooding-protection")
        # Unique user-data directory so parallel Chrome instances do not clash
        option.add_argument(f'--user-data-dir={temp_dir}')
        option.add_argument('--no-first-run')
        option.add_argument('--no-default-browser-check')
        # Safe-browsing / insecure-content relaxations
        option.add_argument('--allow-insecure-localhost')
        option.add_argument('--allow-running-insecure-content')
        option.add_argument('--disable-features=' + ','.join(self._DISABLED_FEATURES))
        option.add_argument('--safebrowsing-disable-download-protection')
        option.add_argument('--disable-client-side-phishing-detection')
        option.add_argument('--disable-popup-blocking')
        option.add_argument('--ignore-certificate-errors')
        if self.url:
            option.add_argument(f"--unsafely-treat-insecure-origin-as-secure={self.url}")
        option.add_argument(f'--user-agent={self.user_agent}')
        # Automation-related settings
        option.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
        option.add_experimental_option("useAutomationExtension", False)
        if self.proxy:
            option.add_argument(f'--proxy-server={self.proxy}')
        # Download behaviour
        prefs = {
            "download.default_directory": self.download_dir,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": False,
            "safebrowsing.disable_download_protection": True,
            "profile.content_settings.exceptions.automatic_downloads.*.setting": 1,
            "profile.default_content_settings.popups": 0,
            "profile.default_content_setting_values.automatic_downloads": 1,
            "profile.default_content_setting_values.notifications": 2,
            "credentials_enable_service": False,
            "profile.password_manager_enabled": False,
            "download_restrictions": 0,
        }
        option.add_experimental_option("prefs", prefs)
        return option

    def _candidate_paths(self, sys_platform):
        """
        Return (chrome_candidates, chromedriver_candidates) for *sys_platform*.

        A path supplied by the caller replaces the built-in candidates for that
        binary, so supplying only one of the two paths works as well.

        :param sys_platform: lower-cased platform.system() value
        :raises: GetDriverException for an unsupported platform
        """
        chrome_candidates = []
        chromedriver_candidates = []
        if sys_platform == 'windows':
            if not (self.chrome_path and self.chromedriver_path):
                user_home = f'C:\\Users\\{getpass.getuser()}'
                chrome_candidates = [
                    'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',  # stable release
                    os.path.join(user_home, 'chrome\\chrome_win64\\chrome.exe'),  # testing build
                ]
                chromedriver_candidates = [
                    os.path.join(user_home, 'chrome\\chromedriver.exe'),
                    os.path.join(user_home, 'chrome\\chrome_win64\\chromedriver.exe'),
                ]
        elif sys_platform == 'linux':
            chrome_candidates = [
                '/usr/bin/google-chrome',  # stable release
                '/usr/bin/chrome/chrome',  # testing build
            ]
            chromedriver_candidates = [
                '/usr/local/bin/chromedriver',
                '/usr/bin/chromedriver',
            ]
        elif sys_platform == 'darwin':
            chrome_candidates = [
                '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',  # stable release
                '/usr/local/chrome/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing',  # testing build
            ]
            chromedriver_candidates = [
                '/usr/local/chrome/chromedriver',
                '/usr/local/bin/chromedriver',
                '/opt/homebrew/bin/chromedriver',
            ]
        else:
            raise GetDriverException(f"不支持的平台: {sys_platform}")

        if self.chrome_path:
            chrome_candidates = [self.chrome_path]
        if self.chromedriver_path:
            chromedriver_candidates = [self.chromedriver_path]
        return chrome_candidates, chromedriver_candidates

    def _select_driver(self, chrome_paths, chromedriver_paths, option, temp_dir):
        """
        Start the first working Chrome/chromedriver combination.

        Version-compatible pairs are tried first, in candidate order. Pairs
        whose major versions differ are only attempted afterwards, as a last
        resort.

        :return: Chrome WebDriver instance, or None if every combination failed
        """
        deferred = []
        for chrome_path in chrome_paths:
            for chromedriver_path in chromedriver_paths:
                is_compatible, chrome_version, chromedriver_version = self._check_version_compatibility(chrome_path, chromedriver_path)
                if not is_compatible:
                    deferred.append((chrome_path, chromedriver_path, chrome_version, chromedriver_version))
                    continue
                driver = self._try_create_driver(chrome_path, chromedriver_path, option, temp_dir)
                if driver:
                    return driver

        for chrome_path, chromedriver_path, chrome_version, chromedriver_version in deferred:
            print(f"版本不兼容: Chrome {chrome_version}, Chromedriver {chromedriver_version}")
            # Mismatched versions occasionally still work, so try anyway.
            driver = self._try_create_driver(chrome_path, chromedriver_path, option, temp_dir)
            if driver:
                print("警告:版本不兼容但启动成功,建议更新Chromedriver")
                return driver
        return None

    def getdriver(self):
        """
        Create and return a Chrome WebDriver instance with the anti-detection scripts injected.

        If both chrome_path and chromedriver_path were supplied they are used
        directly. Otherwise candidate binaries are detected per platform and
        version-compatible combinations are preferred.

        :return: selenium.webdriver.Chrome instance
        :raises: GetDriverException
        """
        if not self.check_proxy():
            raise GetDriverException(f"代理不可用或格式错误: {self.proxy}")

        temp_dir = tempfile.mkdtemp(prefix=f'chrome_automation_{uuid.uuid4().hex[:8]}_')
        try:
            option = self._build_options(temp_dir)
            sys_platform = platform.system().lower()
            chrome_candidates, chromedriver_candidates = self._candidate_paths(sys_platform)

            if self.chrome_path and self.chromedriver_path:
                # Both paths were supplied explicitly: use them as-is.
                driver = self._try_create_driver(self.chrome_path, self.chromedriver_path, option, temp_dir)
                if not driver:
                    raise GetDriverException(f"指定的Chrome路径无法启动: {self.chrome_path}")
            else:
                # Only auto-detected candidates are filtered by existence; an
                # explicitly supplied path is always attempted.
                if self.chrome_path:
                    chrome_paths = chrome_candidates
                else:
                    chrome_paths = [p for p in chrome_candidates if os.path.exists(p)]
                if self.chromedriver_path:
                    chromedriver_paths = chromedriver_candidates
                else:
                    chromedriver_paths = [p for p in chromedriver_candidates if os.path.exists(p)]

                if not chrome_paths:
                    raise GetDriverException("未找到Chrome浏览器,请手动指定chrome_path")
                if not chromedriver_paths:
                    raise GetDriverException("未找到Chromedriver,请手动指定chromedriver_path")

                driver = self._select_driver(chrome_paths, chromedriver_paths, option, temp_dir)
                if not driver:
                    raise GetDriverException("所有Chrome和Chromedriver组合都无法启动,请检查版本兼容性")

            self.temp_dirs.append(temp_dir)
            self.driver = driver
            return driver
        except Exception as e:
            # Nothing was started successfully: remove the unused profile directory.
            shutil.rmtree(temp_dir, ignore_errors=True)
            if isinstance(e, GetDriverException):
                raise
            raise GetDriverException(f"启动ChromeDriver失败: {e}") from e

    def _cleanup_temp_dirs(self):
        """
        Remove every temporary profile directory created by this manager (best effort).
        """
        for temp_dir in self.temp_dirs:
            shutil.rmtree(temp_dir, ignore_errors=True)
        self.temp_dirs = []

    def __enter__(self):
        """
        Context-manager entry: start the browser.

        :return: selenium.webdriver.Chrome instance
        """
        self.driver = self.getdriver()
        return self.driver

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context-manager exit: quit the browser and clean up; exceptions are not suppressed.
        """
        self.quit()

    def close(self):
        """
        Close the current browser window and remove the temporary directories.
        """
        if self.driver:
            try:
                self.driver.close()
            except Exception:
                pass  # the window may already be gone
        self._cleanup_temp_dirs()

    def quit(self):
        """
        Quit the browser completely and remove the temporary directories.
        """
        if self.driver:
            try:
                self.driver.quit()
            except Exception:
                pass  # the session may already be gone
            # Drop the reference so a second quit() does not touch a dead session.
            self.driver = None
        self._cleanup_temp_dirs()
|
448
|
+
|
449
|
+
|
450
|
+
if __name__ == '__main__':
    # Manual smoke test: open a page in a visible browser and keep the window
    # around for inspection. Other options accepted by GetDriver: proxy
    # (e.g. 'socks5://127.0.0.1:1080'), user_agent, download_dir, chrome_path,
    # chromedriver_path.
    import time

    # The context manager keeps a reference to the manager object, so the
    # browser is quit and its temporary profile removed even when the sleep
    # is interrupted with Ctrl+C.
    with GetDriver(headless=False) as driver:
        driver.get('https://www.baidu.com')
        print(driver.title)
        time.sleep(1000)
|
mdbq-4.0.71/mdbq/__version__.py
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
# Package version string for this release.
VERSION = '4.0.71'
|
@@ -1,267 +0,0 @@
|
|
1
|
-
# -*- coding:utf-8 -*-
|
2
|
-
import os
|
3
|
-
import platform
|
4
|
-
import getpass
|
5
|
-
from selenium import webdriver
|
6
|
-
from selenium.webdriver.chrome.service import Service
|
7
|
-
import re
|
8
|
-
import socket
|
9
|
-
import tempfile
|
10
|
-
import shutil
|
11
|
-
import uuid
|
12
|
-
|
13
|
-
# Home directory of the current user ("~" expanded once at import time).
dir_path = os.path.expanduser("~")
|
14
|
-
|
15
|
-
|
16
|
-
class GetDriverException(Exception):
    """Custom exception for GetDriver-related errors."""
|
19
|
-
|
20
|
-
|
21
|
-
class GetDriver:
    """
    Selenium ChromeDriver manager.

    Supports Windows/Linux/macOS default paths, proxies, headless mode, a
    custom download directory and a custom User-Agent. Can be used as a
    context manager (``with GetDriver(...) as driver:``), in which case the
    browser is quit and the temporary profile directory removed on exit.
    """

    # Fallback User-Agent strings; one is picked at random when none is given.
    _DEFAULT_USER_AGENTS = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
    )

    # scheme://a.b.c.d:port (IPv4 literal only, as in the original pattern).
    _PROXY_PATTERN = re.compile(r'^(socks5|http|https)://(\d{1,3}(?:\.\d{1,3}){3}):(\d+)$')

    # Anti-detection scripts registered via CDP so they run on every new document.
    _STEALTH_SCRIPTS = (
        "Object.defineProperty(navigator, 'webdriver', {get: () => false});",
        "Object.defineProperty(navigator, 'plugins', {get: () => [1,2,3,4,5]});",
        "Object.defineProperty(navigator, 'languages', {get: () => ['zh-CN', 'zh', 'en']});",
        "window.chrome = {runtime: {}};",
        "delete window.navigator.__proto__.webdriver;",
        r"for (let key in window) {if (key.match(/^[\$\_]{3,}/)) {try {delete window[key];} catch(e){}}}",
    )

    def __init__(self, url=None, headless=False, proxy=None, user_agent=None, download_dir=None, chrome_path=None, chromedriver_path=None, maximize_window=True):
        """
        Initialise the manager; no browser is started until getdriver() is called.

        :param url: origin to pass to --unsafely-treat-insecure-origin-as-secure
        :param headless: run Chrome in headless mode
        :param proxy: proxy URL (http, https or socks5), e.g. socks5://127.0.0.1:1080
        :param user_agent: custom User-Agent; a built-in one is chosen at random if empty
        :param download_dir: download directory ("~" is expanded); defaults to ~/Downloads
        :param chrome_path: path to the Chrome binary
        :param chromedriver_path: path to chromedriver
        :param maximize_window: maximise the browser window after start-up
        """
        self.url = url
        self.headless = headless
        self.proxy = proxy
        self.download_dir = os.path.expanduser(download_dir) if download_dir else os.path.expanduser('~/Downloads')
        self.chrome_path = chrome_path
        self.chromedriver_path = chromedriver_path
        self.temp_dirs = []  # temporary profile directories to remove on close()/quit()
        self.driver = None
        if not user_agent:
            import random
            user_agent = random.choice(self._DEFAULT_USER_AGENTS)
        self.user_agent = user_agent
        self.maximize_window = maximize_window

    def __enter__(self):
        """
        Context-manager entry: start the browser.

        :return: selenium.webdriver.Chrome instance
        """
        self.driver = self.getdriver()
        return self.driver

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context-manager exit: quit the browser and clean up; exceptions are not suppressed.
        """
        self.quit()

    def close(self):
        """
        Close the current browser window and remove the temporary directories.
        """
        if self.driver:
            try:
                self.driver.close()
            except Exception:
                pass  # the window may already be gone
        self._cleanup_temp_dirs()

    def quit(self):
        """
        Quit the browser completely and remove the temporary directories.
        """
        if self.driver:
            try:
                self.driver.quit()
            except Exception:
                pass  # the session may already be gone
        self._cleanup_temp_dirs()

    def _cleanup_temp_dirs(self):
        """
        Remove every temporary profile directory created by this manager (best effort).
        """
        for temp_dir in self.temp_dirs:
            shutil.rmtree(temp_dir, ignore_errors=True)
        self.temp_dirs = []

    def check_proxy(self):
        """
        Validate the proxy format and check that the proxy port accepts TCP connections.

        :return: True if no proxy is configured or the proxy is reachable, else False
        """
        if not self.proxy:
            return True
        match = self._PROXY_PATTERN.match(self.proxy)
        if not match:
            return False
        _scheme, ip, port_text = match.groups()
        port = int(port_text)
        if not 0 < port <= 65535:
            return False
        try:
            # The context manager closes the probe socket even on error paths.
            with socket.create_connection((ip, port), timeout=5):
                return True
        except (OSError, OverflowError, ValueError):
            return False

    def _build_options(self, temp_dir):
        """Build the ChromeOptions for this configuration, using *temp_dir* as profile directory."""
        option = webdriver.ChromeOptions()
        if self.headless:
            # NOTE(review): the source listing lost its indentation; these five
            # flags are assumed to be the headless-only group - confirm.
            option.add_argument("--headless")
            option.add_argument("--window-size=1920,1080")
            option.add_argument("--disable-gpu")
            option.add_argument("--no-sandbox")
            option.add_argument("--disable-dev-shm-usage")
        # Unique user-data directory so parallel Chrome instances do not clash
        option.add_argument(f'--user-data-dir={temp_dir}')
        option.add_argument('--no-first-run')
        option.add_argument('--no-default-browser-check')
        option.add_argument('--disable-background-timer-throttling')
        option.add_argument('--disable-backgrounding-occluded-windows')
        option.add_argument('--disable-renderer-backgrounding')
        option.add_argument('--disable-ipc-flooding-protection')
        # Safe-browsing / insecure-content relaxations
        option.add_argument('--allow-insecure-localhost')
        option.add_argument('--allow-running-insecure-content')
        # TranslateUI is merged into the single --disable-features switch so it
        # does not depend on how Chrome treats a repeated switch.
        option.add_argument('--disable-features=TranslateUI,BlockInsecurePrivateNetworkRequests,SafeBrowsing,DownloadBubble,SafeBrowsingEnhancedProtection,DownloadWarning')
        option.add_argument('--safebrowsing-disable-download-protection')
        option.add_argument('--disable-client-side-phishing-detection')
        option.add_argument('--disable-popup-blocking')
        option.add_argument('--ignore-certificate-errors')
        if self.url:
            option.add_argument(f"--unsafely-treat-insecure-origin-as-secure={self.url}")
        option.add_argument(f'--user-agent={self.user_agent}')
        # Automation-related settings
        option.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
        option.add_experimental_option("useAutomationExtension", False)
        if self.proxy:
            option.add_argument(f'--proxy-server={self.proxy}')
        # Download behaviour
        prefs = {
            "download.default_directory": self.download_dir,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": False,
            "safebrowsing.disable_download_protection": True,
            "profile.content_settings.exceptions.automatic_downloads.*.setting": 1,
            "profile.default_content_settings.popups": 0,
            "profile.default_content_setting_values.automatic_downloads": 1,
            "profile.default_content_setting_values.notifications": 2,
            "credentials_enable_service": False,
            "profile.password_manager_enabled": False,
            "download_restrictions": 0,
        }
        option.add_experimental_option("prefs", prefs)
        return option

    def _locate_binaries(self, sys_platform):
        """
        Resolve the Chrome and chromedriver paths for *sys_platform*.

        Caller-supplied paths win. Otherwise fixed defaults are used on
        Windows and Linux, and the first existing candidate on macOS.

        :param sys_platform: lower-cased platform.system() value
        :return: (chrome_path, chromedriver_path)
        :raises: GetDriverException if the platform is unsupported or nothing is found on macOS
        """
        chrome_path = self.chrome_path
        chromedriver_path = self.chromedriver_path
        if sys_platform == 'windows':
            if not chrome_path:
                chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
            if not chromedriver_path:
                chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
        elif sys_platform == 'linux':
            if not chrome_path:
                # To make this the system default as well:
                #   sudo mv /usr/bin/google-chrome /usr/bin/google-chrome.bak   (back up the original)
                #   sudo ln -s /usr/bin/chrome /usr/bin/google-chrome           (create a symlink)
                chrome_path = '/usr/bin/chrome/chrome'
            if not chromedriver_path:
                chromedriver_path = '/usr/local/bin/chromedriver'
        elif sys_platform == 'darwin':
            if not chrome_path:
                chrome_path_candidates = [
                    '/usr/local/chrome/Google Chrome for Testing.app/Contents/MacOS/Google Chrome',
                    '/usr/local/chrome/Google Chrome for Testing.app',
                    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
                    '/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome',
                ]
                chrome_path = next((p for p in chrome_path_candidates if os.path.exists(p)), None)
            if not chromedriver_path:
                chromedriver_path_candidates = [
                    '/usr/local/chrome/chromedriver',
                    '/usr/local/bin/chromedriver',
                    '/opt/homebrew/bin/chromedriver',
                ]
                chromedriver_path = next((p for p in chromedriver_path_candidates if os.path.exists(p)), None)
            if not chrome_path or not chromedriver_path:
                raise GetDriverException("未找到Chrome或Chromedriver,请手动指定chrome_path和chromedriver_path")
        else:
            raise GetDriverException(f"不支持的平台: {sys_platform}")
        return chrome_path, chromedriver_path

    def getdriver(self):
        """
        Create and return a Chrome WebDriver instance with the anti-detection scripts injected.

        :return: selenium.webdriver.Chrome instance
        :raises: GetDriverException
        """
        if not self.check_proxy():
            raise GetDriverException(f"代理不可用或格式错误: {self.proxy}")

        temp_dir = tempfile.mkdtemp(prefix=f'chrome_automation_{uuid.uuid4().hex[:8]}_')
        driver = None
        try:
            option = self._build_options(temp_dir)

            # Platform and path detection
            try:
                sys_platform = platform.system().lower()
                chrome_path, chromedriver_path = self._locate_binaries(sys_platform)
                if sys_platform != 'darwin':
                    # The original comment notes that setting binary_location on macOS raises an error.
                    option.binary_location = chrome_path
                service = Service(chromedriver_path)
            except Exception as e:
                raise GetDriverException(f"浏览器路径配置异常: {e}") from e

            try:
                driver = webdriver.Chrome(service=service, options=option)
                if self.maximize_window:
                    driver.maximize_window()
                for script in self._STEALTH_SCRIPTS:
                    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": script})
            except Exception as e:
                raise GetDriverException(f"启动ChromeDriver失败: {e}") from e
        except Exception:
            # Any failure: stop a half-started browser and remove the unused
            # profile directory before propagating the original error.
            if driver is not None:
                try:
                    driver.quit()
                except Exception:
                    pass  # best effort
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise

        self.temp_dirs.append(temp_dir)
        self.driver = driver
        return driver
|
255
|
-
|
256
|
-
|
257
|
-
if __name__ == '__main__':
    # Manual smoke test: fetch one page headlessly and print its title.
    manager = GetDriver(
        headless=True,
        proxy=None,  # proxy, e.g. 'socks5://127.0.0.1:1080'
        user_agent=None,
        download_dir=None,
        chrome_path=None,
        chromedriver_path=None,
    )
    with manager as browser:
        browser.get('https://www.baidu.com')
        print(browser.title)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|