mdbq 4.0.70__py3-none-any.whl → 4.0.72__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '4.0.70'
1
+ VERSION = '4.0.72'
@@ -9,6 +9,8 @@ import socket
9
9
  import tempfile
10
10
  import shutil
11
11
  import uuid
12
+ import subprocess
13
+ import json
12
14
 
13
15
  dir_path = os.path.expanduser("~")
14
16
 
@@ -52,54 +54,6 @@ class GetDriver:
52
54
  self.user_agent = user_agents[random.randint(0, len(user_agents) - 1)]
53
55
  self.maximize_window = maximize_window
54
56
 
55
- def __enter__(self):
56
- """
57
- 支持with语法自动获取driver
58
- :return: selenium.webdriver.Chrome实例
59
- """
60
- self.driver = self.getdriver()
61
- return self.driver
62
-
63
- def __exit__(self, exc_type, exc_val, exc_tb):
64
- """
65
- 支持with语法自动清理资源
66
- """
67
- self.quit()
68
-
69
- def close(self):
70
- """
71
- 关闭浏览器窗口并清理临时目录
72
- """
73
- if self.driver:
74
- try:
75
- self.driver.close()
76
- except:
77
- pass
78
- self._cleanup_temp_dirs()
79
-
80
- def quit(self):
81
- """
82
- 彻底退出浏览器并清理临时目录
83
- """
84
- if self.driver:
85
- try:
86
- self.driver.quit()
87
- except:
88
- pass
89
- self._cleanup_temp_dirs()
90
-
91
- def _cleanup_temp_dirs(self):
92
- """
93
- 清理所有创建的临时目录
94
- """
95
- for temp_dir in self.temp_dirs:
96
- try:
97
- if os.path.exists(temp_dir):
98
- shutil.rmtree(temp_dir)
99
- except:
100
- pass
101
- self.temp_dirs = []
102
-
103
57
  def check_proxy(self):
104
58
  """
105
59
  校验代理格式和连通性,支持http/https/socks5
@@ -119,14 +73,133 @@ class GetDriver:
119
73
  except:
120
74
  return False
121
75
 
76
+ def _get_chrome_version(self, chrome_path):
77
+ """
78
+ 获取Chrome版本号
79
+ :param chrome_path: Chrome可执行文件路径
80
+ :return: 版本号字符串,如"120.0.6099.109"
81
+ """
82
+ try:
83
+ if platform.system().lower() == 'windows':
84
+ # Windows下使用--version参数
85
+ result = subprocess.run([chrome_path, '--version'],
86
+ capture_output=True, text=True, timeout=10)
87
+ if result.returncode == 0:
88
+ # 输出格式: "Google Chrome 120.0.6099.109"
89
+ version_match = re.search(r'Chrome\s+(\d+\.\d+\.\d+\.\d+)', result.stdout)
90
+ if version_match:
91
+ return version_match.group(1)
92
+ else:
93
+ # macOS和Linux下使用--version参数
94
+ result = subprocess.run([chrome_path, '--version'],
95
+ capture_output=True, text=True, timeout=10)
96
+ if result.returncode == 0:
97
+ # 输出格式: "Google Chrome 120.0.6099.109"
98
+ version_match = re.search(r'Chrome\s+(\d+\.\d+\.\d+\.\d+)', result.stdout)
99
+ if version_match:
100
+ return version_match.group(1)
101
+ except Exception as e:
102
+ print(f"获取Chrome版本失败: {e}")
103
+ return None
104
+
105
+ def _get_chromedriver_version(self, chromedriver_path):
106
+ """
107
+ 获取Chromedriver版本号
108
+ :param chromedriver_path: Chromedriver可执行文件路径
109
+ :return: 版本号字符串,如"120.0.6099.109"
110
+ """
111
+ try:
112
+ result = subprocess.run([chromedriver_path, '--version'],
113
+ capture_output=True, text=True, timeout=10)
114
+ if result.returncode == 0:
115
+ # 输出格式: "ChromeDriver 120.0.6099.109"
116
+ version_match = re.search(r'ChromeDriver\s+(\d+\.\d+\.\d+\.\d+)', result.stdout)
117
+ if version_match:
118
+ return version_match.group(1)
119
+ except Exception as e:
120
+ print(f"获取Chromedriver版本失败: {e}")
121
+ return None
122
+
123
+ def _check_version_compatibility(self, chrome_path, chromedriver_path):
124
+ """
125
+ 检查Chrome和Chromedriver版本兼容性
126
+ :param chrome_path: Chrome可执行文件路径
127
+ :param chromedriver_path: Chromedriver可执行文件路径
128
+ :return: (is_compatible, chrome_version, chromedriver_version)
129
+ """
130
+ chrome_version = self._get_chrome_version(chrome_path)
131
+ chromedriver_version = self._get_chromedriver_version(chromedriver_path)
132
+
133
+ if not chrome_version or not chromedriver_version:
134
+ return False, chrome_version, chromedriver_version
135
+
136
+ # 提取主版本号进行比较
137
+ chrome_major = chrome_version.split('.')[0]
138
+ chromedriver_major = chromedriver_version.split('.')[0]
139
+
140
+ is_compatible = chrome_major == chromedriver_major
141
+ return is_compatible, chrome_version, chromedriver_version
142
+
143
+ def _try_create_driver(self, chrome_path, chromedriver_path, option, temp_dir):
144
+ """
145
+ 尝试创建Chrome WebDriver实例
146
+ :param chrome_path: Chrome可执行文件路径
147
+ :param chromedriver_path: Chromedriver可执行文件路径
148
+ :param option: ChromeOptions实例
149
+ :param temp_dir: 临时目录路径
150
+ :return: Chrome WebDriver实例或None
151
+ """
152
+ try:
153
+ option.binary_location = chrome_path
154
+ service = Service(chromedriver_path)
155
+ driver = webdriver.Chrome(service=service, options=option)
156
+ if self.maximize_window:
157
+ driver.maximize_window()
158
+
159
+ # --- 防反爬:注入多段JS隐藏Selenium特征 ---
160
+ js_hide_features = [
161
+ # 隐藏webdriver属性
162
+ "Object.defineProperty(navigator, 'webdriver', {get: () => undefined, configurable: true});",
163
+ # 模拟真实浏览器插件
164
+ "Object.defineProperty(navigator, 'plugins', {get: () => [1,2,3,4,5], configurable: true});",
165
+ # 设置语言
166
+ "Object.defineProperty(navigator, 'languages', {get: () => ['zh-CN', 'zh', 'en'], configurable: true});",
167
+ # 模拟Chrome运行时
168
+ "window.chrome = {runtime: {}, loadTimes: function(){}, csi: function(){}, app: {}};",
169
+ # 删除原型链上的webdriver
170
+ "delete window.navigator.__proto__.webdriver;",
171
+ # 删除Selenium相关属性
172
+ r"for (let key in window) {if (key.match(/^[\$\_]{3,}/)) {try {delete window[key];} catch(e){}}}",
173
+ # 隐藏自动化相关属性
174
+ "Object.defineProperty(navigator, 'permissions', {get: () => ({query: () => Promise.resolve({state: 'granted'})}), configurable: true});",
175
+ # 模拟真实的navigator属性
176
+ "Object.defineProperty(navigator, 'hardwareConcurrency', {get: () => 8, configurable: true});",
177
+ "Object.defineProperty(navigator, 'deviceMemory', {get: () => 8, configurable: true});",
178
+ # 防止检测自动化工具
179
+ "Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 0, configurable: true});",
180
+ # 隐藏CDP相关属性
181
+ "delete window.cdc_adoQpoasnfa76pfcZLmcfl_Array;",
182
+ "delete window.cdc_adoQpoasnfa76pfcZLmcfl_Promise;",
183
+ "delete window.cdc_adoQpoasnfa76pfcZLmcfl_Symbol;"
184
+ ]
185
+ for js in js_hide_features:
186
+ driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})
187
+
188
+ return driver
189
+ except Exception as e:
190
+ print(f"创建Chrome WebDriver失败: {e}")
191
+ return None
192
+
122
193
  def getdriver(self):
123
194
  """
124
195
  创建并返回Chrome WebDriver实例,自动注入反检测JS,异常时抛出GetDriverException
196
+ 智能版本检测:优先使用正式版,版本不匹配时自动切换到测试版
125
197
  :return: selenium.webdriver.Chrome实例
126
198
  :raises: GetDriverException
127
199
  """
128
200
  if not self.check_proxy():
129
201
  raise GetDriverException(f"代理不可用或格式错误: {self.proxy}")
202
+
130
203
  option = webdriver.ChromeOptions() # 浏览器启动选项
131
204
  if self.headless:
132
205
  option.add_argument("--headless") # 设置无界面模式
@@ -134,16 +207,19 @@ class GetDriver:
134
207
  option.add_argument("--disable-gpu")
135
208
  option.add_argument("--no-sandbox")
136
209
  option.add_argument("--disable-dev-shm-usage")
210
+ # 隐藏Chrome测试版提示信息
211
+ option.add_argument("--disable-blink-features=AutomationControlled")
212
+ option.add_argument("--disable-features=VizDisplayCompositor")
213
+ option.add_argument("--disable-background-timer-throttling")
214
+ option.add_argument("--disable-backgrounding-occluded-windows")
215
+ option.add_argument("--disable-renderer-backgrounding")
216
+ option.add_argument("--disable-features=TranslateUI")
217
+ option.add_argument("--disable-ipc-flooding-protection")
137
218
  # 添加唯一的用户数据目录,避免Chrome实例冲突
138
219
  temp_dir = tempfile.mkdtemp(prefix=f'chrome_automation_{uuid.uuid4().hex[:8]}_')
139
220
  option.add_argument(f'--user-data-dir={temp_dir}')
140
221
  option.add_argument('--no-first-run')
141
222
  option.add_argument('--no-default-browser-check')
142
- option.add_argument('--disable-background-timer-throttling')
143
- option.add_argument('--disable-backgrounding-occluded-windows')
144
- option.add_argument('--disable-renderer-backgrounding')
145
- option.add_argument('--disable-features=TranslateUI')
146
- option.add_argument('--disable-ipc-flooding-protection')
147
223
  # 关键安全浏览禁用参数
148
224
  option.add_argument('--allow-insecure-localhost')
149
225
  option.add_argument('--allow-running-insecure-content')
@@ -177,91 +253,171 @@ class GetDriver:
177
253
  "profile.password_manager_enabled": False,
178
254
  "download_restrictions": 0,
179
255
  }
256
+ option.add_experimental_option("prefs", prefs)
257
+
180
258
  # 平台与路径自动检测
181
259
  sys_platform = platform.system().lower()
182
260
  chrome_path = self.chrome_path
183
261
  chromedriver_path = self.chromedriver_path
262
+
184
263
  try:
185
264
  if sys_platform == 'windows':
186
265
  if not chrome_path:
187
- chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
266
+ chrome_path_candidates = [
267
+ 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe', # 正式版
268
+ os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe'), # 测试版
269
+ ]
188
270
  if not chromedriver_path:
189
- chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
190
- option.binary_location = chrome_path
191
- service = Service(chromedriver_path)
271
+ chromedriver_path_candidates = [
272
+ os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe'),
273
+ os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chromedriver.exe'),
274
+ ]
192
275
  elif sys_platform == 'linux':
193
276
  if not chrome_path:
194
- chrome_path = '/usr/bin/chrome'
195
- """
196
- # sudo mv /usr/bin/google-chrome /usr/bin/google-chrome.bak # 备份原有
197
- # sudo ln -s /usr/bin/chrome /usr/bin/google-chrome # 创建软链接
198
- """
277
+ chrome_path_candidates = [
278
+ '/usr/bin/google-chrome', # 正式版
279
+ '/usr/bin/chrome/chrome', # 测试版
280
+ ]
199
281
  if not chromedriver_path:
200
- chromedriver_path = '/usr/local/bin/chromedriver'
201
- option.binary_location = chrome_path
202
- service = Service(chromedriver_path)
282
+ chromedriver_path_candidates = [
283
+ '/usr/local/bin/chromedriver',
284
+ '/usr/bin/chromedriver',
285
+ ]
203
286
  elif sys_platform == 'darwin':
204
287
  if not chrome_path:
205
- # 优先使用用户指定的默认路径
206
288
  chrome_path_candidates = [
207
- '/usr/local/chrome/Google Chrome for Testing.app/Contents/MacOS/Google Chrome',
208
- '/usr/local/chrome/Google Chrome for Testing.app',
209
- '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
210
- '/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome',
289
+ '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', # 正式版
290
+ '/usr/local/chrome/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing', # 测试版
211
291
  ]
212
- chrome_path = next((p for p in chrome_path_candidates if os.path.exists(p)), None)
213
292
  if not chromedriver_path:
214
293
  chromedriver_path_candidates = [
215
294
  '/usr/local/chrome/chromedriver',
216
295
  '/usr/local/bin/chromedriver',
217
296
  '/opt/homebrew/bin/chromedriver',
218
297
  ]
219
- chromedriver_path = next((p for p in chromedriver_path_candidates if os.path.exists(p)), None)
220
- if not chrome_path or not chromedriver_path:
221
- raise GetDriverException("未找到Chrome或Chromedriver,请手动指定chrome_path和chromedriver_path")
222
- # option.binary_location = chrome_path # macOS 设置此参数报错
223
- service = Service(chromedriver_path)
224
298
  else:
225
299
  raise GetDriverException(f"不支持的平台: {sys_platform}")
300
+
301
+ # 如果用户指定了路径,直接使用
302
+ if chrome_path and chromedriver_path:
303
+ driver = self._try_create_driver(chrome_path, chromedriver_path, option, temp_dir)
304
+ if driver:
305
+ self.temp_dirs.append(temp_dir)
306
+ self.driver = driver
307
+ return driver
308
+ else:
309
+ raise GetDriverException(f"指定的Chrome路径无法启动: {chrome_path}")
310
+
311
+ # 智能版本检测和切换
312
+ chrome_paths = [p for p in chrome_path_candidates if os.path.exists(p)]
313
+ chromedriver_paths = [p for p in chromedriver_path_candidates if os.path.exists(p)]
314
+
315
+ if not chrome_paths:
316
+ raise GetDriverException("未找到Chrome浏览器,请手动指定chrome_path")
317
+ if not chromedriver_paths:
318
+ raise GetDriverException("未找到Chromedriver,请手动指定chromedriver_path")
319
+
320
+ # 优先尝试正式版Chrome
321
+ for chrome_path in chrome_paths:
322
+ for chromedriver_path in chromedriver_paths:
323
+ # 检查版本兼容性
324
+ is_compatible, chrome_version, chromedriver_version = self._check_version_compatibility(chrome_path, chromedriver_path)
325
+
326
+ if is_compatible:
327
+ # print(f"版本兼容: Chrome {chrome_version}, Chromedriver {chromedriver_version}")
328
+ driver = self._try_create_driver(chrome_path, chromedriver_path, option, temp_dir)
329
+ if driver:
330
+ self.temp_dirs.append(temp_dir)
331
+ self.driver = driver
332
+ return driver
333
+ else:
334
+ print(f"版本不兼容: Chrome {chrome_version}, Chromedriver {chromedriver_version}")
335
+ # 即使版本不兼容也尝试启动,有时可能仍然可以工作
336
+ driver = self._try_create_driver(chrome_path, chromedriver_path, option, temp_dir)
337
+ if driver:
338
+ print("警告:版本不兼容但启动成功,建议更新Chromedriver")
339
+ self.temp_dirs.append(temp_dir)
340
+ self.driver = driver
341
+ return driver
342
+
343
+ # 如果所有组合都失败,抛出异常
344
+ raise GetDriverException("所有Chrome和Chromedriver组合都无法启动,请检查版本兼容性")
345
+
226
346
  except Exception as e:
227
- raise GetDriverException(f"浏览器路径配置异常: {e}")
228
- option.add_experimental_option("prefs", prefs)
229
- try:
230
- driver = webdriver.Chrome(service=service, options=option)
231
- if self.maximize_window:
232
- driver.maximize_window()
233
- # --- 防反爬:注入多段JS隐藏Selenium特征 ---
234
- js_hide_features = [
235
- "Object.defineProperty(navigator, 'webdriver', {get: () => false});",
236
- "Object.defineProperty(navigator, 'plugins', {get: () => [1,2,3,4,5]});",
237
- "Object.defineProperty(navigator, 'languages', {get: () => ['zh-CN', 'zh', 'en']});",
238
- "window.chrome = {runtime: {}};",
239
- "delete window.navigator.__proto__.webdriver;",
240
- r"for (let key in window) {if (key.match(/^[\$\_]{3,}/)) {try {delete window[key];} catch(e){}}}"
241
- ]
242
- for js in js_hide_features:
347
+ try:
348
+ if os.path.exists(temp_dir):
349
+ shutil.rmtree(temp_dir)
350
+ except Exception as cleanup_error:
243
351
  pass
244
- driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})
245
- self.temp_dirs.append(temp_dir)
246
- self.driver = driver
247
- return driver
248
- except Exception as e:
352
+ if isinstance(e, GetDriverException):
353
+ raise e
354
+ else:
355
+ raise GetDriverException(f"启动ChromeDriver失败: {e}")
356
+
357
+ def _cleanup_temp_dirs(self):
358
+ """
359
+ 清理所有创建的临时目录
360
+ """
361
+ for temp_dir in self.temp_dirs:
249
362
  try:
250
363
  if os.path.exists(temp_dir):
251
364
  shutil.rmtree(temp_dir)
252
- except Exception as e:
365
+ except:
366
+ pass
367
+ self.temp_dirs = []
368
+
369
+ def __enter__(self):
370
+ """
371
+ 支持with语法自动获取driver
372
+ :return: selenium.webdriver.Chrome实例
373
+ """
374
+ self.driver = self.getdriver()
375
+ return self.driver
376
+
377
+ def __exit__(self, exc_type, exc_val, exc_tb):
378
+ """
379
+ 支持with语法自动清理资源
380
+ """
381
+ self.quit()
382
+
383
+ def close(self):
384
+ """
385
+ 关闭浏览器窗口并清理临时目录
386
+ """
387
+ if self.driver:
388
+ try:
389
+ self.driver.close()
390
+ except:
391
+ pass
392
+ self._cleanup_temp_dirs()
393
+
394
+ def quit(self):
395
+ """
396
+ 彻底退出浏览器并清理临时目录
397
+ """
398
+ if self.driver:
399
+ try:
400
+ self.driver.quit()
401
+ except:
253
402
  pass
254
- raise GetDriverException(f"启动ChromeDriver失败: {e}")
403
+ self._cleanup_temp_dirs()
255
404
 
256
405
 
257
406
  if __name__ == '__main__':
258
- with GetDriver(
259
- headless=True,
260
- proxy=None, # 代理('socks5://127.0.0.1:1080')
261
- user_agent=None,
262
- download_dir=None,
263
- chrome_path=None,
264
- chromedriver_path=None,
265
- ) as driver:
266
- driver.get('https://www.baidu.com')
267
- print(driver.title)
407
+ # with GetDriver(
408
+ # headless=True,
409
+ # proxy=None, # 代理('socks5://127.0.0.1:1080')
410
+ # user_agent=None,
411
+ # download_dir=None,
412
+ # chrome_path=None,
413
+ # chromedriver_path=None,
414
+ # ) as driver:
415
+ # driver.get('https://www.baidu.com')
416
+ # print(driver.title)
417
+
418
+
419
+ driver = GetDriver(headless=False).getdriver()
420
+ driver.get('https://www.baidu.com')
421
+ print(driver.title)
422
+ import time
423
+ time.sleep(1000)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.70
3
+ Version: 4.0.72
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,5 +1,5 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=DIv1UFgIHnLfXOtMM3gqOwNoi2BmzYHR5nJ-wgmo_cI,18
2
+ mdbq/__version__.py,sha256=_hhEVsHNRe5szh4nb_dJ6z64dFjwH_wfQ3tLU7LCUX0,18
3
3
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
4
4
  mdbq/log/mylogger.py,sha256=kPe3wsQNaB1slfX-Z7VMqzZoMoqPfc7ylYXZDBeFzzI,24945
5
5
  mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -22,9 +22,9 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
22
22
  mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
23
23
  mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
24
24
  mdbq/selenium/__init__.py,sha256=AKzeEceqZyvqn2dEDoJSzDQnbuENkJSHAlbHAD0u0ZI,10
25
- mdbq/selenium/get_driver.py,sha256=XGAsXeSb4rkXW2CF9WWlv7aUoJX0ed39Vxmwf73j4gw,11687
25
+ mdbq/selenium/get_driver.py,sha256=bZoE3Z2pf75r5ngDS_B9xshLiOwO--X-N5RKsQjKEQ8,19181
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
- mdbq-4.0.70.dist-info/METADATA,sha256=szfoKA32ztaqq0FbR9xrDL1uyFEyRqHiROEtDtXCQ3Y,364
28
- mdbq-4.0.70.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
29
- mdbq-4.0.70.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
30
- mdbq-4.0.70.dist-info/RECORD,,
27
+ mdbq-4.0.72.dist-info/METADATA,sha256=76uckGGCRbJe7W5oV3_FTdkTR28VT_RKmrDpHLWLKhQ,364
28
+ mdbq-4.0.72.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
29
+ mdbq-4.0.72.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
30
+ mdbq-4.0.72.dist-info/RECORD,,
File without changes