mdbq 4.0.65__py3-none-any.whl → 4.0.67__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '4.0.65'
1
+ VERSION = '4.0.67'
mdbq/mysql/s_query.py CHANGED
@@ -794,8 +794,16 @@ class QueryDatas:
794
794
  转换后的数据
795
795
  """
796
796
  # 参数验证
797
- if not lower_col or not data:
797
+ if not lower_col:
798
798
  return data
799
+
800
+ # 检查data是否为空
801
+ if return_format == 'df':
802
+ if data is None or data.empty:
803
+ return data
804
+ else: # list_dict格式
805
+ if data is None or len(data) == 0:
806
+ return data
799
807
 
800
808
  # 确保 lower_col 是列表类型
801
809
  if not isinstance(lower_col, list):
@@ -0,0 +1,4 @@
1
+
2
+
3
+
4
+ # pass
@@ -0,0 +1,262 @@
1
+ # -*- coding:utf-8 -*-
2
+ import os
3
+ import platform
4
+ import getpass
5
+ from selenium import webdriver
6
+ from selenium.webdriver.chrome.service import Service
7
+ import re
8
+ import socket
9
+ import tempfile
10
+ import shutil
11
+ import uuid
12
+
13
# Home directory of the current user, resolved once when the module is imported.
dir_path = os.path.expanduser("~")
14
+
15
+
16
class GetDriverException(Exception):
    """Custom exception for errors related to GetDriver."""
19
+
20
+
21
class GetDriver:
    """
    Selenium ChromeDriver manager.

    Builds a Chrome WebDriver with optional headless mode, proxy, custom
    User-Agent and download directory, resolving the browser and driver
    binaries per platform (Windows, Linux, macOS).

    Can be used as a context manager: ``with GetDriver(...) as driver`` yields
    the WebDriver and quits it (and removes the temporary profile directory)
    on exit.
    """

    # Pool a default User-Agent is picked from when the caller supplies none.
    _DEFAULT_USER_AGENTS = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
    )

    # <scheme>://<dotted IPv4>:<port>; groups: 1=scheme, 2=ip, 4=port.
    _PROXY_PATTERN = re.compile(r'^(socks5|http|https)://(\d{1,3}(\.\d{1,3}){3}):(\d+)$')

    # Scripts registered to run on every new document to hide Selenium traits.
    _JS_HIDE_FEATURES = (
        "Object.defineProperty(navigator, 'webdriver', {get: () => false});",
        "Object.defineProperty(navigator, 'plugins', {get: () => [1,2,3,4,5]});",
        "Object.defineProperty(navigator, 'languages', {get: () => ['zh-CN', 'zh', 'en']});",
        "window.chrome = {runtime: {}};",
        "delete window.navigator.__proto__.webdriver;",
        r"for (let key in window) {if (key.match(/^[\$\_]{3,}/)) {try {delete window[key];} catch(e){}}}",
    )

    def __init__(self, url=None, headless=False, proxy=None, user_agent=None, download_dir=None, chrome_path=None, chromedriver_path=None, maximize_window=True):
        """
        Initialise the manager; no browser is started until getdriver() runs.

        :param url: origin passed to --unsafely-treat-insecure-origin-as-secure
        :param headless: run Chrome in headless mode
        :param proxy: proxy URL (http, https or socks5), e.g. socks5://127.0.0.1:1080
        :param user_agent: custom User-Agent; a built-in one is picked at random if empty
        :param download_dir: download directory ("~" is expanded); defaults to ~/Downloads
        :param chrome_path: path to the Chrome binary (auto-detected if empty)
        :param chromedriver_path: path to chromedriver (auto-detected if empty)
        :param maximize_window: maximize the browser window after start-up
        """
        self.url = url
        self.headless = headless
        self.proxy = proxy
        self.user_agent = user_agent
        self.download_dir = os.path.expanduser(download_dir) if download_dir else os.path.expanduser('~/Downloads')
        self.chrome_path = chrome_path
        self.chromedriver_path = chromedriver_path
        self.temp_dirs = []  # temporary profile directories to remove on close/quit
        self.driver = None
        if not self.user_agent:
            import random
            self.user_agent = random.choice(self._DEFAULT_USER_AGENTS)
        self.maximize_window = maximize_window

    def __enter__(self):
        """
        Start the browser when entering a ``with`` block.

        :return: selenium.webdriver.Chrome instance
        """
        self.driver = self.getdriver()
        return self.driver

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Quit the browser and clean up when leaving a ``with`` block."""
        self.quit()

    def close(self):
        """Close the current browser window and remove temporary directories."""
        if self.driver:
            try:
                self.driver.close()
            except Exception:
                # Best effort: a failure to close must not prevent cleanup.
                pass
        self._cleanup_temp_dirs()

    def quit(self):
        """Quit the browser entirely and remove temporary directories."""
        if self.driver:
            try:
                self.driver.quit()
            except Exception:
                # Best effort: a failure to quit must not prevent cleanup.
                pass
        self._cleanup_temp_dirs()

    def _cleanup_temp_dirs(self):
        """Remove every temporary directory recorded in self.temp_dirs."""
        for temp_dir in self.temp_dirs:
            # ignore_errors covers both missing paths and undeletable files.
            shutil.rmtree(temp_dir, ignore_errors=True)
        self.temp_dirs = []

    def check_proxy(self):
        """
        Validate the proxy format and check that its TCP endpoint accepts connections.

        Accepts http/https/socks5 URLs of the form scheme://a.b.c.d:port.

        :return: True if no proxy is configured or the proxy is reachable, else False
        """
        if not self.proxy:
            return True
        matched = self._PROXY_PATTERN.match(self.proxy)
        if not matched:
            return False
        ip, port = matched.group(2), matched.group(4)
        try:
            with socket.create_connection((ip, int(port)), timeout=5):
                return True
        except (OSError, OverflowError, ValueError):
            # Unreachable host, refused connection, or a port outside the valid range.
            return False

    def _build_options(self, temp_dir):
        """Return ChromeOptions carrying all switches and preferences; temp_dir becomes the profile dir."""
        option = webdriver.ChromeOptions()  # browser start-up options
        if self.headless:
            # NOTE(review): the extent of this branch was not recoverable from the
            # reviewed source (indentation lost); these five switches are assumed to
            # be headless-only - confirm against the original file.
            option.add_argument("--headless")
            option.add_argument("--window-size=1920,1080")
            option.add_argument("--disable-gpu")
            option.add_argument("--no-sandbox")
            option.add_argument("--disable-dev-shm-usage")
        # A unique user-data directory avoids clashes between Chrome instances.
        option.add_argument(f'--user-data-dir={temp_dir}')
        option.add_argument('--no-first-run')
        option.add_argument('--no-default-browser-check')
        option.add_argument('--disable-background-timer-throttling')
        option.add_argument('--disable-backgrounding-occluded-windows')
        option.add_argument('--disable-renderer-backgrounding')
        # NOTE(review): --disable-features is passed again below; presumably only
        # one occurrence takes effect - verify and merge the lists if so.
        option.add_argument('--disable-features=TranslateUI')
        option.add_argument('--disable-ipc-flooding-protection')
        # Switches that turn off safe-browsing and insecure-content protections.
        option.add_argument('--allow-insecure-localhost')
        option.add_argument('--allow-running-insecure-content')
        option.add_argument('--disable-features=BlockInsecurePrivateNetworkRequests,SafeBrowsing,DownloadBubble,SafeBrowsingEnhancedProtection,DownloadWarning')
        option.add_argument('--safebrowsing-disable-download-protection')
        option.add_argument('--disable-client-side-phishing-detection')
        option.add_argument('--disable-popup-blocking')
        option.add_argument('--ignore-certificate-errors')
        if self.url:
            option.add_argument(f"--unsafely-treat-insecure-origin-as-secure={self.url}")
        option.add_argument(f'--user-agent={self.user_agent}')
        # Automation-related settings.
        option.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
        option.add_experimental_option("useAutomationExtension", False)
        if self.proxy:
            option.add_argument(f'--proxy-server={self.proxy}')
        # Download behaviour and miscellaneous profile preferences.
        prefs = {
            "download.default_directory": self.download_dir,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": False,
            "safebrowsing.disable_download_protection": True,
            "profile.content_settings.exceptions.automatic_downloads.*.setting": 1,
            "profile.default_content_settings.popups": 0,
            "profile.default_content_setting_values.automatic_downloads": 1,
            "profile.default_content_setting_values.notifications": 2,
            "credentials_enable_service": False,
            "profile.password_manager_enabled": False,
            "download_restrictions": 0,
        }
        option.add_experimental_option("prefs", prefs)
        return option

    def _create_service(self, option):
        """Resolve Chrome/chromedriver paths for this platform, set option.binary_location where used, return a Service."""
        sys_platform = platform.system().lower()
        chrome_path = self.chrome_path
        chromedriver_path = self.chromedriver_path
        try:
            if sys_platform == 'windows':
                if not chrome_path:
                    chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
                if not chromedriver_path:
                    chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
                option.binary_location = chrome_path
                return Service(chromedriver_path)
            if sys_platform == 'linux':
                if not chrome_path:
                    chrome_path = '/usr/bin/google-chrome'
                if not chromedriver_path:
                    chromedriver_path = '/usr/local/bin/chromedriver'
                option.binary_location = chrome_path
                return Service(chromedriver_path)
            if sys_platform == 'darwin':
                if not chrome_path:
                    # Candidates are tried in order; the first existing path wins.
                    chrome_path_candidates = [
                        '/usr/local/chrome/Google Chrome for Testing.app/Contents/MacOS/Google Chrome',
                        '/usr/local/chrome/Google Chrome for Testing.app',
                        '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
                        '/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome',
                    ]
                    chrome_path = next((p for p in chrome_path_candidates if os.path.exists(p)), None)
                if not chromedriver_path:
                    chromedriver_path_candidates = [
                        '/usr/local/chrome/chromedriver',
                        '/usr/local/bin/chromedriver',
                        '/opt/homebrew/bin/chromedriver',
                    ]
                    chromedriver_path = next((p for p in chromedriver_path_candidates if os.path.exists(p)), None)
                if not chrome_path or not chromedriver_path:
                    raise GetDriverException("未找到Chrome或Chromedriver,请手动指定chrome_path和chromedriver_path")
                # binary_location is deliberately not set on macOS: the original
                # author notes that setting it there raises an error.
                return Service(chromedriver_path)
            raise GetDriverException(f"不支持的平台: {sys_platform}")
        except GetDriverException:
            # Already the documented error type; do not re-wrap it.
            raise
        except Exception as e:
            raise GetDriverException(f"浏览器路径配置异常: {e}") from e

    def getdriver(self):
        """
        Create and return a Chrome WebDriver with anti-detection scripts registered.

        On any failure the temporary profile directory is removed and a browser
        that was already started is quit before the error propagates.

        :return: selenium.webdriver.Chrome instance
        :raises GetDriverException: proxy invalid/unreachable, binaries not found,
            unsupported platform, or ChromeDriver failed to start
        """
        if not self.check_proxy():
            raise GetDriverException(f"代理不可用或格式错误: {self.proxy}")
        temp_dir = tempfile.mkdtemp(prefix=f'chrome_automation_{uuid.uuid4().hex[:8]}_')
        driver = None
        try:
            option = self._build_options(temp_dir)
            service = self._create_service(option)
            try:
                driver = webdriver.Chrome(service=service, options=option)
                if self.maximize_window:
                    driver.maximize_window()
                for js in self._JS_HIDE_FEATURES:
                    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})
            except Exception as e:
                raise GetDriverException(f"启动ChromeDriver失败: {e}") from e
        except BaseException:
            # Do not leave a half-initialised browser or its profile directory behind.
            if driver is not None:
                try:
                    driver.quit()
                except Exception:
                    pass
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise
        self.temp_dirs.append(temp_dir)
        self.driver = driver
        return driver
250
+
251
+
252
if __name__ == '__main__':
    # Manual smoke test: start a headless browser with default settings,
    # load one page and print its title.
    manager = GetDriver(
        headless=True,
        proxy=None,  # e.g. 'socks5://127.0.0.1:1080'
        user_agent=None,
        download_dir=None,
        chrome_path=None,
        chromedriver_path=None,
    )
    with manager as driver:
        driver.get('https://www.baidu.com')
        print(driver.title)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.65
3
+ Version: 4.0.67
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,5 +1,5 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=1gP4SARV8VrJwdAE1ZRMMnoekF6Fe6v0VGYuInbT8h0,18
2
+ mdbq/__version__.py,sha256=ePWLfB_-kadgtscNTRnc7UG22j-pnp5pbgvtzqFYx0c,18
3
3
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
4
4
  mdbq/log/mylogger.py,sha256=kPe3wsQNaB1slfX-Z7VMqzZoMoqPfc7ylYXZDBeFzzI,24945
5
5
  mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -7,7 +7,7 @@ mdbq/myconf/myconf.py,sha256=rHvQCnQRKhQ49AZBke-Z4v28hyOLmHt4MylIuB0H6yA,33516
7
7
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
8
8
  mdbq/mysql/deduplicator.py,sha256=AB3gL7ZwhcmzGHSu4UY4M6YZVPFZ2wlAN3BCcwAhegQ,73074
9
9
  mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
10
- mdbq/mysql/s_query.py,sha256=hptv4YxnkqPruJkgNX8UxRRhisvSEnn8HzDGDLmCQFw,50231
10
+ mdbq/mysql/s_query.py,sha256=nwhyqbxq-V0sGUJbdjiUDEwjpDxiKrzG0PjV6wkrWU4,50474
11
11
  mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
12
12
  mdbq/mysql/uploader.py,sha256=68kJIrCnP2dJZ6ilb8MoFzuzGGWU_272WwPfaqt075A,112125
13
13
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -21,8 +21,10 @@ mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,239
21
21
  mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
22
22
  mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
23
23
  mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
24
+ mdbq/selenium/__init__.py,sha256=AKzeEceqZyvqn2dEDoJSzDQnbuENkJSHAlbHAD0u0ZI,10
25
+ mdbq/selenium/get_driver.py,sha256=X8KldM2LLGm5njIAD3TvFND3sQ_dhnvkb5qbuL78X2g,11394
24
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
25
- mdbq-4.0.65.dist-info/METADATA,sha256=5tbI-X82C-RHIHkdxXt6thtOGZbDDhYmeil4N8uQWBQ,364
26
- mdbq-4.0.65.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
27
- mdbq-4.0.65.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
28
- mdbq-4.0.65.dist-info/RECORD,,
27
+ mdbq-4.0.67.dist-info/METADATA,sha256=ngUzHOCX_jeHzOylR307BqJpEW0hqO8k6BtbnUNsxo8,364
28
+ mdbq-4.0.67.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
29
+ mdbq-4.0.67.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
30
+ mdbq-4.0.67.dist-info/RECORD,,
File without changes