kchttp 1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. kchttp-1.0/PKG-INFO +13 -0
  2. kchttp-1.0/kchttp/__init__.py +2 -0
  3. kchttp-1.0/kchttp/httpclass.py +599 -0
  4. kchttp-1.0/kchttp/selenium3/__init__.py +19 -0
  5. kchttp-1.0/kchttp/selenium3/common/__init__.py +18 -0
  6. kchttp-1.0/kchttp/selenium3/common/exceptions.py +242 -0
  7. kchttp-1.0/kchttp/selenium3/webdriver/__init__.py +35 -0
  8. kchttp-1.0/kchttp/selenium3/webdriver/android/__init__.py +16 -0
  9. kchttp-1.0/kchttp/selenium3/webdriver/android/webdriver.py +42 -0
  10. kchttp-1.0/kchttp/selenium3/webdriver/blackberry/__init__.py +16 -0
  11. kchttp-1.0/kchttp/selenium3/webdriver/blackberry/webdriver.py +116 -0
  12. kchttp-1.0/kchttp/selenium3/webdriver/chrome/__init__.py +16 -0
  13. kchttp-1.0/kchttp/selenium3/webdriver/chrome/options.py +169 -0
  14. kchttp-1.0/kchttp/selenium3/webdriver/chrome/remote_connection.py +25 -0
  15. kchttp-1.0/kchttp/selenium3/webdriver/chrome/service.py +45 -0
  16. kchttp-1.0/kchttp/selenium3/webdriver/chrome/webdriver.py +118 -0
  17. kchttp-1.0/kchttp/selenium3/webdriver/common/__init__.py +16 -0
  18. kchttp-1.0/kchttp/selenium3/webdriver/common/action_chains.py +287 -0
  19. kchttp-1.0/kchttp/selenium3/webdriver/common/alert.py +112 -0
  20. kchttp-1.0/kchttp/selenium3/webdriver/common/by.py +35 -0
  21. kchttp-1.0/kchttp/selenium3/webdriver/common/desired_capabilities.py +132 -0
  22. kchttp-1.0/kchttp/selenium3/webdriver/common/html5/__init__.py +16 -0
  23. kchttp-1.0/kchttp/selenium3/webdriver/common/html5/application_cache.py +48 -0
  24. kchttp-1.0/kchttp/selenium3/webdriver/common/keys.py +96 -0
  25. kchttp-1.0/kchttp/selenium3/webdriver/common/proxy.py +334 -0
  26. kchttp-1.0/kchttp/selenium3/webdriver/common/service.py +163 -0
  27. kchttp-1.0/kchttp/selenium3/webdriver/common/touch_actions.py +192 -0
  28. kchttp-1.0/kchttp/selenium3/webdriver/common/utils.py +152 -0
  29. kchttp-1.0/kchttp/selenium3/webdriver/edge/__init__.py +16 -0
  30. kchttp-1.0/kchttp/selenium3/webdriver/edge/options.py +45 -0
  31. kchttp-1.0/kchttp/selenium3/webdriver/edge/service.py +28 -0
  32. kchttp-1.0/kchttp/selenium3/webdriver/edge/webdriver.py +48 -0
  33. kchttp-1.0/kchttp/selenium3/webdriver/firefox/__init__.py +16 -0
  34. kchttp-1.0/kchttp/selenium3/webdriver/firefox/extension_connection.py +84 -0
  35. kchttp-1.0/kchttp/selenium3/webdriver/firefox/firefox_binary.py +212 -0
  36. kchttp-1.0/kchttp/selenium3/webdriver/firefox/firefox_profile.py +381 -0
  37. kchttp-1.0/kchttp/selenium3/webdriver/firefox/options.py +113 -0
  38. kchttp-1.0/kchttp/selenium3/webdriver/firefox/remote_connection.py +29 -0
  39. kchttp-1.0/kchttp/selenium3/webdriver/firefox/service.py +55 -0
  40. kchttp-1.0/kchttp/selenium3/webdriver/firefox/webdriver.py +197 -0
  41. kchttp-1.0/kchttp/selenium3/webdriver/firefox/webelement.py +45 -0
  42. kchttp-1.0/kchttp/selenium3/webdriver/ie/__init__.py +16 -0
  43. kchttp-1.0/kchttp/selenium3/webdriver/ie/service.py +50 -0
  44. kchttp-1.0/kchttp/selenium3/webdriver/ie/webdriver.py +62 -0
  45. kchttp-1.0/kchttp/selenium3/webdriver/opera/__init__.py +16 -0
  46. kchttp-1.0/kchttp/selenium3/webdriver/opera/options.py +106 -0
  47. kchttp-1.0/kchttp/selenium3/webdriver/opera/webdriver.py +73 -0
  48. kchttp-1.0/kchttp/selenium3/webdriver/phantomjs/__init__.py +16 -0
  49. kchttp-1.0/kchttp/selenium3/webdriver/phantomjs/service.py +68 -0
  50. kchttp-1.0/kchttp/selenium3/webdriver/phantomjs/webdriver.py +99 -0
  51. kchttp-1.0/kchttp/selenium3/webdriver/remote/__init__.py +16 -0
  52. kchttp-1.0/kchttp/selenium3/webdriver/remote/command.py +159 -0
  53. kchttp-1.0/kchttp/selenium3/webdriver/remote/errorhandler.py +195 -0
  54. kchttp-1.0/kchttp/selenium3/webdriver/remote/file_detector.py +58 -0
  55. kchttp-1.0/kchttp/selenium3/webdriver/remote/getAttribute.js +11 -0
  56. kchttp-1.0/kchttp/selenium3/webdriver/remote/isDisplayed.js +106 -0
  57. kchttp-1.0/kchttp/selenium3/webdriver/remote/mobile.py +83 -0
  58. kchttp-1.0/kchttp/selenium3/webdriver/remote/remote_connection.py +517 -0
  59. kchttp-1.0/kchttp/selenium3/webdriver/remote/switch_to.py +109 -0
  60. kchttp-1.0/kchttp/selenium3/webdriver/remote/utils.py +113 -0
  61. kchttp-1.0/kchttp/selenium3/webdriver/remote/webdriver.py +980 -0
  62. kchttp-1.0/kchttp/selenium3/webdriver/remote/webelement.py +551 -0
  63. kchttp-1.0/kchttp/selenium3/webdriver/safari/__init__.py +16 -0
  64. kchttp-1.0/kchttp/selenium3/webdriver/safari/service.py +57 -0
  65. kchttp-1.0/kchttp/selenium3/webdriver/safari/webdriver.py +65 -0
  66. kchttp-1.0/kchttp/selenium3/webdriver/support/__init__.py +16 -0
  67. kchttp-1.0/kchttp/selenium3/webdriver/support/abstract_event_listener.py +79 -0
  68. kchttp-1.0/kchttp/selenium3/webdriver/support/color.py +310 -0
  69. kchttp-1.0/kchttp/selenium3/webdriver/support/event_firing_webdriver.py +334 -0
  70. kchttp-1.0/kchttp/selenium3/webdriver/support/events.py +19 -0
  71. kchttp-1.0/kchttp/selenium3/webdriver/support/expected_conditions.py +339 -0
  72. kchttp-1.0/kchttp/selenium3/webdriver/support/select.py +241 -0
  73. kchttp-1.0/kchttp/selenium3/webdriver/support/ui.py +19 -0
  74. kchttp-1.0/kchttp/selenium3/webdriver/support/wait.py +96 -0
  75. kchttp-1.0/kchttp.egg-info/PKG-INFO +13 -0
  76. kchttp-1.0/kchttp.egg-info/SOURCES.txt +79 -0
  77. kchttp-1.0/kchttp.egg-info/dependency_links.txt +1 -0
  78. kchttp-1.0/kchttp.egg-info/requires.txt +1 -0
  79. kchttp-1.0/kchttp.egg-info/top_level.txt +17 -0
  80. kchttp-1.0/setup.cfg +4 -0
  81. kchttp-1.0/setup.py +56 -0
kchttp-1.0/PKG-INFO ADDED
@@ -0,0 +1,13 @@
1
+ Metadata-Version: 1.2
2
+ Name: kchttp
3
+ Version: 1.0
4
+ Summary: kchttp
5
+ Home-page: UNKNOWN
6
+ Author: 百里
7
+ Author-email: kcwebs@kwebapp.cn
8
+ Maintainer: 坤坤
9
+ Maintainer-email: fk1402936534@qq.com
10
+ License: MIT License
11
+ Description: UNKNOWN
12
+ Keywords: kchttp1.0
13
+ Platform: UNKNOWN
@@ -0,0 +1,2 @@
1
+ # -*- coding: utf-8 -*-
2
+ __version__ = '1.0'
@@ -0,0 +1,599 @@
1
+ # -*- coding: utf-8 -*-
2
+ import requests,traceback,time
3
+ requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
4
+ from .selenium3 import webdriver as webdriver3
5
+ from .selenium3.webdriver.support import expected_conditions
6
+ import io
7
class Http:
    """HTTP request helper with optional Chrome / PhantomJS page rendering.

    Attributes prefixed ``set_`` are request options the caller may change
    before a request; attributes prefixed ``get_`` hold the results of the
    most recent request.
    """
    # By.CLASS_NAME
    webdriver3 = webdriver3  # bundled selenium webdriver package, exposed to callers
    expecteds = expected_conditions  # bundled selenium expected_conditions module

    # ---- request options ----
    set_session = True  # whether to reuse one session across requests
    set_impersonate = None  # browser fingerprint to impersonate (chrome99, chrome100, chrome110, chrome118, chrome120, firefox100, firefox110, safari15, safari16)
    set_proxies = None  # proxy settings
    set_cookies = {}  # cookies sent with the request (dict or "k=v;k2=v2" string)
    set_header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}  # request headers
    # (connect timeout, read timeout). The timeout bounds periods with no
    # response from the server, not the total duration of the request.
    set_timeout = (6.05, 10)
    set_max_retries = 2  # retry count (3 requests in total)
    set_verify = False  # SSL certificate verification, or path to a certificate
    set_encoding = ""  # encoding for text output, e.g. utf-8; empty means auto-detect

    # ---- results of the last request ----
    get_header = {}  # response headers
    get_cookies = {}  # cookies after the last response
    get_cookie_str = ''  # cookies after the last response, as a string
    get_text = ''  # response body as text
    get_content = ''  # response body as bytes
    get_response = ''  # response object
    get_status_code = None  # response status code
    keep_alive = True  # HTTP connections are keep-alive by default; False asks to close them
    req = None  # session / requests-like object, created on first use
32
+ def __init(self):
33
+ self.set_proxies=None #设置代理
34
+ self.set_cookies={} #设置请求cookie
35
+ self.set_header={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'} #请求头
36
+ self.set_timeout=(6.05,10) #超时时间 6.05表示连接采时时间 3030表示读取超时时间 #注意 set_timeout参数主要关注的是“无响应”的时间段,而不是整个请求的处理时间
37
+ self.set_max_retries=2 #重试次数 (实际请求3次)
38
+ self.set_verify=False #SSL 证书的验证 sll证书路径
39
+ self.set_encoding="" #设置text输出编码
40
+ self.set_session=True #是否启用会话
41
+
42
+ self.get_header={} #获取响应头
43
+ self.get_cookies={} #获取最后的响应cookie
44
+ self.get_cookie_str='' #获取最后的响应cookie 字符串
45
+ self.get_text='' #获取body响应内容
46
+ self.get_content='' #获取body响应二进制内容
47
+ self.get_response='' #获取响应对象
48
+ self.get_status_code=None #获取响应状态码
49
+ self.keep_alive=True #默认的http connection是keep-alive的 False表示关闭
50
+ self.req=None
51
+ self.set_impersonate=None
52
+ def __init__(self):
53
+ self.__init()
54
+ def __del__(self):
55
+ self.__init()
56
+ def gettext(self):
57
+ """得到响应text"""
58
+ return self.get_text
59
def wait(self, wait_pq=None, wait_pq_type='element', sleep=1, Obj=None, url=''):
    """Poll the driver's page source until the ``wait_pq`` rules are satisfied.

    The page source is stored in ``self.get_text`` when the method returns,
    and also right before a timeout is raised.

    wait_pq: pyquery rule strings. A flat list such as ``['a', 'b']`` is
        satisfied as soon as any one rule matches. A nested list such as
        ``[['a', 'b'], ['c', 'd']]`` needs one match from every inner group.
        When empty, no polling happens.
    wait_pq_type: ``'element'`` waits for the element to exist; ``'text'``
        waits for it to contain non-whitespace text; any other non-empty
        value names an attribute that must have a truthy value.
    sleep: wait budget; values below 10 are raised to 10. The page is polled
        every 0.5 seconds, at most ``2 * sleep + 1`` times.
    Obj: Chrome or PhantomJS driver to poll. Defaults to whichever of
        ``self.ChromeObj`` / ``self.PhantomJsObj`` is open.
    url: only used in the timeout error message.

    Raises Exception when no driver is available, when both drivers are open
    and ``Obj`` is not given, or when the rules are not met in time.
    """
    from pyquery import PyQuery as kcwebspq

    # Resolve the driver first so the final page_source read below always
    # has a real object to work with.
    if not Obj:
        if self.PhantomJsObj and self.ChromeObj:
            raise Exception('Chrome和PhantomJS不可同时存在')
        Obj = self.ChromeObj or self.PhantomJsObj
        if not Obj:
            raise Exception('Chrome对象和PhantomJS对象不存在')

    def rule_matches(rule, doc):
        # True when `rule` selects something that satisfies wait_pq_type.
        found = self.__get_pyquery_rules_obj(rule, doc)
        if not (found and found.length):
            return False
        if wait_pq_type == 'text':
            return bool(found.text().strip(' \n\r\t'))
        if wait_pq_type == 'element':
            return True
        return bool(wait_pq_type) and bool(found.attr(wait_pq_type))

    if wait_pq:
        if sleep < 10:
            sleep = 10
        for attempt in range(10000):
            if attempt > sleep * 2:
                self.get_text = Obj.page_source
                raise Exception('max-wait_pq:' + url)
            time.sleep(0.5)
            doc = kcwebspq(Obj.page_source)
            groups_met = 0
            flat_hit = False
            for entry in wait_pq:
                if isinstance(entry, (list, tuple)):
                    # One matching alternative is enough for the group.
                    if any(rule_matches(rule, doc) for rule in entry):
                        groups_met += 1
                elif rule_matches(entry, doc):
                    # A plain rule ends the wait on its own.
                    flat_hit = True
                    break
            if flat_hit or groups_met == len(wait_pq):
                break
    self.get_text = Obj.page_source
126
+ PhantomJsObj=None
127
def open_PhantomJS(self, url, executable_path='', closedriver=True, wait_pq=None, wait_pq_type='element', sleep=1):
    """Load ``url`` in PhantomJS and capture the JS-rendered HTML and cookies.

    The rendered page ends up in ``self.get_text`` (set by ``wait``) and the
    driver's cookies are merged into ``self.get_cookies`` /
    ``self.get_cookie_str``.

    url: page to load.
    executable_path: path of the PhantomJS executable.
    closedriver: quit the driver when done (also when loading or waiting
        fails). When False the driver is kept in ``self.PhantomJsObj`` and
        the response headers are fetched with a separate HEAD request.
    wait_pq: pyquery rules to wait for; see ``wait``.
    wait_pq_type: ``'element'``, ``'text'`` or an attribute name; see ``wait``.
    sleep: wait budget; with no ``wait_pq`` it is a plain sleep in seconds.

    Raises Exception('max_retries...') when a retryable network error keeps
    occurring after ``set_max_retries`` retries; other driver errors propagate.
    """
    if self.set_cookies and isinstance(self.set_cookies, str):
        self.set_cookies = self.cookieserTdict(self.set_cookies)
    if not self.PhantomJsObj:
        self.PhantomJsObj = webdriver3.PhantomJS(executable_path=executable_path)

    # Error fragments that indicate a transient network problem worth retrying.
    retryable_markers = (
        'error: net::ERR_CONNECTION_CLOSED',
        'timeout: Timed out receiving message from rendere',
        'error: net::ERR_CONNECTION_RESET',
        'error: net::ERR_NAME_NOT_RESOLVED',
        'unknown error: net::ERR_CONNECTION_TIMED_OUT',
        'Max retries exceeded with url',
        'error: net::ERR_SSL_VERSION_OR_CIPHER_MISMATCH',
        'error: net::ERR_CONNECTION_REFUSED',
    )
    try:
        failures = 0
        while True:
            try:
                self.PhantomJsObj.get(url)
            except Exception as e:
                estr = str(e)
                if not any(marker in estr for marker in retryable_markers):
                    raise
                # set_max_retries retries => set_max_retries + 1 attempts.
                if failures >= self.set_max_retries:
                    raise Exception('max_retries' + estr)
                failures += 1
            else:
                break

        if not closedriver:
            # Best-effort capture of the response headers; the driver does
            # not expose them, so ask the server directly.
            try:
                response = requests.head(
                    url,
                    cookies=self.set_cookies,
                    headers=self.set_header,
                    proxies=self.set_proxies,
                    timeout=self.set_timeout,
                    verify=self.set_verify,
                    allow_redirects=True,
                )
            except requests.RequestException:
                pass
            else:
                self.get_header = {k.lower(): v for k, v in response.headers.items()}

        if not wait_pq and sleep:
            time.sleep(sleep)
        self.wait(wait_pq=wait_pq, wait_pq_type=wait_pq_type, sleep=sleep, Obj=self.PhantomJsObj, url=url)

        for entry in reversed(self.PhantomJsObj.get_cookies()):
            self.get_cookies = self.__merge(self.get_cookies, {entry['name']: entry['value']})
        if self.get_cookies:
            self.get_cookie_str = self.cookieTdictstr(self.get_cookies)
            self.get_cookies = self.cookieserTdict(self.get_cookie_str)
    finally:
        # Quit on success and on failure so no PhantomJS process is leaked.
        if closedriver and self.PhantomJsObj:
            self.PhantomJsObj.quit()
            self.PhantomJsObj = None
194
+ ChromeObj=None
195
def open_Chrome(self, url, executable_path='', closedriver=True, setheadless=True, wait_pq=None, wait_pq_type='element', sleep=1, devtools=False):
    """Load ``url`` in Chrome and capture the JS-rendered HTML and cookies.

    The rendered page ends up in ``self.get_text`` (set by ``wait``) and the
    driver's cookies are merged into ``self.get_cookies`` /
    ``self.get_cookie_str``.

    url: page to load.
    executable_path: path of the chromedriver executable.
    closedriver: quit the driver when done (also when loading or waiting
        fails). When False the driver is kept in ``self.ChromeObj`` and the
        response headers are fetched with a separate HEAD request.
    setheadless: run Chrome headless.
    wait_pq: pyquery rules to wait for; see ``wait``.
    wait_pq_type: ``'element'``, ``'text'`` or an attribute name; see ``wait``.
    sleep: wait budget; with no ``wait_pq`` it is a plain sleep in seconds.
    devtools: open the developer tools (only applied when not headless).

    Raises Exception when the driver cannot be started on an unsupported
    platform, when the Chrome download fails, or ('max_retries...') when a
    retryable network error keeps occurring after ``set_max_retries`` retries.
    """
    if self.set_cookies and isinstance(self.set_cookies, str):
        self.set_cookies = self.cookieserTdict(self.set_cookies)
    if not self.ChromeObj:
        chrome_options = self.__build_chrome_options(setheadless, devtools)
        try:
            self.ChromeObj = webdriver3.Chrome(executable_path=executable_path, chrome_options=chrome_options)
        except Exception as e:
            print("\033[93mChromeChromeChromeChromeChromeChromeChromeChromeChromeChromeChromeChromeve\033[0m", e)
            # Raises when the platform / driver version is not supported.
            self.__install_matching_chrome(e)
            # Start the driver once more and continue with the normal flow,
            # so wait_pq / sleep / devtools still apply and a second failure
            # surfaces instead of recursing.
            self.ChromeObj = webdriver3.Chrome(executable_path=executable_path, chrome_options=chrome_options)

    # Error fragments that indicate a transient network problem worth retrying.
    retryable_markers = (
        'error: net::ERR_CONNECTION_CLOSED',
        'timeout: Timed out receiving message from rendere',
        'error: net::ERR_CONNECTION_RESET',
        'error: net::ERR_NAME_NOT_RESOLVED',
        'unknown error: net::ERR_CONNECTION_TIMED_OUT',
        'Max retries exceeded with url',
        'error: net::ERR_SSL_VERSION_OR_CIPHER_MISMATCH',
        'error: net::ERR_CONNECTION_REFUSED',
    )
    try:
        failures = 0
        while True:
            try:
                self.ChromeObj.get(url)
            except Exception as e:
                estr = str(e)
                if not any(marker in estr for marker in retryable_markers):
                    raise
                # set_max_retries retries => set_max_retries + 1 attempts.
                if failures >= self.set_max_retries:
                    raise Exception('max_retries' + estr)
                failures += 1
            else:
                break

        if not closedriver:
            # Best-effort capture of the response headers; the driver does
            # not expose them, so ask the server directly.
            try:
                response = requests.head(
                    url,
                    cookies=self.set_cookies,
                    headers=self.set_header,
                    proxies=self.set_proxies,
                    timeout=self.set_timeout,
                    verify=self.set_verify,
                    allow_redirects=True,
                )
            except requests.RequestException:
                pass
            else:
                self.get_header = {k.lower(): v for k, v in response.headers.items()}

        if not wait_pq and sleep:
            time.sleep(sleep)
        self.wait(wait_pq=wait_pq, wait_pq_type=wait_pq_type, sleep=sleep, Obj=self.ChromeObj, url=url)

        for entry in reversed(self.ChromeObj.get_cookies()):
            self.get_cookies = self.__merge(self.get_cookies, {entry['name']: entry['value']})
        if self.get_cookies:
            self.get_cookie_str = self.cookieTdictstr(self.get_cookies)
            self.get_cookies = self.cookieserTdict(self.get_cookie_str)
    finally:
        # Quit on success and on failure so no Chrome process is leaked.
        if closedriver and self.ChromeObj:
            self.ChromeObj.quit()
            self.ChromeObj = None

def __build_chrome_options(self, setheadless, devtools):
    """Return the Chrome options used for every driver start."""
    chrome_options = webdriver3.chrome.options.Options()
    if setheadless:
        chrome_options.add_argument("--headless")
    else:
        if devtools:
            chrome_options.add_argument("--auto-open-devtools-for-tabs")  # open the developer tools
        # Hide the "Chrome is being controlled by automated test software" bar.
        chrome_options.add_argument('--disable-infobars')
    if self.set_proxies:
        # Prefer the http proxy and fall back to the https one; .get() keeps
        # a mapping with only one of the two keys from raising KeyError.
        proxy = self.set_proxies.get('http') or self.set_proxies.get('https')
        if proxy:
            chrome_options.add_argument("--proxy-server=" + proxy)
    chrome_options.add_argument("--disable-gpu")  # disable GPU hardware acceleration
    chrome_options.add_argument("--no-sandbox")  # needed on some Linux systems
    # Reduce the automation fingerprint exposed to pages.
    chrome_options.add_argument('--disable-blink-features=AutomationControlled')
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_options.add_argument('--ignore-certificate-errors')  # ignore certificate errors
    chrome_options.add_argument('--ignore-ssl-errors')  # ignore SSL errors such as failed handshakes
    return chrome_options

def __install_matching_chrome(self, error):
    """Download and install the Chrome build matching the bundled driver.

    Only Windows (chromedriver 142.0.7444.175) and CentOS 7 (chromedriver
    106.0.5249.21) are handled; anything else raises Exception.

    SECURITY NOTE(review): this downloads an installer from a hard-coded
    host and runs it (``msiexec`` / ``sudo yum``) without any checksum or
    signature verification. Confirm this is acceptable before shipping.
    """
    import os

    message = str(error)
    if os.name == 'nt' and 'Driver info: chromedriver=142.0.7444.175' in message:
        link = 'https://file.kwebapp.cn/sh/install/chrome/chromedriver-win64-142/GoogleChrome.msi'
        local_name = 'Chrome.msi'
        install_command = "msiexec /i Chrome.msi"
    elif os.name == 'posix' and 'Driver info: chromedriver=106.0.5249.21' in message:
        is_centos7 = False
        try:
            with open('/etc/os-release', 'r') as f:
                content = f.read()
            is_centos7 = 'CentOS-7' in content or 'CentOS Linux 7' in content
        except FileNotFoundError:
            pass
        if not is_centos7:
            raise Exception('暂不支持该操作系统版本,目前仅支持CentOS7和windows10。' + message)
        link = 'https://file.kwebapp.cn/sh/install/chrome/google-chrome-unstable-106.0.5249.12-1.x86_64.rpm'
        local_name = 'google-chrome-unstable-106.0.5249.12-1.x86_64.rpm'
        install_command = "sudo yum -y install google-chrome-unstable-106.0.5249.12-1.x86_64.rpm"
    else:
        raise Exception('暂不支持该操作系统版本,目前仅支持CentOS7和windows10' + message)

    response = requests.get(link)
    with open(local_name, "wb") as f:
        size = f.write(response.content)
    try:
        # Anything under 10 MiB cannot be the real installer.
        if size < 10 * 1024 * 1024:
            raise Exception('文件下载失败:' + link)
        print('\033[93mchromedriver与您操作系统的Chrome不兼容/不存在,正在为您安装Chrome...\033[0m')
        os.system(install_command)
    finally:
        os.remove(local_name)
351
def Chrome_screenshot(self, xpath, outfile):
    """Save a screenshot of one element of the page open in Chrome.

    A full screenshot is taken and cropped to the element's location and
    size as reported by the driver.

    xpath: XPath of the element to capture.
    outfile: path the cropped image is written to.
    """
    from PIL import Image

    driver = self.ChromeObj
    target = driver.find_element(self.webdriver3.common.by.By.XPATH, xpath)
    png_bytes = driver.get_screenshot_as_png()
    full_page = Image.open(io.BytesIO(png_bytes))
    origin = target.location
    extent = target.size
    crop_box = (
        origin['x'],
        origin['y'],
        origin['x'] + extent['width'],
        origin['y'] + extent['height'],
    )
    # Write the cropped region to disk.
    full_page.crop(crop_box).save(outfile)
372
+ def close_webdriver(self):
373
+ if self.ChromeObj:
374
+ self.ChromeObj.quit()
375
+ self.ChromeObj=None
376
+ if self.PhantomJsObj:
377
+ self.PhantomJsObj.quit()
378
+ self.PhantomJsObj=None
379
+
380
def openurl(self, url, method="GET", data=None, params=None, jsonparams=None, files=None, allow_redirects=True):
    """Send an HTTP request and store the outcome on the instance.

    After the call ``get_header`` (lower-cased keys), ``get_cookies``,
    ``get_cookie_str``, ``get_text``, ``get_content``, ``get_response`` and
    ``get_status_code`` describe the response.

    url: target address.
    method: HTTP method. When ``set_impersonate`` is set only get, post,
        put, patch and delete are accepted (case-insensitive).
    data: request body.
    params: query-string parameters.
    jsonparams: JSON body.
    files: files to upload.
    allow_redirects: whether redirects are followed.

    Raises Exception('不支持method=...') for an unsupported method while
    impersonating.
    """
    impersonating = bool(self.set_impersonate)
    if impersonating:
        from curl_cffi import requests as curl_cffi_requests

    # Build the requester once; later calls reuse it.
    if self.req is None:
        if not self.set_session:
            self.req = curl_cffi_requests if impersonating else requests
        elif impersonating:
            self.req = curl_cffi_requests.Session(impersonate=self.set_impersonate)
        else:
            self.req = requests.Session()
            self.req.mount('http://', requests.adapters.HTTPAdapter(max_retries=self.set_max_retries))
            self.req.mount('https://', requests.adapters.HTTPAdapter(max_retries=self.set_max_retries))
    if not self.keep_alive:
        # NOTE(review): this only assigns an attribute on the requester;
        # confirm the underlying library actually honours it.
        self.req.keep_alive = False
    if self.set_cookies and isinstance(self.set_cookies, str):
        self.set_cookies = self.cookieserTdict(self.set_cookies)

    call_options = dict(
        data=data,
        params=params,
        json=jsonparams,
        files=files,
        proxies=self.set_proxies,
        cookies=self.set_cookies,
        headers=self.set_header,
        timeout=self.set_timeout,
        verify=self.set_verify,
        allow_redirects=allow_redirects,
    )
    if impersonating:
        method = method.lower()
        if method not in ('get', 'post', 'put', 'patch', 'delete'):
            raise Exception('不支持method=' + method)
        if not self.set_session:
            # Without a session the fingerprint is passed on every call.
            call_options['impersonate'] = self.set_impersonate
        response = getattr(self.req, method)(url, **call_options)
        cookie = dict(response.cookies)
    else:
        response = self.req.request(method, url, **call_options)
        if self.set_encoding:
            response.encoding = self.set_encoding
        else:
            response.encoding = response.apparent_encoding
        cookie = requests.utils.dict_from_cookiejar(response.cookies)

    self.get_header = {name.lower(): value for name, value in dict(response.headers).items()}

    # Fold response cookies into the remembered ones, then layer them over
    # the cookies that were sent.
    if cookie:
        if self.get_cookies:
            self.get_cookies = self.__merge(self.get_cookies, cookie)
        else:
            self.get_cookies = cookie
    if self.set_cookies:
        self.get_cookies = self.__merge(self.set_cookies, self.get_cookies)
    if self.get_cookies:
        self.get_cookie_str = self.cookieTdictstr(self.get_cookies)

    self.get_text = response.text
    self.get_content = response.content
    self.get_response = response
    self.get_status_code = int(response.status_code)
493
+ def __is_index(self,params,index):
494
+ """判断列表或字典里的索引是否存在
495
+
496
+ params 列表或字典
497
+
498
+ index 索引值
499
+
500
+ return Boolean类型
501
+ """
502
+ try:
503
+ params[index]
504
+ except KeyError:
505
+ return False
506
+ except IndexError:
507
+ return False
508
+ else:
509
+ return True
510
+ def __merge(self,dict1, dict2):
511
+ "合并两个字典"
512
+ C_dict = {}
513
+ if dict1:
514
+ for key,value in dict1.items():
515
+ C_dict[key]=value
516
+ for key,value in dict2.items():
517
+ if value:
518
+ if isinstance(value, str) or (self.__is_index(C_dict,key) and isinstance(C_dict[key], str)):
519
+ if self.__is_index(C_dict,key):
520
+ t1,t2=len(str(value)),len(str(C_dict[key]))
521
+ if t1>=t2:
522
+ C_dict[key]=value
523
+ else:
524
+ C_dict[key]=value
525
+ else:
526
+ C_dict[key]=value
527
+ return C_dict
528
def cookieserTdict(self, cookiesstr):
    """Parse a ``name=value;name2=value2`` cookie string into a dict.

    Each pair is split on its first ``=`` only, so values that themselves
    contain ``=`` are kept intact. Whitespace around names and values is
    stripped, so the usual ``"a=1; b=2"`` form yields clean keys. Segments
    without ``=`` or with an empty name are skipped.

    Returns the dict, or None when ``cookiesstr`` is not a str.
    """
    if not isinstance(cookiesstr, str):
        return None
    cookies = {}
    for segment in cookiesstr.split(";"):
        name, separator, value = segment.partition("=")
        name = name.strip()
        if separator and name:
            cookies[name] = value.strip()
    return cookies
538
def cookieTdictstr(self, cookie):
    """Serialize a cookie dict as consecutive ``name=value;`` pairs.

    Falsy values are written as an empty string. The input dict is left
    unmodified.
    """
    return "".join(
        str(key) + "=" + str(value if value else "") + ";"
        for key, value in cookie.items()
    )
545
+ def __get_pyquery_rules(self,rulestext):
546
+ """获取pyquery规则
547
+
548
+ rulestext 规则字符串 参考 (.page-tip{0} 表示选择第一个 .page-tip{1} 表示选择第二个)
549
+
550
+ """
551
+ tkevalarr=rulestext.split('}')
552
+ tkevalarr1=[]
553
+ for tttt in tkevalarr:
554
+ eq='null'
555
+ if '{' in tttt:
556
+ tttttt=tttt.split('{')
557
+ eq=int(tttttt[1])
558
+ tttt=tttttt[0]
559
+ if tttt:
560
+ tkevalarr1.append({'val':tttt,'eq':eq})
561
+ return tkevalarr1
562
+ def __get_pyquery_rules_obj(self,rulestext,pyqueryobj):
563
+ """通过pyquery规则获取列表对象
564
+
565
+ rulestext 规则字符串 参考 .gknb-box[1]ul li
566
+
567
+ pyqueryobj pyquery装载html后的对象 pq(html)
568
+
569
+ return 返回 pyquery选中的对象
570
+
571
+ """
572
+ if ',' in rulestext:
573
+ pqobj=None
574
+ rulestextarr=rulestext.split(',')
575
+ for rulestext in rulestextarr:
576
+ tkevalarr1=self.__get_pyquery_rules(rulestext)
577
+ pqobj=None
578
+ lists=pyqueryobj
579
+ for tttt in tkevalarr1:
580
+ lists=lists.find(tttt['val'])
581
+ if tttt['eq']>=1:
582
+ lists=lists.eq(tttt['eq'])
583
+
584
+ if lists.length:
585
+ pqobj=lists
586
+ break
587
+ return pqobj
588
+ else:
589
+ tkevalarr1=self.__get_pyquery_rules(rulestext)
590
+ pqobj=None
591
+ lists=pyqueryobj
592
+ for tttt in tkevalarr1:
593
+ lists=lists.find(tttt['val'])
594
+ if tttt['eq']!='null':
595
+ lists=lists.eq(tttt['eq'])
596
+
597
+ if lists.length:
598
+ pqobj=lists
599
+ return pqobj
@@ -0,0 +1,19 @@
1
+ # Licensed to the Software Freedom Conservancy (SFC) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The SFC licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+
19
+ __version__ = "3.0.0"
@@ -0,0 +1,18 @@
1
+ # Licensed to the Software Freedom Conservancy (SFC) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The SFC licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ from . import exceptions # noqa