kcwebs 1.6__tar.gz → 1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kcwebs-1.6 → kcwebs-1.7}/PKG-INFO +2 -2
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/__init__.py +1 -1
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/common/autoload.py +22 -4
- kcwebs-1.7/kcwebs/utill/http.py +577 -0
- kcwebs-1.7/kcwebs/utill/redis.py +597 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/support/event_firing_webdriver.py +4 -4
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/support/expected_conditions.py +5 -5
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs.egg-info/PKG-INFO +2 -2
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs.egg-info/requires.txt +2 -1
- {kcwebs-1.6 → kcwebs-1.7}/setup.py +2 -1
- kcwebs-1.6/kcwebs/utill/http.py +0 -314
- kcwebs-1.6/kcwebs/utill/redis.py +0 -356
- {kcwebs-1.6 → kcwebs-1.7}/README.md +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/common/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/common/session.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/config/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/kcwebs.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/common/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/common/autoload.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/controller/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/controller/index/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/controller/index/common/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/controller/index/common/autoload.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/controller/index/common/html/include/static.html +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/controller/index/common/model.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/controller/index/index.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/controller/index/tpl/index/home.html +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/api/controller/index/tpl/index/index.html +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/common/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/common/autoload.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/common/html/include/static.html +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/common/model.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/config/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/config/app.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/config/database.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/config/other.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/app/config/redis.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/tempfile/kcwebs/server.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/cache/cache.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/db/model.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/db/mongodb.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/db/mysql copy.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/db/mysql.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/db/sqlite.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/queues.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/common/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/common/exceptions.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/android/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/android/webdriver.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/blackberry/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/blackberry/webdriver.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/chrome/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/chrome/options.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/chrome/remote_connection.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/chrome/service.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/chrome/webdriver.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/action_chains.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/alert.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/by.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/desired_capabilities.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/html5/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/html5/application_cache.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/keys.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/proxy.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/service.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/touch_actions.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/common/utils.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/edge/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/edge/options.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/edge/service.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/edge/webdriver.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/firefox/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/firefox/extension_connection.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/firefox/firefox_binary.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/firefox/firefox_profile.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/firefox/options.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/firefox/remote_connection.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/firefox/service.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/firefox/webdriver.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/firefox/webelement.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/ie/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/ie/service.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/ie/webdriver.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/opera/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/opera/options.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/opera/webdriver.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/phantomjs/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/phantomjs/service.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/phantomjs/webdriver.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/command.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/errorhandler.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/file_detector.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/getAttribute.js +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/isDisplayed.js +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/mobile.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/remote_connection.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/switch_to.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/utils.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/webdriver.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/remote/webelement.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/safari/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/safari/service.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/safari/webdriver.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/support/__init__.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/support/abstract_event_listener.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/support/color.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/support/events.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/support/select.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/support/ui.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs/utill/selenium3/webdriver/support/wait.py +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs.egg-info/SOURCES.txt +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs.egg-info/dependency_links.txt +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs.egg-info/entry_points.txt +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/kcwebs.egg-info/top_level.txt +0 -0
- {kcwebs-1.6 → kcwebs-1.7}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 1.2
|
|
2
2
|
Name: kcwebs
|
|
3
|
-
Version: 1.6
|
|
3
|
+
Version: 1.7
|
|
4
4
|
Summary: kcwebs作为web开发而设计的高性能框架
|
|
5
5
|
Home-page: https://docs.kwebapp.cn/index/index/2
|
|
6
6
|
Author: 百里-坤坤
|
|
@@ -9,5 +9,5 @@ Maintainer: 坤坤
|
|
|
9
9
|
Maintainer-email: fk1402936534@qq.com
|
|
10
10
|
License: MIT License
|
|
11
11
|
Description: kcwebs作为web开发而设计的高性能框架,采用全新的架构思想,注重易用性。遵循MIT开源许可协议发布,意味着个人和企业可以免费使用kcwebs,甚至允许把你基于kcwebs开发的应用开源或商业产品发布或销售。完整文档请访问:https://docs.kwebapp.cn/index/index/2
|
|
12
|
-
Keywords: kcwebs1.6
|
|
12
|
+
Keywords: kcwebs1.7
|
|
13
13
|
Platform: UNKNOWN
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from kcws.common import *
|
|
2
2
|
from .. import config
|
|
3
|
-
import random,urllib,asyncio,websockets,smtplib,datetime,chardet,copy,multiprocessing,warnings,xlrd
|
|
3
|
+
import random,urllib,asyncio,websockets,smtplib,datetime,chardet,copy,multiprocessing,warnings,xlrd,threading
|
|
4
4
|
from kcwebs.utill.redis import redis as kcwsredis
|
|
5
5
|
from email.mime.text import MIMEText
|
|
6
6
|
from email.utils import formataddr
|
|
@@ -775,8 +775,7 @@ def rggestrsgrhklhtrdhbithjtiorjhiothposzfsgrgtsre(docfile,imgpath=False,imgmins
|
|
|
775
775
|
file_set_content(outfiles,html)
|
|
776
776
|
else:
|
|
777
777
|
return html
|
|
778
|
-
|
|
779
|
-
def tran_pdf_to_docx(pdfname,outname):
|
|
778
|
+
def fesgrsgtrgtdrhbtdgrrgrgsgtsegr(pdfname,outname,stop_event=None):
|
|
780
779
|
"""pdf转docx
|
|
781
780
|
|
|
782
781
|
pdfname pdf文件
|
|
@@ -784,12 +783,31 @@ def tran_pdf_to_docx(pdfname,outname):
|
|
|
784
783
|
outname 转换后保存文件
|
|
785
784
|
"""
|
|
786
785
|
folder_path=os.path.dirname(outname)
|
|
787
|
-
if not os.path.exists(folder_path):
|
|
786
|
+
if folder_path and not os.path.exists(folder_path):
|
|
788
787
|
os.makedirs(folder_path, exist_ok=True)
|
|
789
788
|
cv = pdf2docx.Converter(pdfname)
|
|
790
789
|
cv.convert(outname, start=0, end=None)
|
|
791
790
|
cv.close()
|
|
792
791
|
return True
|
|
792
|
+
def tran_pdf_to_docx(pdfname,outname,timeout=None):
|
|
793
|
+
"""pdf转docx
|
|
794
|
+
|
|
795
|
+
pdfname pdf文件
|
|
796
|
+
|
|
797
|
+
outname 转换后保存文件
|
|
798
|
+
|
|
799
|
+
timeout 转换超时 单位秒
|
|
800
|
+
"""
|
|
801
|
+
if not timeout:
|
|
802
|
+
return fesgrsgtrgtdrhbtdgrrgrgsgtsegr(pdfname,outname)
|
|
803
|
+
else:
|
|
804
|
+
thread = threading.Thread(target=fesgrsgtrgtdrhbtdgrrgrgsgtsegr,args=(pdfname,outname),daemon=True)
|
|
805
|
+
thread.start()
|
|
806
|
+
thread.join(timeout=timeout)
|
|
807
|
+
if thread.is_alive():
|
|
808
|
+
raise Exception('tran_pdf_to_docx timeout out')
|
|
809
|
+
else:
|
|
810
|
+
return True
|
|
793
811
|
def tran_pdf_to_xlsx_process(pdfname,outname,process=True):
|
|
794
812
|
"""pdf 转 xlsx
|
|
795
813
|
|
|
@@ -0,0 +1,577 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import requests,traceback,time
|
|
3
|
+
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
|
4
|
+
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
|
5
|
+
from .selenium3 import webdriver as webdriver3
|
|
6
|
+
from .selenium3.webdriver.support import expected_conditions
|
|
7
|
+
# from .selenium3.webdriver.common.by import By
|
|
8
|
+
from curl_cffi import requests as curl_cffi_requests
|
|
9
|
+
from pyquery import PyQuery as kcwebspq
|
|
10
|
+
class Http:
|
|
11
|
+
# By.CLASS_NAME
|
|
12
|
+
webdriver3=webdriver3
|
|
13
|
+
expecteds=expected_conditions
|
|
14
|
+
"http请求类"
|
|
15
|
+
set_session=True #是否启用会话
|
|
16
|
+
set_impersonate=None #设置模拟浏览器指纹(chrome99、chrome100、chrome110、chrome118、chrome120,firefox100、firefox110,safari15、safari16)
|
|
17
|
+
set_proxies=None #设置代理
|
|
18
|
+
set_cookies={} #设置请求cookie
|
|
19
|
+
set_header={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'} #请求头
|
|
20
|
+
set_timeout=(6.05,10) #超时时间 6.05表示连接采时时间 3030表示读取超时时间 #注意 set_timeout参数主要关注的是“无响应”的时间段,而不是整个请求的处理时间
|
|
21
|
+
set_max_retries=2 #重试次数 (实际请求3次)
|
|
22
|
+
set_verify=False #SSL 证书的验证 sll证书路径
|
|
23
|
+
set_encoding="" #设置text输出编码 如utf-8 不填表示自动
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
get_header={} #获取响应头
|
|
27
|
+
get_cookies={} #获取最后的响应cookie
|
|
28
|
+
get_cookie_str='' #获取最后的响应cookie 字符串
|
|
29
|
+
get_text='' #获取body响应内容
|
|
30
|
+
get_content='' #获取body响应二进制内容
|
|
31
|
+
get_response='' #获取响应对象
|
|
32
|
+
get_status_code=None #获取响应状态码
|
|
33
|
+
keep_alive=True #默认的http connection是keep-alive的 False表示关闭
|
|
34
|
+
req=None
|
|
35
|
+
def __init(self):
|
|
36
|
+
self.set_proxies=None #设置代理
|
|
37
|
+
self.set_cookies={} #设置请求cookie
|
|
38
|
+
self.set_header={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'} #请求头
|
|
39
|
+
self.set_timeout=(6.05,10) #超时时间 6.05表示连接采时时间 3030表示读取超时时间 #注意 set_timeout参数主要关注的是“无响应”的时间段,而不是整个请求的处理时间
|
|
40
|
+
self.set_max_retries=2 #重试次数 (实际请求3次)
|
|
41
|
+
self.set_verify=False #SSL 证书的验证 sll证书路径
|
|
42
|
+
self.set_encoding="" #设置text输出编码
|
|
43
|
+
self.set_session=True #是否启用会话
|
|
44
|
+
|
|
45
|
+
self.get_header={} #获取响应头
|
|
46
|
+
self.get_cookies={} #获取最后的响应cookie
|
|
47
|
+
self.get_cookie_str='' #获取最后的响应cookie 字符串
|
|
48
|
+
self.get_text='' #获取body响应内容
|
|
49
|
+
self.get_content='' #获取body响应二进制内容
|
|
50
|
+
self.get_response='' #获取响应对象
|
|
51
|
+
self.get_status_code=None #获取响应状态码
|
|
52
|
+
self.keep_alive=True #默认的http connection是keep-alive的 False表示关闭
|
|
53
|
+
self.req=None
|
|
54
|
+
self.set_impersonate=None
|
|
55
|
+
def __init__(self):
|
|
56
|
+
self.__init()
|
|
57
|
+
def __del__(self):
|
|
58
|
+
self.__init()
|
|
59
|
+
def gettext(self):
|
|
60
|
+
"""得到响应text"""
|
|
61
|
+
return self.get_text
|
|
62
|
+
def wait(self,wait_pq=[],wait_pq_type='element',sleep=1,Obj=None,url=''):
|
|
63
|
+
"""等待其中一个元素出现
|
|
64
|
+
|
|
65
|
+
wait_pq 基于pyquery表达式 等待其中一个元素出现 (传入pyquery表达式 列表格式) 如 [['表达式1','表达式2'],['表达式3','表达式4']] 表示 表达式1或表达式2其中一个成立 并且 表达式3或表达式4其中一个成立
|
|
66
|
+
|
|
67
|
+
wait_pq_type 等待类型: element表示等待元素出现 text表示等待元素内的文本出现 其他值表示等待标签属性值出现
|
|
68
|
+
|
|
69
|
+
sleep 最多等待时间 建议配合wait_pq使用
|
|
70
|
+
|
|
71
|
+
Obj webdriver的Chrome或PhantomJS对象
|
|
72
|
+
"""
|
|
73
|
+
if wait_pq:
|
|
74
|
+
if not Obj:
|
|
75
|
+
if self.PhantomJsObj and self.ChromeObj:
|
|
76
|
+
raise Exception('Chrome和PhantomJS不可同时存在')
|
|
77
|
+
if self.ChromeObj:
|
|
78
|
+
Obj=self.ChromeObj
|
|
79
|
+
elif self.PhantomJsObj:
|
|
80
|
+
Obj=self.PhantomJsObj
|
|
81
|
+
elif not Obj:
|
|
82
|
+
raise Exception('Chrome对象和PhantomJS对象不存在')
|
|
83
|
+
if sleep<10:
|
|
84
|
+
sleep=10
|
|
85
|
+
|
|
86
|
+
sfdsf=False
|
|
87
|
+
for sdsa in range(10000):
|
|
88
|
+
if sdsa>sleep*2:
|
|
89
|
+
self.get_text = Obj.page_source
|
|
90
|
+
raise Exception('max-wait_pq:'+url)
|
|
91
|
+
time.sleep(0.5)
|
|
92
|
+
doc=kcwebspq(Obj.page_source)
|
|
93
|
+
sfdsf1=0
|
|
94
|
+
for wait_pq1 in wait_pq:
|
|
95
|
+
if isinstance(wait_pq1, list) or isinstance(wait_pq1, tuple):
|
|
96
|
+
for wait_pq2 in wait_pq1:
|
|
97
|
+
tt=self.__get_pyquery_rules_obj(wait_pq2,doc)
|
|
98
|
+
if tt and tt.length:
|
|
99
|
+
if wait_pq_type=='text':
|
|
100
|
+
if len(tt.text().replace(' ','').replace('\n','').replace('\r','').replace('\t',''))>0:
|
|
101
|
+
sfdsf1+=1
|
|
102
|
+
break
|
|
103
|
+
elif wait_pq_type=='element':
|
|
104
|
+
sfdsf1+=1
|
|
105
|
+
break
|
|
106
|
+
elif wait_pq_type:
|
|
107
|
+
if tt.attr(wait_pq_type):
|
|
108
|
+
sfdsf1+=1
|
|
109
|
+
break
|
|
110
|
+
else:
|
|
111
|
+
tt=self.__get_pyquery_rules_obj(wait_pq1,doc)
|
|
112
|
+
if tt and tt.length:
|
|
113
|
+
if wait_pq_type=='text':
|
|
114
|
+
if len(tt.text().replace(' ','').replace('\n','').replace('\r','').replace('\t',''))>0:
|
|
115
|
+
sfdsf=True
|
|
116
|
+
break
|
|
117
|
+
elif wait_pq_type=='element':
|
|
118
|
+
sfdsf=True
|
|
119
|
+
break
|
|
120
|
+
elif wait_pq_type:
|
|
121
|
+
if tt.attr(wait_pq_type):
|
|
122
|
+
sfdsf=True
|
|
123
|
+
break
|
|
124
|
+
if sfdsf or sfdsf1==len(wait_pq):
|
|
125
|
+
break
|
|
126
|
+
self.get_text = Obj.page_source
|
|
127
|
+
PhantomJsObj=None
|
|
128
|
+
def open_PhantomJS(self,url,executable_path='',closedriver=True,wait_pq=[],wait_pq_type='element',sleep=1):
|
|
129
|
+
"""通过PhantomJS引擎模拟浏览器请求 可以获取到js渲染后的html
|
|
130
|
+
|
|
131
|
+
wait_pq 基于pyquery表达式 等待其中一个元素出现 (传入pyquery表达式 列表格式) 如 [['表达式1','表达式2'],['表达式3','表达式4']] 表示 表达式1或表达式2其中一个成立 并且 表达式3或表达式4其中一个成立
|
|
132
|
+
|
|
133
|
+
wait_pq_type 等待类型: element表示等待元素出现 text表示等待元素内的文本出现 其他值表示等待标签属性值出现
|
|
134
|
+
|
|
135
|
+
sleep 最多等待时间 建议配合wait_pq使用
|
|
136
|
+
|
|
137
|
+
"""
|
|
138
|
+
if self.set_cookies and isinstance(self.set_cookies,str):
|
|
139
|
+
self.set_cookies=self.cookieserTdict(self.set_cookies)
|
|
140
|
+
if not self.PhantomJsObj:
|
|
141
|
+
self.PhantomJsObj=webdriver3.PhantomJS(executable_path=executable_path)
|
|
142
|
+
# if self.set_session:
|
|
143
|
+
# self.get_cookies=self.set_cookies
|
|
144
|
+
# for k in self.PhantomJsObj.get_cookies():
|
|
145
|
+
# self.get_cookies=self.__merge(self.get_cookies,k)
|
|
146
|
+
# if self.get_cookies:
|
|
147
|
+
# self.get_cookie_str=self.cookieTdictstr(self.get_cookies)
|
|
148
|
+
# self.get_cookies=self.cookieserTdict(self.get_cookie_str)
|
|
149
|
+
# if self.get_cookies!=self.set_cookies:
|
|
150
|
+
# self.PhantomJsObj.delete_all_cookies()
|
|
151
|
+
# for k in self.set_cookies:
|
|
152
|
+
# t={'name':k, 'value':self.set_cookies[k]}
|
|
153
|
+
# self.PhantomJsObj.add_cookie(t)
|
|
154
|
+
|
|
155
|
+
i=0
|
|
156
|
+
while True:
|
|
157
|
+
try:
|
|
158
|
+
self.PhantomJsObj.get(url)
|
|
159
|
+
except Exception as e:
|
|
160
|
+
estr=str(e)
|
|
161
|
+
print('estr',estr)
|
|
162
|
+
if 'error: net::ERR_CONNECTION_CLOSED' in estr or 'timeout: Timed out receiving message from rendere' in estr or 'error: net::ERR_CONNECTION_RESET' in estr or 'error: net::ERR_NAME_NOT_RESOLVED' in estr or 'unknown error: net::ERR_CONNECTION_TIMED_OUT' in estr or 'Max retries exceeded with url' in estr or 'error: net::ERR_SSL_VERSION_OR_CIPHER_MISMATCH' in estr or 'error: net::ERR_CONNECTION_REFUSED' in estr:
|
|
163
|
+
if i>self.set_max_retries:
|
|
164
|
+
raise Exception('max_retries'+estr)
|
|
165
|
+
i+=1
|
|
166
|
+
else:
|
|
167
|
+
raise
|
|
168
|
+
else:
|
|
169
|
+
break
|
|
170
|
+
if not closedriver:
|
|
171
|
+
try:
|
|
172
|
+
response = requests.head(url,cookies=self.set_cookies,allow_redirects=True)
|
|
173
|
+
except:pass
|
|
174
|
+
else:
|
|
175
|
+
resheader=dict(response.headers)
|
|
176
|
+
self.get_header={}
|
|
177
|
+
for k in resheader:
|
|
178
|
+
self.get_header[k.lower()]=resheader[k]
|
|
179
|
+
if not wait_pq and sleep:
|
|
180
|
+
time.sleep(sleep)
|
|
181
|
+
self.wait(wait_pq=wait_pq,wait_pq_type=wait_pq_type,sleep=sleep,Obj=self.PhantomJsObj,url=url)
|
|
182
|
+
# self.get_text = self.PhantomJsObj.page_source
|
|
183
|
+
# self.get_cookies=self.set_cookies
|
|
184
|
+
for k in reversed(self.PhantomJsObj.get_cookies()):
|
|
185
|
+
zd={k['name']:k['value']}
|
|
186
|
+
self.get_cookies=self.__merge(self.get_cookies,zd)
|
|
187
|
+
if self.get_cookies:
|
|
188
|
+
self.get_cookie_str=self.cookieTdictstr(self.get_cookies)
|
|
189
|
+
self.get_cookies=self.cookieserTdict(self.get_cookie_str)
|
|
190
|
+
# if self.set_session:
|
|
191
|
+
# self.set_cookies=self.get_cookies
|
|
192
|
+
if closedriver:
|
|
193
|
+
self.PhantomJsObj.quit()
|
|
194
|
+
self.PhantomJsObj=None
|
|
195
|
+
ChromeObj=None
|
|
196
|
+
def open_Chrome(self,url,executable_path='',closedriver=True,setheadless=True,wait_pq=[],wait_pq_type='element',sleep=1):
|
|
197
|
+
"""通过Chrome浏览器引擎模拟浏览器请求 可以获取到js渲染后的html
|
|
198
|
+
|
|
199
|
+
closedriver 是否关闭退出
|
|
200
|
+
|
|
201
|
+
setheadless 是否设置无头
|
|
202
|
+
|
|
203
|
+
wait_pq 基于pyquery表达式 等待其中一个元素出现 (传入pyquery表达式 列表格式) 如 [['表达式1','表达式2'],['表达式3','表达式4']] 表示 表达式1或表达式2其中一个成立 并且 表达式3或表达式4其中一个成立
|
|
204
|
+
|
|
205
|
+
wait_pq_type 等待类型: element表示等待元素出现 text表示等待元素内的文本出现 其他值表示等待标签属性值出现
|
|
206
|
+
|
|
207
|
+
sleep 最多等待时间 建议配合wait_pq使用
|
|
208
|
+
|
|
209
|
+
"""
|
|
210
|
+
# if wait_pq_type not in ['element','text']:
|
|
211
|
+
# raise Exception('wait_pq_type 错误')
|
|
212
|
+
if self.set_cookies and isinstance(self.set_cookies,str):
|
|
213
|
+
self.set_cookies=self.cookieserTdict(self.set_cookies)
|
|
214
|
+
# print('self.set_cookiesself.set_cookiesself.set_cookies',self.set_cookies)
|
|
215
|
+
if not self.ChromeObj:
|
|
216
|
+
chrome_options = webdriver3.chrome.options.Options()
|
|
217
|
+
if setheadless:
|
|
218
|
+
chrome_options.add_argument("--headless") #设置无头
|
|
219
|
+
else:
|
|
220
|
+
chrome_options.add_argument('--disable-infobars') # 隐藏 "Chrome 正受到自动测试软件控制" 提示栏
|
|
221
|
+
if self.set_proxies:
|
|
222
|
+
# chrome_options.add_argument("--proxy-server=http://proxyserver:port")
|
|
223
|
+
if self.set_proxies['http']:
|
|
224
|
+
chrome_options.add_argument("--proxy-server="+self.set_proxies['http'])
|
|
225
|
+
elif self.set_proxies['https']:
|
|
226
|
+
chrome_options.add_argument("--proxy-server="+self.set_proxies['https'])
|
|
227
|
+
# print(self.set_proxies)
|
|
228
|
+
# exit()
|
|
229
|
+
|
|
230
|
+
chrome_options.add_argument("--disable-gpu") # 禁用GPU硬件加速,适用于Linux和Windows系统
|
|
231
|
+
chrome_options.add_argument("--no-sandbox") # 禁用沙盒模式,在某些Linux系统上需要
|
|
232
|
+
|
|
233
|
+
# chrome_options.add_argument('--log-level=3') # 关闭所有非致命日志
|
|
234
|
+
# chrome_options.add_experimental_option('excludeSwitches', ['enable-logging']) # 禁止 Selenium 自身日志
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
# 禁用自动化控制特征(减少被检测风险)
|
|
239
|
+
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
|
|
240
|
+
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
|
|
241
|
+
|
|
242
|
+
chrome_options.add_argument('--ignore-certificate-errors') # 忽略所有证书错误
|
|
243
|
+
chrome_options.add_argument('--ignore-ssl-errors') # 忽略 SSL 相关错误(如握手失败)
|
|
244
|
+
|
|
245
|
+
try:
|
|
246
|
+
self.ChromeObj = webdriver3.Chrome(executable_path=executable_path,chrome_options=chrome_options)
|
|
247
|
+
except Exception as e:
|
|
248
|
+
import os
|
|
249
|
+
print("\033[93mChromeChromeChromeChromeChromeChromeChromeChromeChromeChromeChromeChromeve\033[0m",e)
|
|
250
|
+
if os.name == 'nt' and 'Driver info: chromedriver=142.0.7444.175' in str(e):
|
|
251
|
+
# print("\033[93mchromedriver与您操作系统的Chrome不兼容性 下载地址参考",'https://file.kwebapp.cn/sh/install/chrome/chromedriver-win64-142/GoogleChrome.msi\033[0m')
|
|
252
|
+
response=requests.get('https://file.kwebapp.cn/sh/install/chrome/chromedriver-win64-142/GoogleChrome.msi')
|
|
253
|
+
f=open('Chrome.msi',"wb")
|
|
254
|
+
tsize=f.write(response.content)
|
|
255
|
+
f.close()
|
|
256
|
+
if tsize<10*1024*1024:
|
|
257
|
+
os.remove('Chrome.msi')
|
|
258
|
+
raise Exception('文件下载失败:https://file.kwebapp.cn/sh/install/chrome/chromedriver-win64-142/GoogleChrome.msi')
|
|
259
|
+
print('\033[93mchromedriver与您操作系统的Chrome不兼容/不存在,正在为您安装Chrome...\033[0m')
|
|
260
|
+
os.system("msiexec /i Chrome.msi")
|
|
261
|
+
os.remove('Chrome.msi')
|
|
262
|
+
# print('\033[93m安装完成,请重试\033[0m')
|
|
263
|
+
self.open_Chrome(url=url,executable_path=executable_path,closedriver=closedriver,setheadless=setheadless)
|
|
264
|
+
elif os.name == 'posix' and 'Driver info: chromedriver=106.0.5249.21' in str(e):
|
|
265
|
+
def systemtypes():
|
|
266
|
+
try:
|
|
267
|
+
with open('/etc/os-release', 'r') as f:
|
|
268
|
+
content = f.read()
|
|
269
|
+
if 'CentOS-7' in content or 'CentOS Linux 7' in content:
|
|
270
|
+
return 'CentOS7'
|
|
271
|
+
except FileNotFoundError:
|
|
272
|
+
pass
|
|
273
|
+
return False
|
|
274
|
+
t=systemtypes()
|
|
275
|
+
if t=='CentOS7':
|
|
276
|
+
# print("\033[93mchromedriver与您操作系统的Chrome不兼容性 下载地址参考",'https://file.kwebapp.cn/sh/install/chrome/google-chrome-unstable-106.0.5249.12-1.x86_64.rpm\033[0m')
|
|
277
|
+
response=requests.get('https://file.kwebapp.cn/sh/install/chrome/google-chrome-unstable-106.0.5249.12-1.x86_64.rpm')
|
|
278
|
+
f=open('google-chrome-unstable-106.0.5249.12-1.x86_64.rpm',"wb")
|
|
279
|
+
tsize=f.write(response.content)
|
|
280
|
+
f.close()
|
|
281
|
+
if tsize<10*1024*1024:
|
|
282
|
+
os.remove('google-chrome-unstable-106.0.5249.12-1.x86_64.rpm')
|
|
283
|
+
raise Exception('文件下载失败:https://file.kwebapp.cn/sh/install/chrome/google-chrome-unstable-106.0.5249.12-1.x86_64.rpm')
|
|
284
|
+
|
|
285
|
+
print('\033[93mchromedriver与您操作系统的Chrome不兼容/不存在,正在为您安装Chrome...\033[0m')
|
|
286
|
+
os.system("sudo yum -y install google-chrome-unstable-106.0.5249.12-1.x86_64.rpm")
|
|
287
|
+
os.remove('google-chrome-unstable-106.0.5249.12-1.x86_64.rpm')
|
|
288
|
+
# print('\033[93m安装完成,请重试\033[0m')
|
|
289
|
+
self.open_Chrome(url=url,executable_path=executable_path,closedriver=closedriver,setheadless=setheadless)
|
|
290
|
+
else:
|
|
291
|
+
raise Exception('暂不支持该操作系统版本,目前仅支持CentOS7和windows10。'+str(e))
|
|
292
|
+
else:
|
|
293
|
+
raise Exception('暂不支持该操作系统版本,目前仅支持CentOS7和windows10'+str(e))
|
|
294
|
+
|
|
295
|
+
if self.ChromeObj:
|
|
296
|
+
# if self.set_session:
|
|
297
|
+
# self.get_cookies=self.set_cookies
|
|
298
|
+
# for k in self.ChromeObj.get_cookies():
|
|
299
|
+
# self.get_cookies=self.__merge(self.get_cookies,k)
|
|
300
|
+
# if self.get_cookies:
|
|
301
|
+
# self.get_cookie_str=self.cookieTdictstr(self.get_cookies)
|
|
302
|
+
# self.get_cookies=self.cookieserTdict(self.get_cookie_str)
|
|
303
|
+
# if self.get_cookies!=self.set_cookies:
|
|
304
|
+
# self.ChromeObj.delete_all_cookies()
|
|
305
|
+
# for k in self.set_cookies:
|
|
306
|
+
# t={'name':k, 'value':self.set_cookies[k]}
|
|
307
|
+
# # print('ttttt',t)
|
|
308
|
+
# self.ChromeObj.add_cookie(t)
|
|
309
|
+
|
|
310
|
+
i=0
|
|
311
|
+
while True:
|
|
312
|
+
try:
|
|
313
|
+
self.ChromeObj.get(url)
|
|
314
|
+
except Exception as e:
|
|
315
|
+
estr=str(e)
|
|
316
|
+
if 'error: net::ERR_CONNECTION_CLOSED' in estr or 'timeout: Timed out receiving message from rendere' in estr or 'error: net::ERR_CONNECTION_RESET' in estr or 'error: net::ERR_NAME_NOT_RESOLVED' in estr or 'unknown error: net::ERR_CONNECTION_TIMED_OUT' in estr or 'Max retries exceeded with url' in estr or 'error: net::ERR_SSL_VERSION_OR_CIPHER_MISMATCH' in estr or 'error: net::ERR_CONNECTION_REFUSED' in estr:
|
|
317
|
+
if i>self.set_max_retries:
|
|
318
|
+
raise Exception('max_retries'+estr)
|
|
319
|
+
i+=1
|
|
320
|
+
else:
|
|
321
|
+
raise
|
|
322
|
+
else:
|
|
323
|
+
break
|
|
324
|
+
if not closedriver:
|
|
325
|
+
try:
|
|
326
|
+
response = requests.head(url,cookies=self.set_cookies,allow_redirects=True)
|
|
327
|
+
except:pass
|
|
328
|
+
else:
|
|
329
|
+
resheader=dict(response.headers)
|
|
330
|
+
self.get_header={}
|
|
331
|
+
for k in resheader:
|
|
332
|
+
self.get_header[k.lower()]=resheader[k]
|
|
333
|
+
if not wait_pq and sleep:
|
|
334
|
+
time.sleep(sleep)
|
|
335
|
+
self.wait(wait_pq=wait_pq,wait_pq_type=wait_pq_type,sleep=sleep,Obj=self.ChromeObj,url=url)
|
|
336
|
+
# self.get_text = self.ChromeObj.page_source
|
|
337
|
+
# self.get_cookies=self.set_cookies
|
|
338
|
+
for k in reversed(self.ChromeObj.get_cookies()):
|
|
339
|
+
zd={k['name']:k['value']}
|
|
340
|
+
self.get_cookies=self.__merge(self.get_cookies,zd)
|
|
341
|
+
if self.get_cookies:
|
|
342
|
+
self.get_cookie_str=self.cookieTdictstr(self.get_cookies)
|
|
343
|
+
self.get_cookies=self.cookieserTdict(self.get_cookie_str)
|
|
344
|
+
# if self.set_session:
|
|
345
|
+
# self.set_cookies=self.get_cookies
|
|
346
|
+
if closedriver:
|
|
347
|
+
self.ChromeObj.quit()
|
|
348
|
+
self.ChromeObj.close()
|
|
349
|
+
self.ChromeObj=None
|
|
350
|
+
def close_webdriver(self):
|
|
351
|
+
if self.ChromeObj:
|
|
352
|
+
self.ChromeObj.quit()
|
|
353
|
+
self.ChromeObj.close()
|
|
354
|
+
self.ChromeObj=None
|
|
355
|
+
|
|
356
|
+
if self.PhantomJsObj:
|
|
357
|
+
self.PhantomJsObj.quit()
|
|
358
|
+
self.PhantomJsObj=None
|
|
359
|
+
|
|
360
|
+
def openurl(self,url,method="GET",data=None,params=None,jsonparams=None,files=None,allow_redirects=True):
    """Send an HTTP request like a browser would and record the response.

    url: target address

    method: HTTP verb such as GET or POST

    data: request body parameters

    params: query-string parameters

    jsonparams: JSON body parameters

    files: files to upload

    allow_redirects: whether redirects are followed

    Nothing is returned; the outcome is stored on the instance as
    get_header (header names lower-cased), get_cookies, get_cookie_str,
    get_text, get_content, get_response and get_status_code.

    Raises Exception when impersonation is enabled and the method is not one
    of get, post, put, patch or delete.
    """
    # Lazily pick the transport: a session object or the bare module,
    # from curl_cffi when impersonating and from requests otherwise.
    if self.req is None:
        if self.set_session:
            if self.set_impersonate:
                self.req = curl_cffi_requests.Session(impersonate=self.set_impersonate)
            else:
                self.req = requests.Session()
                for scheme in ('http://', 'https://'):
                    self.req.mount(scheme, requests.adapters.HTTPAdapter(max_retries=self.set_max_retries))
        elif self.set_impersonate:
            self.req = curl_cffi_requests
        else:
            self.req = requests
    if not self.keep_alive:
        self.req.keep_alive = False

    # Cookies may have been configured as a raw "k=v;k=v" string.
    if self.set_cookies and isinstance(self.set_cookies, str):
        self.set_cookies = self.cookieserTdict(self.set_cookies)

    options = dict(
        data=data,
        params=params,
        json=jsonparams,
        files=files,
        proxies=self.set_proxies,
        cookies=self.set_cookies,
        headers=self.set_header,
        timeout=self.set_timeout,
        verify=self.set_verify,
        allow_redirects=allow_redirects,
    )

    if self.set_impersonate:
        method = method.lower()
        if method not in ('get', 'post', 'put', 'patch', 'delete'):
            raise Exception('不支持method='+method)
        if not self.set_session:
            # Without a session the fingerprint is passed on every call.
            options['impersonate'] = self.set_impersonate
        response = getattr(self.req, method)(url, **options)
    else:
        response = self.req.request(method, url, **options)
        if self.set_encoding:
            response.encoding = self.set_encoding
        else:
            response.encoding = response.apparent_encoding

    self.get_header = {
        name.lower(): value for name, value in dict(response.headers).items()
    }

    if self.set_impersonate:
        cookie = dict(response.cookies)
    else:
        cookie = requests.utils.dict_from_cookiejar(response.cookies)

    # Fold the response cookies into what was collected earlier, then layer
    # the result over the cookies that were configured for sending.
    if self.get_cookies and cookie:
        self.get_cookies = self.__merge(self.get_cookies, cookie)
    elif cookie:
        self.get_cookies = cookie
    if self.set_cookies:
        self.get_cookies = self.__merge(self.set_cookies, self.get_cookies)
    if self.get_cookies:
        self.get_cookie_str = self.cookieTdictstr(self.get_cookies)

    self.get_text = response.text
    self.get_content = response.content
    self.get_response = response
    self.get_status_code = int(response.status_code)
|
|
471
|
+
def __is_index(self,params,index):
    """Tell whether ``index`` can be looked up in a list or dict.

    params: list or dict to probe

    index: list position or dict key

    Returns True when ``params[index]`` succeeds and False when the lookup
    raises KeyError or IndexError.
    """
    try:
        params[index]
    except (KeyError, IndexError):
        return False
    return True
|
|
488
|
+
def __merge(self,dict1, dict2):
    """Merge two dictionaries into a new one, leaving both inputs untouched.

    dict1: base dictionary; may be None or empty

    dict2: dictionary layered on top; may be None or empty

    Rules applied to each entry of dict2:
    - falsy values (``''``, ``None``, ``0``) never overwrite anything;
    - when the key already exists and either the old or the new value is a
      string, the new value wins only if its text is at least as long as
      the old one;
    - in every other case the new value is stored.

    Returns the merged dictionary.
    """
    merged = dict(dict1.items()) if dict1 else {}
    # dict2 gets the same None tolerance that dict1 already had.
    if not dict2:
        return merged
    for key, value in dict2.items():
        if not value:
            continue
        if key in merged and (isinstance(value, str) or isinstance(merged[key], str)):
            # A shorter string never replaces a longer one.
            if len(str(value)) < len(str(merged[key])):
                continue
        merged[key] = value
    return merged
|
|
506
|
+
def cookieserTdict(self,cookiesstr):
    """Convert a cookie string such as ``"a=1; b=2"`` into a dictionary.

    cookiesstr: ``name=value`` pairs separated by ``;``

    Each pair is split on its first ``=`` only, so values that themselves
    contain ``=`` (base64 padding, for instance) are kept intact.
    Whitespace around names and values is removed, which copes with the
    usual ``"; "`` separator. Pairs with no ``=`` or with an empty name
    are ignored.

    Returns the dictionary, or None when cookiesstr is not a string.
    """
    if not isinstance(cookiesstr, str):
        return None
    cookies = {}
    for pair in cookiesstr.split(";"):
        name, separator, value = pair.partition("=")
        name = name.strip()
        if name and separator:
            cookies[name] = value.strip()
    return cookies
|
|
516
|
+
def cookieTdictstr(self,cookie):
    """Serialize a cookie dictionary into a ``name=value;`` string.

    cookie: dictionary of cookie names to values; None or an empty
        dictionary yields an empty string

    Falsy values (``None``, ``''``, ``0``) are written as an empty value.
    The dictionary passed in is not modified.

    Returns the concatenated string, each pair terminated by ``;``.
    """
    if not cookie:
        return ''
    pairs = []
    for key in cookie:
        value = cookie[key]
        pairs.append(str(key) + "=" + (str(value) if value else '') + ";")
    return ''.join(pairs)
|
|
523
|
+
def __get_pyquery_rules(self,rulestext):
    """Parse a rule string into a list of selector steps.

    rulestext: rule string in which a selector may be followed by ``{n}``
        to pick one match, e.g. ``.page-tip{0}`` for the first match and
        ``.page-tip{1}`` for the second

    Returns a list of ``{'val': selector, 'eq': n}`` dictionaries in rule
    order; ``eq`` is the string ``'null'`` for a selector with no ``{n}``.
    Segments whose selector text is empty are left out.
    """
    steps = []
    for segment in rulestext.split('}'):
        selector, position = segment, 'null'
        if '{' in segment:
            pieces = segment.split('{')
            selector, position = pieces[0], int(pieces[1])
        if selector:
            steps.append({'val': selector, 'eq': position})
    return steps
|
|
540
|
+
def __get_pyquery_rules_obj(self,rulestext,pyqueryobj):
    """Select elements from a pyquery object by following a rule string.

    rulestext: rule string, e.g. ``.gknb-box{1}ul li``; several
        alternatives may be separated by ``,`` and are tried in order

    pyqueryobj: the object obtained by loading HTML into pyquery, ``pq(html)``

    Every step of an alternative is applied with ``find``; a step carrying
    ``{n}`` is then narrowed with ``eq(n)``. Both forms of rule string treat
    ``{n}`` the same way, so ``{0}`` selects the first match and a step with
    no index is never compared against a number.

    Returns the selection of the first alternative that matches at least
    one element, or None when nothing matches.
    """
    for alternative in rulestext.split(','):
        selection = pyqueryobj
        for step in self.__get_pyquery_rules(alternative):
            selection = selection.find(step['val'])
            # 'null' is the parser's marker for "no {n} given".
            if step['eq'] != 'null':
                selection = selection.eq(step['eq'])
        if selection.length:
            return selection
    return None
|