pyxllib 0.3.197__py3-none-any.whl → 0.3.200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +21 -21
- pyxllib/algo/__init__.py +8 -8
- pyxllib/algo/disjoint.py +54 -54
- pyxllib/algo/geo.py +541 -541
- pyxllib/algo/intervals.py +964 -964
- pyxllib/algo/matcher.py +389 -389
- pyxllib/algo/newbie.py +166 -166
- pyxllib/algo/pupil.py +629 -629
- pyxllib/algo/shapelylib.py +67 -67
- pyxllib/algo/specialist.py +241 -241
- pyxllib/algo/stat.py +494 -494
- pyxllib/algo/treelib.py +149 -149
- pyxllib/algo/unitlib.py +66 -66
- pyxllib/autogui/__init__.py +5 -5
- pyxllib/autogui/activewin.py +246 -246
- pyxllib/autogui/all.py +9 -9
- pyxllib/autogui/autogui.py +852 -852
- pyxllib/autogui/uiautolib.py +362 -362
- pyxllib/autogui/virtualkey.py +102 -102
- pyxllib/autogui/wechat.py +827 -827
- pyxllib/autogui/wechat_msg.py +421 -421
- pyxllib/autogui/wxautolib.py +84 -84
- pyxllib/cv/__init__.py +5 -5
- pyxllib/cv/expert.py +267 -267
- pyxllib/cv/imfile.py +159 -159
- pyxllib/cv/imhash.py +39 -39
- pyxllib/cv/pupil.py +9 -9
- pyxllib/cv/rgbfmt.py +1525 -1525
- pyxllib/cv/slidercaptcha.py +137 -137
- pyxllib/cv/trackbartools.py +251 -251
- pyxllib/cv/xlcvlib.py +1040 -1040
- pyxllib/cv/xlpillib.py +423 -423
- pyxllib/data/echarts.py +240 -240
- pyxllib/data/jsonlib.py +89 -89
- pyxllib/data/oss.py +72 -72
- pyxllib/data/pglib.py +1127 -1127
- pyxllib/data/sqlite.py +568 -568
- pyxllib/data/sqllib.py +297 -297
- pyxllib/ext/JLineViewer.py +505 -505
- pyxllib/ext/__init__.py +6 -6
- pyxllib/ext/demolib.py +246 -246
- pyxllib/ext/drissionlib.py +277 -277
- pyxllib/ext/kq5034lib.py +12 -12
- pyxllib/ext/old.py +663 -663
- pyxllib/ext/qt.py +449 -449
- pyxllib/ext/robustprocfile.py +497 -497
- pyxllib/ext/seleniumlib.py +76 -76
- pyxllib/ext/tk.py +173 -173
- pyxllib/ext/unixlib.py +827 -827
- pyxllib/ext/utools.py +351 -351
- pyxllib/ext/webhook.py +124 -119
- pyxllib/ext/win32lib.py +40 -40
- pyxllib/ext/wjxlib.py +88 -88
- pyxllib/ext/wpsapi.py +124 -124
- pyxllib/ext/xlwork.py +9 -9
- pyxllib/ext/yuquelib.py +1105 -1105
- pyxllib/file/__init__.py +17 -17
- pyxllib/file/docxlib.py +761 -761
- pyxllib/file/gitlib.py +309 -309
- pyxllib/file/libreoffice.py +165 -165
- pyxllib/file/movielib.py +148 -148
- pyxllib/file/newbie.py +10 -10
- pyxllib/file/onenotelib.py +1469 -1469
- pyxllib/file/packlib/__init__.py +330 -330
- pyxllib/file/packlib/zipfile.py +2441 -2441
- pyxllib/file/pdflib.py +426 -426
- pyxllib/file/pupil.py +185 -185
- pyxllib/file/specialist/__init__.py +685 -685
- pyxllib/file/specialist/dirlib.py +799 -799
- pyxllib/file/specialist/download.py +193 -193
- pyxllib/file/specialist/filelib.py +2829 -2829
- pyxllib/file/xlsxlib.py +3131 -3131
- pyxllib/file/xlsyncfile.py +341 -341
- pyxllib/prog/__init__.py +5 -5
- pyxllib/prog/cachetools.py +64 -64
- pyxllib/prog/deprecatedlib.py +233 -233
- pyxllib/prog/filelock.py +42 -42
- pyxllib/prog/ipyexec.py +253 -253
- pyxllib/prog/multiprogs.py +940 -940
- pyxllib/prog/newbie.py +451 -451
- pyxllib/prog/pupil.py +1197 -1197
- pyxllib/prog/sitepackages.py +33 -33
- pyxllib/prog/specialist/__init__.py +391 -391
- pyxllib/prog/specialist/bc.py +203 -203
- pyxllib/prog/specialist/browser.py +497 -497
- pyxllib/prog/specialist/common.py +347 -347
- pyxllib/prog/specialist/datetime.py +198 -198
- pyxllib/prog/specialist/tictoc.py +240 -240
- pyxllib/prog/specialist/xllog.py +180 -180
- pyxllib/prog/xlosenv.py +108 -108
- pyxllib/stdlib/__init__.py +17 -17
- pyxllib/stdlib/tablepyxl/__init__.py +10 -10
- pyxllib/stdlib/tablepyxl/style.py +303 -303
- pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
- pyxllib/text/__init__.py +8 -8
- pyxllib/text/ahocorasick.py +39 -39
- pyxllib/text/airscript.js +744 -744
- pyxllib/text/charclasslib.py +121 -121
- pyxllib/text/jiebalib.py +267 -267
- pyxllib/text/jinjalib.py +32 -32
- pyxllib/text/jsa_ai_prompt.md +271 -271
- pyxllib/text/jscode.py +922 -922
- pyxllib/text/latex/__init__.py +158 -158
- pyxllib/text/levenshtein.py +303 -303
- pyxllib/text/nestenv.py +1215 -1215
- pyxllib/text/newbie.py +300 -300
- pyxllib/text/pupil/__init__.py +8 -8
- pyxllib/text/pupil/common.py +1121 -1121
- pyxllib/text/pupil/xlalign.py +326 -326
- pyxllib/text/pycode.py +47 -47
- pyxllib/text/specialist/__init__.py +8 -8
- pyxllib/text/specialist/common.py +112 -112
- pyxllib/text/specialist/ptag.py +186 -186
- pyxllib/text/spellchecker.py +172 -172
- pyxllib/text/templates/echart_base.html +10 -10
- pyxllib/text/templates/highlight_code.html +16 -16
- pyxllib/text/templates/latex_editor.html +102 -102
- pyxllib/text/vbacode.py +17 -17
- pyxllib/text/xmllib.py +747 -747
- pyxllib/xl.py +42 -39
- pyxllib/xlcv.py +17 -17
- {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/METADATA +1 -1
- pyxllib-0.3.200.dist-info/RECORD +126 -0
- {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/licenses/LICENSE +190 -190
- pyxllib-0.3.197.dist-info/RECORD +0 -126
- {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/WHEEL +0 -0
pyxllib/ext/webhook.py
CHANGED
@@ -1,119 +1,124 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2020/10/21 09:22
|
6
|
-
|
7
|
-
import os
|
8
|
-
import time
|
9
|
-
import hmac
|
10
|
-
import hashlib
|
11
|
-
import base64
|
12
|
-
import urllib.parse
|
13
|
-
|
14
|
-
import requests
|
15
|
-
|
16
|
-
|
17
|
-
class WeixinRobot:
    """WeCom (Enterprise WeChat) group-robot webhook client."""

    def __init__(self, url):
        """Remember the webhook URL that messages are posted to."""
        self.url = url

    @staticmethod
    def _build_payload(s, msgtype='text'):
        """Build the JSON body for one message.

        A plain string is wrapped as ``{"content": s}``; any other value is
        taken to be a ready-made message body and is passed through unchanged.
        """
        body = {"content": s} if isinstance(s, str) else s
        return {"msgtype": msgtype, msgtype: body}

    def push_text(self, s):
        """Post ``s`` as a ``text`` message, ignoring connection failures.

        Args:
            s: the message text, or an already-built message body.

        Only ``requests.exceptions.ConnectionError`` is suppressed; any other
        exception raised by ``requests`` still propagates.
        """
        # The body is serialized through ``json=``, so declare it as JSON
        # instead of the mismatched "text/plain" the header used to claim.
        headers = {"Content-Type": "application/json"}
        data = self._build_payload(s, 'text')
        try:
            requests.post(url=self.url, headers=headers, json=data)
        except requests.exceptions.ConnectionError:
            # Best-effort notification: being offline must not break the caller.
            pass
|
32
|
-
|
33
|
-
|
34
|
-
class DingtalkRobot:
    """DingTalk custom webhook robot.

    https://ding-doc.dingtalk.com/doc#/serverapi2/qf2nxq
    """

    def __init__(self, access_token=None, secret=None):
        """Build the webhook URL for one robot.

        Args:
            access_token: robot access token; when falsy it is read from the
                ``DINGTALK_ROBOT_SECRET`` environment variable.
            secret: signing secret; when falsy it is read from the same
                environment variable. Without any secret the URL is unsigned.
        """
        # NOTE(review): the access token falls back to the *secret* variable,
        # which looks like a copy-paste slip. Kept as-is because the intended
        # variable name is not visible here -- confirm before changing.
        access_token = access_token or os.getenv('DINGTALK_ROBOT_SECRET')
        self._base_url = f'https://oapi.dingtalk.com/robot/send?access_token={access_token}'
        self._secret = secret or os.getenv('DINGTALK_ROBOT_SECRET')
        self.url = self._base_url + self.add_secret(self._secret)
        self.headers = {"Content-Type": "application/json"}

    @classmethod
    def add_secret(cls, secret):
        """Return the ``&timestamp=...&sign=...`` query suffix for ``secret``.

        The signature is the URL-quoted base64 of HMAC-SHA256 over
        ``"<timestamp>\\n<secret>"`` keyed with ``secret``. An empty string is
        returned when ``secret`` is falsy, i.e. the robot does not use signing.
        """
        if not secret:
            return ''
        timestamp = str(round(time.time() * 1000))  # milliseconds
        key = secret.encode('utf-8')
        string_to_sign = f'{timestamp}\n{secret}'.encode('utf-8')
        digest = hmac.new(key, string_to_sign, digestmod=hashlib.sha256).digest()
        sign = urllib.parse.quote_plus(base64.b64encode(digest))
        return f'&timestamp={timestamp}&sign={sign}'

    def send_data(self, data):
        """POST ``data`` as JSON to the robot.

        Errors raised by ``requests`` (including ``ConnectionError`` when the
        network is down) propagate to the caller.
        """
        # The signature embeds the signing time, so refresh it for every
        # request instead of reusing the one computed in ``__init__``;
        # a long-lived robot would otherwise send a stale timestamp.
        secret = getattr(self, '_secret', None)
        if secret:
            self.url = self._base_url + self.add_secret(secret)
        requests.post(url=self.url, headers=self.headers, json=data)

    def _send_message(self, msgtype, **fields):
        """Send a ``msgtype`` message whose body holds only the truthy ``fields``."""
        body = {key: value for key, value in fields.items() if value}
        self.send_data({"msgtype": msgtype, msgtype: body})

    def send_text(self, content):
        """Send a plain ``text`` message."""
        self._send_message('text', content=content)

    def send_link(self, text='', title='', pic_url='', message_url=''):
        """Send a ``link`` message; empty fields are omitted from the body."""
        self._send_message('link', text=text, title=title,
                           picUrl=pic_url, messageUrl=message_url)

    def send_markdown(self, text='', title=''):
        """Send a ``markdown`` message; empty fields are omitted from the body."""
        # Was sent with msgtype 'link', which is a different message type.
        self._send_message('markdown', text=text, title=title)

    def send_actioncard(self, text='', title='', siggle_url='', siggle_title='', btn_orientation='0'):
        """Not implemented yet."""
        raise NotImplementedError

    def send_feedcard(self):
        """Not implemented yet."""
        raise NotImplementedError
|
93
|
-
|
94
|
-
|
95
|
-
class DingtalkRobot2(DingtalkRobot):
    """DingTalk robot with a prefixed-message helper and context-manager support.

    Used as ``with DingtalkRobot2(title):`` it reports the start of the block
    and then either its completion or the exception that ended it.
    """

    def __init__(self, title=None, access_token=None, secret=None):
        """
        Args:
            title: optional label inserted as ``[title]`` in every message.
            access_token: forwarded to ``DingtalkRobot``; None keeps its default.
            secret: forwarded to ``DingtalkRobot``; None keeps its default.
        """
        super().__init__(access_token, secret)
        self.title = title

    def send_text2(self, text):
        """Send ``text`` prefixed with a UTC timestamp, the host name and the title."""
        from pyxllib.prog.pupil import utc_timestamp

        # ``get_host_nickname`` is neither imported nor defined in this module,
        # so the call used to fail with NameError. Use it when something has
        # put it in scope, otherwise fall back to the plain host name.
        # NOTE(review): import it from its real home once that is confirmed.
        try:
            host = get_host_nickname()
        except NameError:
            import socket
            host = socket.gethostname()

        tag = f' [{self.title}]' if self.title else ''
        self.send_text(f'{utc_timestamp()} {host}{tag} {text}')

    def __enter__(self):
        """Announce the start of the block and return the robot."""
        self.send_text2('启动')
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Report completion or the exception; the exception is not suppressed."""
        from pyxllib.prog.pupil import format_exception

        if exc_tb is None:
            self.send_text2('完成')
        else:
            self.send_text2(f'报错\n{format_exception(exc_val, 3)}')
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2020/10/21 09:22
|
6
|
+
|
7
|
+
import os
|
8
|
+
import time
|
9
|
+
import hmac
|
10
|
+
import hashlib
|
11
|
+
import base64
|
12
|
+
import urllib.parse
|
13
|
+
|
14
|
+
import requests
|
15
|
+
|
16
|
+
|
17
|
+
class WeixinRobot:
    """WeCom (Enterprise WeChat) group-robot webhook client."""

    def __init__(self, url):
        """Remember the webhook URL that messages are posted to."""
        self.url = url

    @staticmethod
    def _build_payload(s, msgtype='text'):
        """Build the JSON body for one message.

        A plain string is wrapped as ``{"content": s}``; any other value is
        taken to be a ready-made message body and is passed through unchanged.
        """
        body = {"content": s} if isinstance(s, str) else s
        return {"msgtype": msgtype, msgtype: body}

    def push_text(self, s):
        """Post ``s`` as a ``text`` message, ignoring connection failures.

        Args:
            s: the message text, or an already-built message body.

        Only ``requests.exceptions.ConnectionError`` is suppressed; any other
        exception raised by ``requests`` still propagates.
        """
        # The body is serialized through ``json=``, so declare it as JSON
        # instead of the mismatched "text/plain" the header used to claim.
        headers = {"Content-Type": "application/json"}
        data = self._build_payload(s, 'text')
        try:
            requests.post(url=self.url, headers=headers, json=data)
        except requests.exceptions.ConnectionError:
            # Best-effort notification: being offline must not break the caller.
            pass
|
32
|
+
|
33
|
+
|
34
|
+
class DingtalkRobot:
    """DingTalk custom webhook robot.

    https://ding-doc.dingtalk.com/doc#/serverapi2/qf2nxq
    """

    def __init__(self, access_token=None, secret=None):
        """Build the webhook URL for one robot.

        Args:
            access_token: robot access token; when falsy it is read from the
                ``DINGTALK_ROBOT_SECRET`` environment variable.
            secret: signing secret; when falsy it is read from the same
                environment variable. Without any secret the URL is unsigned.
        """
        # NOTE(review): the access token falls back to the *secret* variable,
        # which looks like a copy-paste slip. Kept as-is because the intended
        # variable name is not visible here -- confirm before changing.
        access_token = access_token or os.getenv('DINGTALK_ROBOT_SECRET')
        self._base_url = f'https://oapi.dingtalk.com/robot/send?access_token={access_token}'
        self._secret = secret or os.getenv('DINGTALK_ROBOT_SECRET')
        self.url = self._base_url + self.add_secret(self._secret)
        self.headers = {"Content-Type": "application/json"}

    @classmethod
    def add_secret(cls, secret):
        """Return the ``&timestamp=...&sign=...`` query suffix for ``secret``.

        The signature is the URL-quoted base64 of HMAC-SHA256 over
        ``"<timestamp>\\n<secret>"`` keyed with ``secret``. An empty string is
        returned when ``secret`` is falsy, i.e. the robot does not use signing.
        """
        if not secret:
            return ''
        timestamp = str(round(time.time() * 1000))  # milliseconds
        key = secret.encode('utf-8')
        string_to_sign = f'{timestamp}\n{secret}'.encode('utf-8')
        digest = hmac.new(key, string_to_sign, digestmod=hashlib.sha256).digest()
        sign = urllib.parse.quote_plus(base64.b64encode(digest))
        return f'&timestamp={timestamp}&sign={sign}'

    def send_data(self, data):
        """POST ``data`` as JSON to the robot.

        Errors raised by ``requests`` (including ``ConnectionError`` when the
        network is down) propagate to the caller.
        """
        # The signature embeds the signing time, so refresh it for every
        # request instead of reusing the one computed in ``__init__``;
        # a long-lived robot would otherwise send a stale timestamp.
        secret = getattr(self, '_secret', None)
        if secret:
            self.url = self._base_url + self.add_secret(secret)
        requests.post(url=self.url, headers=self.headers, json=data)

    def _send_message(self, msgtype, **fields):
        """Send a ``msgtype`` message whose body holds only the truthy ``fields``."""
        body = {key: value for key, value in fields.items() if value}
        self.send_data({"msgtype": msgtype, msgtype: body})

    def send_text(self, content):
        """Send a plain ``text`` message."""
        self._send_message('text', content=content)

    def send_link(self, text='', title='', pic_url='', message_url=''):
        """Send a ``link`` message; empty fields are omitted from the body."""
        self._send_message('link', text=text, title=title,
                           picUrl=pic_url, messageUrl=message_url)

    def send_markdown(self, text='', title=''):
        """Send a ``markdown`` message; empty fields are omitted from the body."""
        # Was sent with msgtype 'link', which is a different message type.
        self._send_message('markdown', text=text, title=title)

    def send_actioncard(self, text='', title='', siggle_url='', siggle_title='', btn_orientation='0'):
        """Not implemented yet."""
        raise NotImplementedError

    def send_feedcard(self):
        """Not implemented yet."""
        raise NotImplementedError
|
93
|
+
|
94
|
+
|
95
|
+
class DingtalkRobot2(DingtalkRobot):
    """DingTalk robot with a prefixed-message helper and context-manager support.

    Used as ``with DingtalkRobot2(title):`` it reports the start of the block
    and then either its completion or the exception that ended it.
    """

    def __init__(self, title=None, access_token=None, secret=None):
        """
        Args:
            title: optional label inserted as ``[title]`` in every message.
            access_token: forwarded to ``DingtalkRobot``; None keeps its default.
            secret: forwarded to ``DingtalkRobot``; None keeps its default.
        """
        super().__init__(access_token, secret)
        self.title = title

    def send_text2(self, text):
        """Send ``text`` prefixed with a UTC timestamp, the host name and the title."""
        from pyxllib.prog.pupil import utc_timestamp

        # ``get_host_nickname`` is neither imported nor defined in this module,
        # so the call used to fail with NameError. Use it when something has
        # put it in scope, otherwise fall back to the plain host name.
        # NOTE(review): import it from its real home once that is confirmed.
        try:
            host = get_host_nickname()
        except NameError:
            import socket
            host = socket.gethostname()

        tag = f' [{self.title}]' if self.title else ''
        self.send_text(f'{utc_timestamp()} {host}{tag} {text}')

    def __enter__(self):
        """Announce the start of the block and return the robot."""
        self.send_text2('启动')
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Report completion or the exception; the exception is not suppressed."""
        from pyxllib.prog.pupil import format_exception

        if exc_tb is None:
            self.send_text2('完成')
        else:
            self.send_text2(f'报错\n{format_exception(exc_val, 3)}')
|
120
|
+
|
121
|
+
if __name__ == '__main__':
|
122
|
+
pass
|
123
|
+
|
124
|
+
from pyxllib.prog.newbie import typename
|
pyxllib/ext/win32lib.py
CHANGED
@@ -1,40 +1,40 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2021/09/07 10:21
|
6
|
-
|
7
|
-
|
8
|
-
import win32com.client as win32
|
9
|
-
import pythoncom
|
10
|
-
|
11
|
-
|
12
|
-
def get_win32_app(name, visible=False):
    """Get an application object that can be automated through pywin32.

    Args:
        name (str): application name, case-insensitive, e.g. word, excel,
            powerpoint, onenote. When it contains no '.', the suffix
            '.application' is appended automatically.
        visible: value assigned to ``app.Visible``; pass None to leave the
            application's visibility untouched.

    Returns:
        The application object.
    """
    # 1 Normalize the name
    prog_id = name.lower()
    if '.' not in prog_id:
        prog_id = f'{prog_id}.application'

    # 2 Obtain the application
    # Author's note: this may still be imperfect -- different applications
    # behave differently; to be refined later.
    try:
        # Author's note: this cannot attach to an application opened the
        # normal way, but instances opened from code are found.
        app = win32.GetActiveObject(prog_id)
    except pythoncom.com_error:
        app = win32.gencache.EnsureDispatch(prog_id)
        # Other common ways to initialize are win32com.client.Dispatch and
        # win32com.client.dynamic.Dispatch.

    if visible is None:
        return app
    app.Visible = visible
    return app
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2021/09/07 10:21
|
6
|
+
|
7
|
+
|
8
|
+
import win32com.client as win32
|
9
|
+
import pythoncom
|
10
|
+
|
11
|
+
|
12
|
+
def get_win32_app(name, visible=False):
    """Get an application object that can be automated through pywin32.

    Args:
        name (str): application name, case-insensitive, e.g. word, excel,
            powerpoint, onenote. When it contains no '.', the suffix
            '.application' is appended automatically.
        visible: value assigned to ``app.Visible``; pass None to leave the
            application's visibility untouched.

    Returns:
        The application object.
    """
    # 1 Normalize the name
    prog_id = name.lower()
    if '.' not in prog_id:
        prog_id = f'{prog_id}.application'

    # 2 Obtain the application
    # Author's note: this may still be imperfect -- different applications
    # behave differently; to be refined later.
    try:
        # Author's note: this cannot attach to an application opened the
        # normal way, but instances opened from code are found.
        app = win32.GetActiveObject(prog_id)
    except pythoncom.com_error:
        app = win32.gencache.EnsureDispatch(prog_id)
        # Other common ways to initialize are win32com.client.Dispatch and
        # win32com.client.dynamic.Dispatch.

    if visible is None:
        return app
    app.Visible = visible
    return app
|
pyxllib/ext/wjxlib.py
CHANGED
@@ -1,88 +1,88 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2024/11/19
|
6
|
-
|
7
|
-
""" 问卷星 相关工具 """
|
8
|
-
|
9
|
-
import os
|
10
|
-
import io
|
11
|
-
import time
|
12
|
-
|
13
|
-
from loguru import logger
|
14
|
-
from DrissionPage import Chromium
|
15
|
-
import pandas as pd
|
16
|
-
|
17
|
-
from pyxllib.ext.drissionlib import DpWebBase
|
18
|
-
|
19
|
-
|
20
|
-
class WjxWeb(DpWebBase):
    """Crawler for wjx.cn survey pages."""

    def __init__(self, url=None):
        """Open ``url`` (default: the wjx.cn home page) and run ``login``."""
        target = url or 'https://www.wjx.cn'
        super().__init__(target)
        self.login()

    def login(self):
        """Submit the login form when the current tab is on the login page.

        Credentials are read from the ``WJX_USERNAME`` and ``WJX_PASSWORD``
        environment variables.
        """
        page = self.tab

        # The "result limit" page only offers a login link; follow it first.
        if page.url.startswith('https://www.wjx.cn/wjx/activitystat/resultlimit.aspx'):
            page('tag:a@@text():登录').click()

        on_login_page = page.url.lower().startswith('https://www.wjx.cn/login.aspx')
        if not on_login_page:
            return
        page('tag:input@@name=UserName').input(os.getenv('WJX_USERNAME'), clear=True)
        page('tag:input@@name=Password').input(os.getenv('WJX_PASSWORD'), clear=True)
        page('tag:input@@type=submit').click()

    def get_page_num(self):
        """Return the current page index and the total page count as (idx, num)."""
        label = self.tab('tag:span@@class=paging-num').text
        idx, num = (int(part) for part in label.split('/'))
        return idx, num

    def prev_page(self):
        """Click the "previous page" link."""
        self.tab('tag:a@@class=go-pre').click()

    def next_page(self):
        """Click the "next page" link."""
        self.tab('tag:a@@class=go-next').click()

    def _parse_table(self):
        """Parse the first table on the page into a DataFrame, stripping icon glyphs."""
        html = self.tab('tag:table').html
        tables = pd.read_html(io.StringIO(html))
        df = tables[0]
        df.columns = [col.replace('\ue645', '') for col in df.columns]
        # First the glyph in the "星标" (star) cells, then the glyph pair in
        # the "操作" (actions) cells.
        for glyphs in ('\ue66b', '\ue6a3\ue6d4'):
            df.replace(glyphs, '', regex=True, inplace=True)
        return df

    def set_num_of_page(self, num_of_page):
        """On the data view, choose how many records are shown per page."""
        dropdown = self.tab('tag:span@@text():每页显示').next('tag:select')
        dropdown.click()
        option = dropdown(f'tag:option@@text()={num_of_page}')
        already_selected = option.attr('selected') == 'selected'
        # Pick the option, or click the dropdown a second time when the
        # requested value is already the selected one.
        (dropdown if already_selected else option).click()

    def get_df(self, all_pages=False):
        """Return the table of the current page, or of every remaining page.

        With ``all_pages`` true, pages are walked with ``next_page`` from the
        current one to the last and the tables are concatenated.
        """
        first = self._parse_table()
        if not all_pages:
            return first

        frames = [first]
        idx, total = self.get_page_num()
        while idx < total:
            self.next_page()
            time.sleep(2)  # give the next page time to load
            frames.append(self._parse_table())
            idx, total = self.get_page_num()  # refresh paging info
        return pd.concat(frames, ignore_index=True)
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2024/11/19
|
6
|
+
|
7
|
+
""" 问卷星 相关工具 """
|
8
|
+
|
9
|
+
import os
|
10
|
+
import io
|
11
|
+
import time
|
12
|
+
|
13
|
+
from loguru import logger
|
14
|
+
from DrissionPage import Chromium
|
15
|
+
import pandas as pd
|
16
|
+
|
17
|
+
from pyxllib.ext.drissionlib import DpWebBase
|
18
|
+
|
19
|
+
|
20
|
+
class WjxWeb(DpWebBase):
    """Crawler for wjx.cn survey pages."""

    def __init__(self, url=None):
        """Open ``url`` (default: the wjx.cn home page) and run ``login``."""
        target = url or 'https://www.wjx.cn'
        super().__init__(target)
        self.login()

    def login(self):
        """Submit the login form when the current tab is on the login page.

        Credentials are read from the ``WJX_USERNAME`` and ``WJX_PASSWORD``
        environment variables.
        """
        page = self.tab

        # The "result limit" page only offers a login link; follow it first.
        if page.url.startswith('https://www.wjx.cn/wjx/activitystat/resultlimit.aspx'):
            page('tag:a@@text():登录').click()

        on_login_page = page.url.lower().startswith('https://www.wjx.cn/login.aspx')
        if not on_login_page:
            return
        page('tag:input@@name=UserName').input(os.getenv('WJX_USERNAME'), clear=True)
        page('tag:input@@name=Password').input(os.getenv('WJX_PASSWORD'), clear=True)
        page('tag:input@@type=submit').click()

    def get_page_num(self):
        """Return the current page index and the total page count as (idx, num)."""
        label = self.tab('tag:span@@class=paging-num').text
        idx, num = (int(part) for part in label.split('/'))
        return idx, num

    def prev_page(self):
        """Click the "previous page" link."""
        self.tab('tag:a@@class=go-pre').click()

    def next_page(self):
        """Click the "next page" link."""
        self.tab('tag:a@@class=go-next').click()

    def _parse_table(self):
        """Parse the first table on the page into a DataFrame, stripping icon glyphs."""
        html = self.tab('tag:table').html
        tables = pd.read_html(io.StringIO(html))
        df = tables[0]
        df.columns = [col.replace('\ue645', '') for col in df.columns]
        # First the glyph in the "星标" (star) cells, then the glyph pair in
        # the "操作" (actions) cells.
        for glyphs in ('\ue66b', '\ue6a3\ue6d4'):
            df.replace(glyphs, '', regex=True, inplace=True)
        return df

    def set_num_of_page(self, num_of_page):
        """On the data view, choose how many records are shown per page."""
        dropdown = self.tab('tag:span@@text():每页显示').next('tag:select')
        dropdown.click()
        option = dropdown(f'tag:option@@text()={num_of_page}')
        already_selected = option.attr('selected') == 'selected'
        # Pick the option, or click the dropdown a second time when the
        # requested value is already the selected one.
        (dropdown if already_selected else option).click()

    def get_df(self, all_pages=False):
        """Return the table of the current page, or of every remaining page.

        With ``all_pages`` true, pages are walked with ``next_page`` from the
        current one to the last and the tables are concatenated.
        """
        first = self._parse_table()
        if not all_pages:
            return first

        frames = [first]
        idx, total = self.get_page_num()
        while idx < total:
            self.next_page()
            time.sleep(2)  # give the next page time to load
            frames.append(self._parse_table())
            idx, total = self.get_page_num()  # refresh paging info
        return pd.concat(frames, ignore_index=True)
|