pyxllib 0.3.197__py3-none-any.whl → 3.201.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. pyxllib/__init__.py +14 -21
  2. pyxllib/algo/__init__.py +8 -8
  3. pyxllib/algo/disjoint.py +54 -54
  4. pyxllib/algo/geo.py +537 -541
  5. pyxllib/algo/intervals.py +964 -964
  6. pyxllib/algo/matcher.py +389 -389
  7. pyxllib/algo/newbie.py +166 -166
  8. pyxllib/algo/pupil.py +629 -629
  9. pyxllib/algo/shapelylib.py +67 -67
  10. pyxllib/algo/specialist.py +241 -241
  11. pyxllib/algo/stat.py +494 -494
  12. pyxllib/algo/treelib.py +145 -149
  13. pyxllib/algo/unitlib.py +62 -66
  14. pyxllib/autogui/__init__.py +5 -5
  15. pyxllib/autogui/activewin.py +246 -246
  16. pyxllib/autogui/all.py +9 -9
  17. pyxllib/autogui/autogui.py +846 -852
  18. pyxllib/autogui/uiautolib.py +362 -362
  19. pyxllib/autogui/virtualkey.py +102 -102
  20. pyxllib/autogui/wechat.py +827 -827
  21. pyxllib/autogui/wechat_msg.py +421 -421
  22. pyxllib/autogui/wxautolib.py +84 -84
  23. pyxllib/cv/__init__.py +5 -5
  24. pyxllib/cv/expert.py +267 -267
  25. pyxllib/cv/imfile.py +159 -159
  26. pyxllib/cv/imhash.py +39 -39
  27. pyxllib/cv/pupil.py +9 -9
  28. pyxllib/cv/rgbfmt.py +1525 -1525
  29. pyxllib/cv/slidercaptcha.py +137 -137
  30. pyxllib/cv/trackbartools.py +251 -251
  31. pyxllib/cv/xlcvlib.py +1040 -1040
  32. pyxllib/cv/xlpillib.py +423 -423
  33. pyxllib/data/echarts.py +236 -240
  34. pyxllib/data/jsonlib.py +85 -89
  35. pyxllib/data/oss.py +72 -72
  36. pyxllib/data/pglib.py +1111 -1127
  37. pyxllib/data/sqlite.py +568 -568
  38. pyxllib/data/sqllib.py +297 -297
  39. pyxllib/ext/JLineViewer.py +505 -505
  40. pyxllib/ext/__init__.py +6 -6
  41. pyxllib/ext/demolib.py +251 -246
  42. pyxllib/ext/drissionlib.py +277 -277
  43. pyxllib/ext/kq5034lib.py +12 -12
  44. pyxllib/ext/qt.py +449 -449
  45. pyxllib/ext/robustprocfile.py +493 -497
  46. pyxllib/ext/seleniumlib.py +76 -76
  47. pyxllib/ext/tk.py +173 -173
  48. pyxllib/ext/unixlib.py +821 -827
  49. pyxllib/ext/utools.py +345 -351
  50. pyxllib/ext/webhook.py +124 -119
  51. pyxllib/ext/win32lib.py +40 -40
  52. pyxllib/ext/wjxlib.py +91 -88
  53. pyxllib/ext/wpsapi.py +124 -124
  54. pyxllib/ext/xlwork.py +9 -9
  55. pyxllib/ext/yuquelib.py +1110 -1105
  56. pyxllib/file/__init__.py +17 -17
  57. pyxllib/file/docxlib.py +757 -761
  58. pyxllib/file/gitlib.py +309 -309
  59. pyxllib/file/libreoffice.py +165 -165
  60. pyxllib/file/movielib.py +144 -148
  61. pyxllib/file/newbie.py +10 -10
  62. pyxllib/file/onenotelib.py +1469 -1469
  63. pyxllib/file/packlib/__init__.py +330 -330
  64. pyxllib/file/packlib/zipfile.py +2441 -2441
  65. pyxllib/file/pdflib.py +422 -426
  66. pyxllib/file/pupil.py +185 -185
  67. pyxllib/file/specialist/__init__.py +681 -685
  68. pyxllib/file/specialist/dirlib.py +799 -799
  69. pyxllib/file/specialist/download.py +193 -193
  70. pyxllib/file/specialist/filelib.py +2825 -2829
  71. pyxllib/file/xlsxlib.py +3122 -3131
  72. pyxllib/file/xlsyncfile.py +341 -341
  73. pyxllib/prog/__init__.py +5 -5
  74. pyxllib/prog/cachetools.py +58 -64
  75. pyxllib/prog/deprecatedlib.py +233 -233
  76. pyxllib/prog/filelock.py +42 -42
  77. pyxllib/prog/ipyexec.py +253 -253
  78. pyxllib/prog/multiprogs.py +940 -940
  79. pyxllib/prog/newbie.py +451 -451
  80. pyxllib/prog/pupil.py +1208 -1197
  81. pyxllib/prog/sitepackages.py +33 -33
  82. pyxllib/prog/specialist/__init__.py +348 -391
  83. pyxllib/prog/specialist/bc.py +203 -203
  84. pyxllib/prog/specialist/browser.py +497 -497
  85. pyxllib/prog/specialist/common.py +347 -347
  86. pyxllib/prog/specialist/datetime.py +198 -198
  87. pyxllib/prog/specialist/tictoc.py +240 -240
  88. pyxllib/prog/specialist/xllog.py +180 -180
  89. pyxllib/prog/xlosenv.py +110 -108
  90. pyxllib/stdlib/__init__.py +17 -17
  91. pyxllib/stdlib/tablepyxl/__init__.py +10 -10
  92. pyxllib/stdlib/tablepyxl/style.py +303 -303
  93. pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
  94. pyxllib/text/__init__.py +8 -8
  95. pyxllib/text/ahocorasick.py +36 -39
  96. pyxllib/text/airscript.js +754 -744
  97. pyxllib/text/charclasslib.py +121 -121
  98. pyxllib/text/jiebalib.py +267 -267
  99. pyxllib/text/jinjalib.py +27 -32
  100. pyxllib/text/jsa_ai_prompt.md +271 -271
  101. pyxllib/text/jscode.py +922 -922
  102. pyxllib/text/latex/__init__.py +158 -158
  103. pyxllib/text/levenshtein.py +303 -303
  104. pyxllib/text/nestenv.py +1215 -1215
  105. pyxllib/text/newbie.py +300 -300
  106. pyxllib/text/pupil/__init__.py +8 -8
  107. pyxllib/text/pupil/common.py +1121 -1121
  108. pyxllib/text/pupil/xlalign.py +326 -326
  109. pyxllib/text/pycode.py +47 -47
  110. pyxllib/text/specialist/__init__.py +8 -8
  111. pyxllib/text/specialist/common.py +112 -112
  112. pyxllib/text/specialist/ptag.py +186 -186
  113. pyxllib/text/spellchecker.py +172 -172
  114. pyxllib/text/templates/echart_base.html +10 -10
  115. pyxllib/text/templates/highlight_code.html +16 -16
  116. pyxllib/text/templates/latex_editor.html +102 -102
  117. pyxllib/text/vbacode.py +17 -17
  118. pyxllib/text/xmllib.py +741 -747
  119. pyxllib/xl.py +42 -39
  120. pyxllib/xlcv.py +17 -17
  121. pyxllib-3.201.1.dist-info/METADATA +296 -0
  122. pyxllib-3.201.1.dist-info/RECORD +125 -0
  123. {pyxllib-0.3.197.dist-info → pyxllib-3.201.1.dist-info}/licenses/LICENSE +190 -190
  124. pyxllib/ext/old.py +0 -663
  125. pyxllib-0.3.197.dist-info/METADATA +0 -48
  126. pyxllib-0.3.197.dist-info/RECORD +0 -126
  127. {pyxllib-0.3.197.dist-info → pyxllib-3.201.1.dist-info}/WHEEL +0 -0
pyxllib/ext/webhook.py CHANGED
@@ -1,119 +1,124 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2020/10/21 09:22
6
-
7
- import os
8
- import time
9
- import hmac
10
- import hashlib
11
- import base64
12
- import urllib.parse
13
-
14
- import requests
15
-
16
-
17
- class WeixinRobot:
18
- """ 企业微信 机器人 """
19
-
20
- def __init__(self, url):
21
- self.url = url
22
-
23
- def push_text(self, s):
24
- msgtype = 'text'
25
- try:
26
- headers = {"Content-Type": "text/plain"}
27
- t = {"content": s} if isinstance(s, str) else s
28
- data = {"msgtype": msgtype, msgtype: t} # msgtype: text、markdown
29
- requests.post(url=self.url, headers=headers, json=data)
30
- except requests.exceptions.ConnectionError: # 没网发送失败的时候也不报错
31
- pass
32
-
33
-
34
- class DingtalkRobot:
35
- """ 钉钉 自定义webhook机器人
36
-
37
- https://ding-doc.dingtalk.com/doc#/serverapi2/qf2nxq
38
- """
39
-
40
- def __init__(self, access_token=None, secret=None):
41
- access_token = access_token or os.getenv('DINGTALK_ROBOT_SECRET')
42
- self.url = f'https://oapi.dingtalk.com/robot/send?access_token={access_token}'
43
- self.url += self.add_secret(secret or os.getenv('DINGTALK_ROBOT_SECRET'))
44
- self.headers = {"Content-Type": "application/json"}
45
-
46
- @classmethod
47
- def add_secret(cls, secret):
48
- """ 钉钉机器人需要加签,确保安全性 """
49
- timestamp = str(round(time.time() * 1000))
50
- secret_enc = secret.encode('utf-8')
51
- string_to_sign = '{}\n{}'.format(timestamp, secret)
52
- string_to_sign_enc = string_to_sign.encode('utf-8')
53
- hmac_code = hmac.new(secret_enc, string_to_sign_enc, digestmod=hashlib.sha256).digest()
54
- sign = urllib.parse.quote_plus(base64.b64encode(hmac_code))
55
- return f'&timestamp={timestamp}&sign={sign}'
56
-
57
- def send_data(self, data):
58
- try:
59
- requests.post(url=self.url, headers=self.headers, json=data)
60
- except requests.exceptions.ConnectionError as e: # 没网发送失败的时候也不报错
61
- raise e
62
-
63
- def send_text(self, content):
64
- msgtype = 'text'
65
- d = {}
66
- if content: d['content'] = content
67
- data = {"msgtype": msgtype, msgtype: d}
68
- self.send_data(data)
69
-
70
- def send_link(self, text='', title='', pic_url='', message_url=''):
71
- msgtype = 'link'
72
- d = {}
73
- if text: d['text'] = text
74
- if title: d['title'] = title
75
- if pic_url: d['picUrl'] = pic_url
76
- if message_url: d['messageUrl'] = message_url
77
- data = {"msgtype": msgtype, msgtype: d}
78
- self.send_data(data)
79
-
80
- def send_markdown(self, text='', title=''):
81
- msgtype = 'link'
82
- d = {}
83
- if text: d['text'] = text
84
- if title: d['title'] = title
85
- data = {"msgtype": msgtype, msgtype: d}
86
- self.send_data(data)
87
-
88
- def send_actioncard(self, text='', title='', siggle_url='', siggle_title='', btn_orientation='0'):
89
- raise NotImplementedError
90
-
91
- def send_feedcard(self):
92
- raise NotImplementedError
93
-
94
-
95
- class DingtalkRobot2(DingtalkRobot):
96
-
97
- def __init__(self, title=None):
98
- super().__init__()
99
- self.title = title
100
-
101
- def send_text2(self, text): # 增加一个更加定制化的便捷接口
102
- from pyxllib.prog.pupil import utc_timestamp
103
-
104
- if self.title:
105
- self.send_text(f'{utc_timestamp()} {get_host_nickname()} [{self.title}] {text}')
106
- else:
107
- self.send_text(f'{utc_timestamp()} {get_host_nickname()} {text}')
108
-
109
- def __enter__(self):
110
- self.send_text2('启动')
111
- return self
112
-
113
- def __exit__(self, exc_type, exc_val, exc_tb):
114
- from pyxllib.prog.pupil import format_exception
115
-
116
- if exc_tb is None:
117
- self.send_text2('完成')
118
- else:
119
- self.send_text2(f'报错\n{format_exception(exc_val, 3)}')
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2020/10/21 09:22
6
+
7
+ import os
8
+ import time
9
+ import hmac
10
+ import hashlib
11
+ import base64
12
+ import urllib.parse
13
+
14
+ import requests
15
+
16
+
17
+ class WeixinRobot:
18
+ """ 企业微信 机器人 """
19
+
20
+ def __init__(self, url):
21
+ self.url = url
22
+
23
+ def push_text(self, s):
24
+ msgtype = 'text'
25
+ try:
26
+ headers = {"Content-Type": "text/plain"}
27
+ t = {"content": s} if isinstance(s, str) else s
28
+ data = {"msgtype": msgtype, msgtype: t} # msgtype: text、markdown
29
+ requests.post(url=self.url, headers=headers, json=data)
30
+ except requests.exceptions.ConnectionError: # 没网发送失败的时候也不报错
31
+ pass
32
+
33
+
34
+ class DingtalkRobot:
35
+ """ 钉钉 自定义webhook机器人
36
+
37
+ https://ding-doc.dingtalk.com/doc#/serverapi2/qf2nxq
38
+ """
39
+
40
+ def __init__(self, access_token=None, secret=None):
41
+ access_token = access_token or os.getenv('DINGTALK_ROBOT_SECRET')
42
+ self.url = f'https://oapi.dingtalk.com/robot/send?access_token={access_token}'
43
+ self.url += self.add_secret(secret or os.getenv('DINGTALK_ROBOT_SECRET'))
44
+ self.headers = {"Content-Type": "application/json"}
45
+
46
+ @classmethod
47
+ def add_secret(cls, secret):
48
+ """ 钉钉机器人需要加签,确保安全性 """
49
+ timestamp = str(round(time.time() * 1000))
50
+ secret_enc = secret.encode('utf-8')
51
+ string_to_sign = '{}\n{}'.format(timestamp, secret)
52
+ string_to_sign_enc = string_to_sign.encode('utf-8')
53
+ hmac_code = hmac.new(secret_enc, string_to_sign_enc, digestmod=hashlib.sha256).digest()
54
+ sign = urllib.parse.quote_plus(base64.b64encode(hmac_code))
55
+ return f'&timestamp={timestamp}&sign={sign}'
56
+
57
+ def send_data(self, data):
58
+ try:
59
+ requests.post(url=self.url, headers=self.headers, json=data)
60
+ except requests.exceptions.ConnectionError as e: # 没网发送失败的时候也不报错
61
+ raise e
62
+
63
+ def send_text(self, content):
64
+ msgtype = 'text'
65
+ d = {}
66
+ if content: d['content'] = content
67
+ data = {"msgtype": msgtype, msgtype: d}
68
+ self.send_data(data)
69
+
70
+ def send_link(self, text='', title='', pic_url='', message_url=''):
71
+ msgtype = 'link'
72
+ d = {}
73
+ if text: d['text'] = text
74
+ if title: d['title'] = title
75
+ if pic_url: d['picUrl'] = pic_url
76
+ if message_url: d['messageUrl'] = message_url
77
+ data = {"msgtype": msgtype, msgtype: d}
78
+ self.send_data(data)
79
+
80
+ def send_markdown(self, text='', title=''):
81
+ msgtype = 'link'
82
+ d = {}
83
+ if text: d['text'] = text
84
+ if title: d['title'] = title
85
+ data = {"msgtype": msgtype, msgtype: d}
86
+ self.send_data(data)
87
+
88
+ def send_actioncard(self, text='', title='', siggle_url='', siggle_title='', btn_orientation='0'):
89
+ raise NotImplementedError
90
+
91
+ def send_feedcard(self):
92
+ raise NotImplementedError
93
+
94
+
95
+ class DingtalkRobot2(DingtalkRobot):
96
+
97
+ def __init__(self, title=None):
98
+ super().__init__()
99
+ self.title = title
100
+
101
+ def send_text2(self, text): # 增加一个更加定制化的便捷接口
102
+ from pyxllib.prog.pupil import utc_timestamp
103
+
104
+ if self.title:
105
+ self.send_text(f'{utc_timestamp()} {get_host_nickname()} [{self.title}] {text}')
106
+ else:
107
+ self.send_text(f'{utc_timestamp()} {get_host_nickname()} {text}')
108
+
109
+ def __enter__(self):
110
+ self.send_text2('启动')
111
+ return self
112
+
113
+ def __exit__(self, exc_type, exc_val, exc_tb):
114
+ from pyxllib.prog.pupil import format_exception
115
+
116
+ if exc_tb is None:
117
+ self.send_text2('完成')
118
+ else:
119
+ self.send_text2(f'报错\n{format_exception(exc_val, 3)}')
120
+
121
+ if __name__ == '__main__':
122
+ pass
123
+
124
+ from pyxllib.prog.newbie import typename
pyxllib/ext/win32lib.py CHANGED
@@ -1,40 +1,40 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2021/09/07 10:21
6
-
7
-
8
- import win32com.client as win32
9
- import pythoncom
10
-
11
-
12
- def get_win32_app(name, visible=False):
13
- """ 启动可支持pywin32自动化处理的应用
14
-
15
- Args:
16
- str name: 应用名称,不区分大小写,比如word, excel, powerpoint, onenote
17
- 不带'.'的情况下,会自动添加'.Application'的后缀
18
- visible: 应用是否可见
19
-
20
- Returns: app
21
-
22
- """
23
- # 1 name
24
- name = name.lower()
25
- if '.' not in name:
26
- name += '.application'
27
-
28
- # 2 app
29
- # 这里可能还有些问题,不同的应用,机制不太一样,后面再细化完善吧
30
- try:
31
- app = win32.GetActiveObject(f'{name}') # 不能关联到普通方式打开的应用。但代码打开的应用都能找得到。
32
- except pythoncom.com_error:
33
- app = win32.gencache.EnsureDispatch(f'{name}')
34
- # 还有种常见的初始化方法,是 win32com.client.Dispatch和win32com.client.dynamic.Dispatch
35
- # from win32com.client.dynamic import Disypatch
36
-
37
- if visible is not None:
38
- app.Visible = visible
39
-
40
- return app
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2021/09/07 10:21
6
+
7
+
8
+ import win32com.client as win32
9
+ import pythoncom
10
+
11
+
12
+ def get_win32_app(name, visible=False):
13
+ """ 启动可支持pywin32自动化处理的应用
14
+
15
+ Args:
16
+ str name: 应用名称,不区分大小写,比如word, excel, powerpoint, onenote
17
+ 不带'.'的情况下,会自动添加'.Application'的后缀
18
+ visible: 应用是否可见
19
+
20
+ Returns: app
21
+
22
+ """
23
+ # 1 name
24
+ name = name.lower()
25
+ if '.' not in name:
26
+ name += '.application'
27
+
28
+ # 2 app
29
+ # 这里可能还有些问题,不同的应用,机制不太一样,后面再细化完善吧
30
+ try:
31
+ app = win32.GetActiveObject(f'{name}') # 不能关联到普通方式打开的应用。但代码打开的应用都能找得到。
32
+ except pythoncom.com_error:
33
+ app = win32.gencache.EnsureDispatch(f'{name}')
34
+ # 还有种常见的初始化方法,是 win32com.client.Dispatch和win32com.client.dynamic.Dispatch
35
+ # from win32com.client.dynamic import Disypatch
36
+
37
+ if visible is not None:
38
+ app.Visible = visible
39
+
40
+ return app
pyxllib/ext/wjxlib.py CHANGED
@@ -1,88 +1,91 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2024/11/19
6
-
7
- """ 问卷星 相关工具 """
8
-
9
- import os
10
- import io
11
- import time
12
-
13
- from loguru import logger
14
- from DrissionPage import Chromium
15
- import pandas as pd
16
-
17
- from pyxllib.ext.drissionlib import DpWebBase
18
-
19
-
20
- class WjxWeb(DpWebBase):
21
- """ 问卷星网页的爬虫 """
22
-
23
- def __init__(self, url=None):
24
- super().__init__(url or 'https://www.wjx.cn')
25
- self.login()
26
-
27
- def login(self):
28
- tab = self.tab
29
-
30
- if tab.url.startswith('https://www.wjx.cn/wjx/activitystat/resultlimit.aspx'):
31
- tab('tag:a@@text():登录').click()
32
-
33
- if tab.url.lower().startswith('https://www.wjx.cn/login.aspx'):
34
- tab('tag:input@@name=UserName').input(os.getenv('WJX_USERNAME'), clear=True)
35
- tab('tag:input@@name=Password').input(os.getenv('WJX_PASSWORD'), clear=True)
36
- tab('tag:input@@type=submit').click()
37
-
38
- def get_page_num(self):
39
- """
40
- 返回当前页编号和总页数 (idx, num)。
41
- """
42
- idx, num = map(int, self.tab('tag:span@@class=paging-num').text.split('/'))
43
- return idx, num
44
-
45
- def prev_page(self):
46
- self.tab('tag:a@@class=go-pre').click()
47
-
48
- def next_page(self):
49
- self.tab('tag:a@@class=go-next').click()
50
-
51
- def _parse_table(self):
52
- """处理并解析网页中的表格数据"""
53
- table_html = self.tab('tag:table').html
54
- df = pd.read_html(io.StringIO(table_html))[0] # 读取表格
55
- df.columns = [col.replace('\ue645', '') for col in df.columns]
56
- # "星标"的内容特殊字符
57
- df.replace('\ue66b', '', regex=True, inplace=True)
58
- # "操作"的内容特殊字符
59
- df.replace('\ue6a3\ue6d4', '', regex=True, inplace=True)
60
- return df
61
-
62
- def set_num_of_page(self, num_of_page):
63
- """ 查看数据页面,设置每页显示多少条记录 """
64
- select = self.tab('tag:span@@text():每页显示').next('tag:select')
65
- select.click()
66
- opt = select(f'tag:option@@text()={num_of_page}')
67
- if opt.attr('selected') != 'selected':
68
- opt.click()
69
- else:
70
- select.click()
71
-
72
- def get_df(self, all_pages=False):
73
- """获得当前页面的表格数据,如果 all_pages 为 True,则下载所有页面的数据"""
74
- # 初始化DataFrame列表,用于存储每页的数据
75
- dfs = [self._parse_table()] # 获取当前页面的数据
76
-
77
- # 如果需要下载所有页面数据
78
- if all_pages:
79
- current_idx, total_pages = self.get_page_num()
80
- while current_idx < total_pages:
81
- self.next_page() # 翻到下一页
82
- time.sleep(2)
83
- dfs.append(self._parse_table()) # 获取并处理新一页的数据
84
- current_idx, total_pages = self.get_page_num() # 更新页码信息
85
-
86
- # 将所有数据合并为一个DataFrame
87
- final_df = pd.concat(dfs, ignore_index=True) if all_pages else dfs[0]
88
- return final_df
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2024/11/19
6
+
7
+ """ 问卷星 相关工具 """
8
+
9
+ import os
10
+ import io
11
+ import time
12
+
13
+ from loguru import logger
14
+ from DrissionPage import Chromium
15
+ import pandas as pd
16
+
17
+ from pyxllib.ext.drissionlib import DpWebBase
18
+
19
+
20
+ class WjxWeb(DpWebBase):
21
+ """ 问卷星网页的爬虫 """
22
+
23
+ def __init__(self, url=None):
24
+ super().__init__('https://www.wjx.cn/login.aspx')
25
+ self.login()
26
+ self.tab.get(url)
27
+
28
+ def login(self):
29
+ tab = self.tab
30
+
31
+ if tab.url.startswith('https://www.wjx.cn/wjx/activitystat/resultlimit.aspx'):
32
+ tab('t:a@@text():登录').click()
33
+
34
+ if tab.url.startswith('https://www.wjx.cn/login.aspx'):
35
+ tab('t:input@@name=UserName').input(os.getenv('WJX_USERNAME'), clear=True)
36
+ tab('t:input@@name=Password').input(os.getenv('WJX_PASSWORD'), clear=True)
37
+ tab('t:label@@for=RememberMe').click()
38
+ time.sleep(2)
39
+ tab('t:input@@type=submit').click()
40
+
41
+ def get_page_num(self):
42
+ """
43
+ 返回当前页编号和总页数 (idx, num)。
44
+ """
45
+ idx, num = map(int, self.tab('tag:span@@class=paging-num').text.split('/'))
46
+ return idx, num
47
+
48
+ def prev_page(self):
49
+ self.tab('tag:a@@class=go-pre').click()
50
+
51
+ def next_page(self):
52
+ self.tab('tag:a@@class=go-next').click()
53
+
54
+ def _parse_table(self):
55
+ """处理并解析网页中的表格数据"""
56
+ table_html = self.tab('tag:table').html
57
+ df = pd.read_html(io.StringIO(table_html))[0] # 读取表格
58
+ df.columns = [col.replace('\ue645', '') for col in df.columns]
59
+ # "星标"的内容特殊字符
60
+ df.replace('\ue66b', '', regex=True, inplace=True)
61
+ # "操作"的内容特殊字符
62
+ df.replace('\ue6a3\ue6d4', '', regex=True, inplace=True)
63
+ return df
64
+
65
+ def set_num_of_page(self, num_of_page):
66
+ """ 查看数据页面,设置每页显示多少条记录 """
67
+ select = self.tab('tag:span@@text():每页显示').next('tag:select')
68
+ select.click()
69
+ opt = select(f'tag:option@@text()={num_of_page}')
70
+ if opt.attr('selected') != 'selected':
71
+ opt.click()
72
+ else:
73
+ select.click()
74
+
75
+ def get_df(self, all_pages=False):
76
+ """获得当前页面的表格数据,如果 all_pages 为 True,则下载所有页面的数据"""
77
+ # 初始化DataFrame列表,用于存储每页的数据
78
+ dfs = [self._parse_table()] # 获取当前页面的数据
79
+
80
+ # 如果需要下载所有页面数据
81
+ if all_pages:
82
+ current_idx, total_pages = self.get_page_num()
83
+ while current_idx < total_pages:
84
+ self.next_page() # 翻到下一页
85
+ time.sleep(2)
86
+ dfs.append(self._parse_table()) # 获取并处理新一页的数据
87
+ current_idx, total_pages = self.get_page_num() # 更新页码信息
88
+
89
+ # 将所有数据合并为一个DataFrame
90
+ final_df = pd.concat(dfs, ignore_index=True) if all_pages else dfs[0]
91
+ return final_df