pyxllib 0.3.197__py3-none-any.whl → 0.3.200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. pyxllib/__init__.py +21 -21
  2. pyxllib/algo/__init__.py +8 -8
  3. pyxllib/algo/disjoint.py +54 -54
  4. pyxllib/algo/geo.py +541 -541
  5. pyxllib/algo/intervals.py +964 -964
  6. pyxllib/algo/matcher.py +389 -389
  7. pyxllib/algo/newbie.py +166 -166
  8. pyxllib/algo/pupil.py +629 -629
  9. pyxllib/algo/shapelylib.py +67 -67
  10. pyxllib/algo/specialist.py +241 -241
  11. pyxllib/algo/stat.py +494 -494
  12. pyxllib/algo/treelib.py +149 -149
  13. pyxllib/algo/unitlib.py +66 -66
  14. pyxllib/autogui/__init__.py +5 -5
  15. pyxllib/autogui/activewin.py +246 -246
  16. pyxllib/autogui/all.py +9 -9
  17. pyxllib/autogui/autogui.py +852 -852
  18. pyxllib/autogui/uiautolib.py +362 -362
  19. pyxllib/autogui/virtualkey.py +102 -102
  20. pyxllib/autogui/wechat.py +827 -827
  21. pyxllib/autogui/wechat_msg.py +421 -421
  22. pyxllib/autogui/wxautolib.py +84 -84
  23. pyxllib/cv/__init__.py +5 -5
  24. pyxllib/cv/expert.py +267 -267
  25. pyxllib/cv/imfile.py +159 -159
  26. pyxllib/cv/imhash.py +39 -39
  27. pyxllib/cv/pupil.py +9 -9
  28. pyxllib/cv/rgbfmt.py +1525 -1525
  29. pyxllib/cv/slidercaptcha.py +137 -137
  30. pyxllib/cv/trackbartools.py +251 -251
  31. pyxllib/cv/xlcvlib.py +1040 -1040
  32. pyxllib/cv/xlpillib.py +423 -423
  33. pyxllib/data/echarts.py +240 -240
  34. pyxllib/data/jsonlib.py +89 -89
  35. pyxllib/data/oss.py +72 -72
  36. pyxllib/data/pglib.py +1127 -1127
  37. pyxllib/data/sqlite.py +568 -568
  38. pyxllib/data/sqllib.py +297 -297
  39. pyxllib/ext/JLineViewer.py +505 -505
  40. pyxllib/ext/__init__.py +6 -6
  41. pyxllib/ext/demolib.py +246 -246
  42. pyxllib/ext/drissionlib.py +277 -277
  43. pyxllib/ext/kq5034lib.py +12 -12
  44. pyxllib/ext/old.py +663 -663
  45. pyxllib/ext/qt.py +449 -449
  46. pyxllib/ext/robustprocfile.py +497 -497
  47. pyxllib/ext/seleniumlib.py +76 -76
  48. pyxllib/ext/tk.py +173 -173
  49. pyxllib/ext/unixlib.py +827 -827
  50. pyxllib/ext/utools.py +351 -351
  51. pyxllib/ext/webhook.py +124 -119
  52. pyxllib/ext/win32lib.py +40 -40
  53. pyxllib/ext/wjxlib.py +88 -88
  54. pyxllib/ext/wpsapi.py +124 -124
  55. pyxllib/ext/xlwork.py +9 -9
  56. pyxllib/ext/yuquelib.py +1105 -1105
  57. pyxllib/file/__init__.py +17 -17
  58. pyxllib/file/docxlib.py +761 -761
  59. pyxllib/file/gitlib.py +309 -309
  60. pyxllib/file/libreoffice.py +165 -165
  61. pyxllib/file/movielib.py +148 -148
  62. pyxllib/file/newbie.py +10 -10
  63. pyxllib/file/onenotelib.py +1469 -1469
  64. pyxllib/file/packlib/__init__.py +330 -330
  65. pyxllib/file/packlib/zipfile.py +2441 -2441
  66. pyxllib/file/pdflib.py +426 -426
  67. pyxllib/file/pupil.py +185 -185
  68. pyxllib/file/specialist/__init__.py +685 -685
  69. pyxllib/file/specialist/dirlib.py +799 -799
  70. pyxllib/file/specialist/download.py +193 -193
  71. pyxllib/file/specialist/filelib.py +2829 -2829
  72. pyxllib/file/xlsxlib.py +3131 -3131
  73. pyxllib/file/xlsyncfile.py +341 -341
  74. pyxllib/prog/__init__.py +5 -5
  75. pyxllib/prog/cachetools.py +64 -64
  76. pyxllib/prog/deprecatedlib.py +233 -233
  77. pyxllib/prog/filelock.py +42 -42
  78. pyxllib/prog/ipyexec.py +253 -253
  79. pyxllib/prog/multiprogs.py +940 -940
  80. pyxllib/prog/newbie.py +451 -451
  81. pyxllib/prog/pupil.py +1197 -1197
  82. pyxllib/prog/sitepackages.py +33 -33
  83. pyxllib/prog/specialist/__init__.py +391 -391
  84. pyxllib/prog/specialist/bc.py +203 -203
  85. pyxllib/prog/specialist/browser.py +497 -497
  86. pyxllib/prog/specialist/common.py +347 -347
  87. pyxllib/prog/specialist/datetime.py +198 -198
  88. pyxllib/prog/specialist/tictoc.py +240 -240
  89. pyxllib/prog/specialist/xllog.py +180 -180
  90. pyxllib/prog/xlosenv.py +108 -108
  91. pyxllib/stdlib/__init__.py +17 -17
  92. pyxllib/stdlib/tablepyxl/__init__.py +10 -10
  93. pyxllib/stdlib/tablepyxl/style.py +303 -303
  94. pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
  95. pyxllib/text/__init__.py +8 -8
  96. pyxllib/text/ahocorasick.py +39 -39
  97. pyxllib/text/airscript.js +744 -744
  98. pyxllib/text/charclasslib.py +121 -121
  99. pyxllib/text/jiebalib.py +267 -267
  100. pyxllib/text/jinjalib.py +32 -32
  101. pyxllib/text/jsa_ai_prompt.md +271 -271
  102. pyxllib/text/jscode.py +922 -922
  103. pyxllib/text/latex/__init__.py +158 -158
  104. pyxllib/text/levenshtein.py +303 -303
  105. pyxllib/text/nestenv.py +1215 -1215
  106. pyxllib/text/newbie.py +300 -300
  107. pyxllib/text/pupil/__init__.py +8 -8
  108. pyxllib/text/pupil/common.py +1121 -1121
  109. pyxllib/text/pupil/xlalign.py +326 -326
  110. pyxllib/text/pycode.py +47 -47
  111. pyxllib/text/specialist/__init__.py +8 -8
  112. pyxllib/text/specialist/common.py +112 -112
  113. pyxllib/text/specialist/ptag.py +186 -186
  114. pyxllib/text/spellchecker.py +172 -172
  115. pyxllib/text/templates/echart_base.html +10 -10
  116. pyxllib/text/templates/highlight_code.html +16 -16
  117. pyxllib/text/templates/latex_editor.html +102 -102
  118. pyxllib/text/vbacode.py +17 -17
  119. pyxllib/text/xmllib.py +747 -747
  120. pyxllib/xl.py +42 -39
  121. pyxllib/xlcv.py +17 -17
  122. {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/METADATA +1 -1
  123. pyxllib-0.3.200.dist-info/RECORD +126 -0
  124. {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/licenses/LICENSE +190 -190
  125. pyxllib-0.3.197.dist-info/RECORD +0 -126
  126. {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/WHEEL +0 -0
pyxllib/ext/webhook.py CHANGED
@@ -1,119 +1,124 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2020/10/21 09:22
6
-
7
- import os
8
- import time
9
- import hmac
10
- import hashlib
11
- import base64
12
- import urllib.parse
13
-
14
- import requests
15
-
16
-
17
- class WeixinRobot:
18
- """ 企业微信 机器人 """
19
-
20
- def __init__(self, url):
21
- self.url = url
22
-
23
- def push_text(self, s):
24
- msgtype = 'text'
25
- try:
26
- headers = {"Content-Type": "text/plain"}
27
- t = {"content": s} if isinstance(s, str) else s
28
- data = {"msgtype": msgtype, msgtype: t} # msgtype: text、markdown
29
- requests.post(url=self.url, headers=headers, json=data)
30
- except requests.exceptions.ConnectionError: # 没网发送失败的时候也不报错
31
- pass
32
-
33
-
34
- class DingtalkRobot:
35
- """ 钉钉 自定义webhook机器人
36
-
37
- https://ding-doc.dingtalk.com/doc#/serverapi2/qf2nxq
38
- """
39
-
40
- def __init__(self, access_token=None, secret=None):
41
- access_token = access_token or os.getenv('DINGTALK_ROBOT_SECRET')
42
- self.url = f'https://oapi.dingtalk.com/robot/send?access_token={access_token}'
43
- self.url += self.add_secret(secret or os.getenv('DINGTALK_ROBOT_SECRET'))
44
- self.headers = {"Content-Type": "application/json"}
45
-
46
- @classmethod
47
- def add_secret(cls, secret):
48
- """ 钉钉机器人需要加签,确保安全性 """
49
- timestamp = str(round(time.time() * 1000))
50
- secret_enc = secret.encode('utf-8')
51
- string_to_sign = '{}\n{}'.format(timestamp, secret)
52
- string_to_sign_enc = string_to_sign.encode('utf-8')
53
- hmac_code = hmac.new(secret_enc, string_to_sign_enc, digestmod=hashlib.sha256).digest()
54
- sign = urllib.parse.quote_plus(base64.b64encode(hmac_code))
55
- return f'&timestamp={timestamp}&sign={sign}'
56
-
57
- def send_data(self, data):
58
- try:
59
- requests.post(url=self.url, headers=self.headers, json=data)
60
- except requests.exceptions.ConnectionError as e: # 没网发送失败的时候也不报错
61
- raise e
62
-
63
- def send_text(self, content):
64
- msgtype = 'text'
65
- d = {}
66
- if content: d['content'] = content
67
- data = {"msgtype": msgtype, msgtype: d}
68
- self.send_data(data)
69
-
70
- def send_link(self, text='', title='', pic_url='', message_url=''):
71
- msgtype = 'link'
72
- d = {}
73
- if text: d['text'] = text
74
- if title: d['title'] = title
75
- if pic_url: d['picUrl'] = pic_url
76
- if message_url: d['messageUrl'] = message_url
77
- data = {"msgtype": msgtype, msgtype: d}
78
- self.send_data(data)
79
-
80
- def send_markdown(self, text='', title=''):
81
- msgtype = 'link'
82
- d = {}
83
- if text: d['text'] = text
84
- if title: d['title'] = title
85
- data = {"msgtype": msgtype, msgtype: d}
86
- self.send_data(data)
87
-
88
- def send_actioncard(self, text='', title='', siggle_url='', siggle_title='', btn_orientation='0'):
89
- raise NotImplementedError
90
-
91
- def send_feedcard(self):
92
- raise NotImplementedError
93
-
94
-
95
- class DingtalkRobot2(DingtalkRobot):
96
-
97
- def __init__(self, title=None):
98
- super().__init__()
99
- self.title = title
100
-
101
- def send_text2(self, text): # 增加一个更加定制化的便捷接口
102
- from pyxllib.prog.pupil import utc_timestamp
103
-
104
- if self.title:
105
- self.send_text(f'{utc_timestamp()} {get_host_nickname()} [{self.title}] {text}')
106
- else:
107
- self.send_text(f'{utc_timestamp()} {get_host_nickname()} {text}')
108
-
109
- def __enter__(self):
110
- self.send_text2('启动')
111
- return self
112
-
113
- def __exit__(self, exc_type, exc_val, exc_tb):
114
- from pyxllib.prog.pupil import format_exception
115
-
116
- if exc_tb is None:
117
- self.send_text2('完成')
118
- else:
119
- self.send_text2(f'报错\n{format_exception(exc_val, 3)}')
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2020/10/21 09:22
6
+
7
+ import os
8
+ import time
9
+ import hmac
10
+ import hashlib
11
+ import base64
12
+ import urllib.parse
13
+
14
+ import requests
15
+
16
+
17
class WeixinRobot:
    """Client for a WeCom (Enterprise WeChat) group-robot webhook.

    Args:
        url: Full webhook URL of the robot.
    """

    def __init__(self, url):
        self.url = url

    @staticmethod
    def _build_payload(s, msgtype='text'):
        """Wrap *s* in the message envelope posted to the webhook.

        A plain string becomes ``{"content": s}``; any other value is passed
        through unchanged as the message body.
        """
        body = {"content": s} if isinstance(s, str) else s
        return {"msgtype": msgtype, msgtype: body}

    def push_text(self, s):
        """Send a text message; do nothing if the connection cannot be made.

        Args:
            s: Message text, or an already-built ``text`` message body.
        """
        # The body is serialised by ``json=``, so declare it as JSON rather
        # than ``text/plain`` (requests keeps an explicitly given header).
        headers = {"Content-Type": "application/json"}
        data = self._build_payload(s)
        try:
            requests.post(url=self.url, headers=headers, json=data)
        except requests.exceptions.ConnectionError:
            # Best-effort notification: being offline must not fail the caller.
            pass
32
+
33
+
34
class DingtalkRobot:
    """Client for a DingTalk custom webhook robot.

    https://ding-doc.dingtalk.com/doc#/serverapi2/qf2nxq
    """

    def __init__(self, access_token=None, secret=None):
        """Build the webhook URL for the robot.

        Args:
            access_token: Robot access token. When omitted it is read from
                the ``DINGTALK_ROBOT_ACCESS_TOKEN`` environment variable,
                falling back to ``DINGTALK_ROBOT_SECRET`` (the only variable
                consulted historically) for backward compatibility.
            secret: Signing secret. When omitted it is read from the
                ``DINGTALK_ROBOT_SECRET`` environment variable. If no secret
                is available the requests are sent unsigned.
        """
        # NOTE(review): the original read the token from the *secret* variable,
        # which looks like a copy-paste slip; the dedicated variable name used
        # here should be confirmed against the project's deployment docs.
        access_token = (access_token
                        or os.getenv('DINGTALK_ROBOT_ACCESS_TOKEN')
                        or os.getenv('DINGTALK_ROBOT_SECRET'))
        self._base_url = f'https://oapi.dingtalk.com/robot/send?access_token={access_token}'
        self._secret = secret or os.getenv('DINGTALK_ROBOT_SECRET')
        # Kept as a public attribute for existing callers; send_data() re-signs.
        self.url = self._base_url + self.add_secret(self._secret)
        self.headers = {"Content-Type": "application/json"}

    @classmethod
    def add_secret(cls, secret):
        """Return the ``&timestamp=...&sign=...`` query suffix for *secret*.

        The signature is the URL-quoted base64 of
        HMAC-SHA256(key=secret, msg="<timestamp_ms>\\n<secret>").

        Args:
            secret: Signing secret; ``None`` or ``''`` means "no signing".

        Returns:
            The query-string suffix, or ``''`` when no secret is given.
        """
        if not secret:
            return ''
        timestamp = str(round(time.time() * 1000))
        string_to_sign = f'{timestamp}\n{secret}'
        digest = hmac.new(secret.encode('utf-8'),
                          string_to_sign.encode('utf-8'),
                          digestmod=hashlib.sha256).digest()
        sign = urllib.parse.quote_plus(base64.b64encode(digest))
        return f'&timestamp={timestamp}&sign={sign}'

    def _current_url(self):
        """Return a freshly signed URL, or ``self.url`` when unsigned.

        The signature embeds a timestamp, so it is regenerated per request
        instead of reusing the one computed at construction time.
        """
        secret = getattr(self, '_secret', None)
        base_url = getattr(self, '_base_url', None)
        if not secret or not base_url:
            return self.url
        return base_url + self.add_secret(secret)

    def send_data(self, data):
        """POST *data* as JSON to the robot.

        Raises:
            requests.exceptions.ConnectionError: Propagated to the caller when
                the request cannot be delivered.
        """
        requests.post(url=self._current_url(), headers=self.headers, json=data)

    def _send(self, msgtype, **fields):
        """Send a message of *msgtype*, omitting fields with falsy values."""
        body = {key: value for key, value in fields.items() if value}
        self.send_data({"msgtype": msgtype, msgtype: body})

    def send_text(self, content):
        """Send a plain text message."""
        self._send('text', content=content)

    def send_link(self, text='', title='', pic_url='', message_url=''):
        """Send a link message."""
        self._send('link', text=text, title=title, picUrl=pic_url, messageUrl=message_url)

    def send_markdown(self, text='', title=''):
        """Send a markdown message."""
        # Was sent as msgtype 'link', so it never went out as markdown.
        self._send('markdown', text=text, title=title)

    def send_actioncard(self, text='', title='', siggle_url='', siggle_title='', btn_orientation='0'):
        """Not implemented."""
        raise NotImplementedError

    def send_feedcard(self):
        """Not implemented."""
        raise NotImplementedError
93
+
94
+
95
class DingtalkRobot2(DingtalkRobot):
    """DingtalkRobot that prefixes messages with a timestamp and host label.

    Can be used as a context manager: it reports the start of the block on
    entry and either completion or the raised exception on exit.

    Args:
        title: Optional tag inserted as ``[title]`` before the message text.
    """

    def __init__(self, title=None):
        super().__init__()
        self.title = title

    @staticmethod
    def _host_label():
        """Return the label identifying this machine in messages.

        ``get_host_nickname`` is referenced by this module but never imported
        here, so calling it directly raises NameError. Use it when it has been
        made available in the module namespace, otherwise fall back to the
        plain host name.
        """
        # NOTE(review): import get_host_nickname explicitly once its home
        # module is confirmed.
        nickname_fn = globals().get('get_host_nickname')
        if callable(nickname_fn):
            return nickname_fn()
        import socket
        return socket.gethostname()

    def send_text2(self, text):
        """Send *text* prefixed with a timestamp, host label and the title."""
        from pyxllib.prog.pupil import utc_timestamp

        prefix = f'{utc_timestamp()} {self._host_label()}'
        if self.title:
            prefix += f' [{self.title}]'
        self.send_text(f'{prefix} {text}')

    def __enter__(self):
        self.send_text2('启动')
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Report completion or the error; the exception is not suppressed."""
        from pyxllib.prog.pupil import format_exception

        if exc_tb is None:
            self.send_text2('完成')
        else:
            self.send_text2(f'报错\n{format_exception(exc_val, 3)}')
120
+
121
+ if __name__ == '__main__':
122
+ pass
123
+
124
+ from pyxllib.prog.newbie import typename
pyxllib/ext/win32lib.py CHANGED
@@ -1,40 +1,40 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2021/09/07 10:21
6
-
7
-
8
- import win32com.client as win32
9
- import pythoncom
10
-
11
-
12
- def get_win32_app(name, visible=False):
13
- """ 启动可支持pywin32自动化处理的应用
14
-
15
- Args:
16
- str name: 应用名称,不区分大小写,比如word, excel, powerpoint, onenote
17
- 不带'.'的情况下,会自动添加'.Application'的后缀
18
- visible: 应用是否可见
19
-
20
- Returns: app
21
-
22
- """
23
- # 1 name
24
- name = name.lower()
25
- if '.' not in name:
26
- name += '.application'
27
-
28
- # 2 app
29
- # 这里可能还有些问题,不同的应用,机制不太一样,后面再细化完善吧
30
- try:
31
- app = win32.GetActiveObject(f'{name}') # 不能关联到普通方式打开的应用。但代码打开的应用都能找得到。
32
- except pythoncom.com_error:
33
- app = win32.gencache.EnsureDispatch(f'{name}')
34
- # 还有种常见的初始化方法,是 win32com.client.Dispatch和win32com.client.dynamic.Dispatch
35
- # from win32com.client.dynamic import Disypatch
36
-
37
- if visible is not None:
38
- app.Visible = visible
39
-
40
- return app
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2021/09/07 10:21
6
+
7
+
8
+ import win32com.client as win32
9
+ import pythoncom
10
+
11
+
12
def get_win32_app(name, visible=False):
    """Attach to, or start, an application that pywin32 can automate.

    Args:
        name (str): Application name, case-insensitive, e.g. word, excel,
            powerpoint, onenote. When it contains no '.', the
            '.application' suffix is appended automatically.
        visible: Whether the application should be visible. ``None`` leaves
            the ``Visible`` property untouched.

    Returns:
        The application object.
    """
    # 1 Normalise the name
    prog_id = name.lower()
    if '.' not in prog_id:
        prog_id = prog_id + '.application'

    # 2 Obtain the application
    # Author's note: this may still have issues; different applications
    # behave differently and this is to be refined later.
    try:
        # Author's note: cannot attach to an instance opened the normal way,
        # but instances opened from code are found.
        app = win32.GetActiveObject(prog_id)
    except pythoncom.com_error:
        # Other common ways to initialise are win32com.client.Dispatch and
        # win32com.client.dynamic.Dispatch.
        app = win32.gencache.EnsureDispatch(prog_id)

    if visible is None:
        return app
    app.Visible = visible
    return app
pyxllib/ext/wjxlib.py CHANGED
@@ -1,88 +1,88 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2024/11/19
6
-
7
- """ 问卷星 相关工具 """
8
-
9
- import os
10
- import io
11
- import time
12
-
13
- from loguru import logger
14
- from DrissionPage import Chromium
15
- import pandas as pd
16
-
17
- from pyxllib.ext.drissionlib import DpWebBase
18
-
19
-
20
- class WjxWeb(DpWebBase):
21
- """ 问卷星网页的爬虫 """
22
-
23
- def __init__(self, url=None):
24
- super().__init__(url or 'https://www.wjx.cn')
25
- self.login()
26
-
27
- def login(self):
28
- tab = self.tab
29
-
30
- if tab.url.startswith('https://www.wjx.cn/wjx/activitystat/resultlimit.aspx'):
31
- tab('tag:a@@text():登录').click()
32
-
33
- if tab.url.lower().startswith('https://www.wjx.cn/login.aspx'):
34
- tab('tag:input@@name=UserName').input(os.getenv('WJX_USERNAME'), clear=True)
35
- tab('tag:input@@name=Password').input(os.getenv('WJX_PASSWORD'), clear=True)
36
- tab('tag:input@@type=submit').click()
37
-
38
- def get_page_num(self):
39
- """
40
- 返回当前页编号和总页数 (idx, num)。
41
- """
42
- idx, num = map(int, self.tab('tag:span@@class=paging-num').text.split('/'))
43
- return idx, num
44
-
45
- def prev_page(self):
46
- self.tab('tag:a@@class=go-pre').click()
47
-
48
- def next_page(self):
49
- self.tab('tag:a@@class=go-next').click()
50
-
51
- def _parse_table(self):
52
- """处理并解析网页中的表格数据"""
53
- table_html = self.tab('tag:table').html
54
- df = pd.read_html(io.StringIO(table_html))[0] # 读取表格
55
- df.columns = [col.replace('\ue645', '') for col in df.columns]
56
- # "星标"的内容特殊字符
57
- df.replace('\ue66b', '', regex=True, inplace=True)
58
- # "操作"的内容特殊字符
59
- df.replace('\ue6a3\ue6d4', '', regex=True, inplace=True)
60
- return df
61
-
62
- def set_num_of_page(self, num_of_page):
63
- """ 查看数据页面,设置每页显示多少条记录 """
64
- select = self.tab('tag:span@@text():每页显示').next('tag:select')
65
- select.click()
66
- opt = select(f'tag:option@@text()={num_of_page}')
67
- if opt.attr('selected') != 'selected':
68
- opt.click()
69
- else:
70
- select.click()
71
-
72
- def get_df(self, all_pages=False):
73
- """获得当前页面的表格数据,如果 all_pages 为 True,则下载所有页面的数据"""
74
- # 初始化DataFrame列表,用于存储每页的数据
75
- dfs = [self._parse_table()] # 获取当前页面的数据
76
-
77
- # 如果需要下载所有页面数据
78
- if all_pages:
79
- current_idx, total_pages = self.get_page_num()
80
- while current_idx < total_pages:
81
- self.next_page() # 翻到下一页
82
- time.sleep(2)
83
- dfs.append(self._parse_table()) # 获取并处理新一页的数据
84
- current_idx, total_pages = self.get_page_num() # 更新页码信息
85
-
86
- # 将所有数据合并为一个DataFrame
87
- final_df = pd.concat(dfs, ignore_index=True) if all_pages else dfs[0]
88
- return final_df
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2024/11/19
6
+
7
+ """ 问卷星 相关工具 """
8
+
9
+ import os
10
+ import io
11
+ import time
12
+
13
+ from loguru import logger
14
+ from DrissionPage import Chromium
15
+ import pandas as pd
16
+
17
+ from pyxllib.ext.drissionlib import DpWebBase
18
+
19
+
20
class WjxWeb(DpWebBase):
    """Scraper for the wjx.cn (问卷星) web pages."""

    def __init__(self, url=None):
        super().__init__(url or 'https://www.wjx.cn')
        self.login()

    def login(self):
        """Log in with WJX_USERNAME / WJX_PASSWORD when a login page is shown."""
        page = self.tab

        # The "result limit" page only offers a link leading to the login page.
        if page.url.startswith('https://www.wjx.cn/wjx/activitystat/resultlimit.aspx'):
            page('tag:a@@text():登录').click()

        # Re-read the URL: the click above may have navigated.
        if not page.url.lower().startswith('https://www.wjx.cn/login.aspx'):
            return
        page('tag:input@@name=UserName').input(os.getenv('WJX_USERNAME'), clear=True)
        page('tag:input@@name=Password').input(os.getenv('WJX_PASSWORD'), clear=True)
        page('tag:input@@type=submit').click()

    def get_page_num(self):
        """Return the current page index and the total page count (idx, num)."""
        paging_text = self.tab('tag:span@@class=paging-num').text
        idx, num = (int(part) for part in paging_text.split('/'))
        return idx, num

    def prev_page(self):
        """Click the "previous page" link."""
        self.tab('tag:a@@class=go-pre').click()

    def next_page(self):
        """Click the "next page" link."""
        self.tab('tag:a@@class=go-next').click()

    def _parse_table(self):
        """Parse the first table on the page into a DataFrame."""
        html = self.tab('tag:table').html
        tables = pd.read_html(io.StringIO(html))
        df = tables[0]
        df.columns = [col.replace('\ue645', '') for col in df.columns]
        # Strip the special glyphs found in the "星标" cells, then those in
        # the "操作" cells.
        for glyphs in ('\ue66b', '\ue6a3\ue6d4'):
            df.replace(glyphs, '', regex=True, inplace=True)
        return df

    def set_num_of_page(self, num_of_page):
        """On the data page, choose how many records are shown per page."""
        dropdown = self.tab('tag:span@@text():每页显示').next('tag:select')
        dropdown.click()
        option = dropdown(f'tag:option@@text()={num_of_page}')
        # Already selected: click the select again instead of the option.
        target = dropdown if option.attr('selected') == 'selected' else option
        target.click()

    def get_df(self, all_pages=False):
        """Return the table of the current page, or of all pages.

        Args:
            all_pages: When true, keep clicking "next page" and concatenate
                the tables of every page visited.
        """
        first_page = self._parse_table()
        if not all_pages:
            return first_page

        frames = [first_page]
        idx, total = self.get_page_num()
        while idx < total:
            self.next_page()
            time.sleep(2)
            frames.append(self._parse_table())
            idx, total = self.get_page_num()  # refresh the paging info

        return pd.concat(frames, ignore_index=True)