pyxllib 0.3.197__py3-none-any.whl → 0.3.200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +21 -21
- pyxllib/algo/__init__.py +8 -8
- pyxllib/algo/disjoint.py +54 -54
- pyxllib/algo/geo.py +541 -541
- pyxllib/algo/intervals.py +964 -964
- pyxllib/algo/matcher.py +389 -389
- pyxllib/algo/newbie.py +166 -166
- pyxllib/algo/pupil.py +629 -629
- pyxllib/algo/shapelylib.py +67 -67
- pyxllib/algo/specialist.py +241 -241
- pyxllib/algo/stat.py +494 -494
- pyxllib/algo/treelib.py +149 -149
- pyxllib/algo/unitlib.py +66 -66
- pyxllib/autogui/__init__.py +5 -5
- pyxllib/autogui/activewin.py +246 -246
- pyxllib/autogui/all.py +9 -9
- pyxllib/autogui/autogui.py +852 -852
- pyxllib/autogui/uiautolib.py +362 -362
- pyxllib/autogui/virtualkey.py +102 -102
- pyxllib/autogui/wechat.py +827 -827
- pyxllib/autogui/wechat_msg.py +421 -421
- pyxllib/autogui/wxautolib.py +84 -84
- pyxllib/cv/__init__.py +5 -5
- pyxllib/cv/expert.py +267 -267
- pyxllib/cv/imfile.py +159 -159
- pyxllib/cv/imhash.py +39 -39
- pyxllib/cv/pupil.py +9 -9
- pyxllib/cv/rgbfmt.py +1525 -1525
- pyxllib/cv/slidercaptcha.py +137 -137
- pyxllib/cv/trackbartools.py +251 -251
- pyxllib/cv/xlcvlib.py +1040 -1040
- pyxllib/cv/xlpillib.py +423 -423
- pyxllib/data/echarts.py +240 -240
- pyxllib/data/jsonlib.py +89 -89
- pyxllib/data/oss.py +72 -72
- pyxllib/data/pglib.py +1127 -1127
- pyxllib/data/sqlite.py +568 -568
- pyxllib/data/sqllib.py +297 -297
- pyxllib/ext/JLineViewer.py +505 -505
- pyxllib/ext/__init__.py +6 -6
- pyxllib/ext/demolib.py +246 -246
- pyxllib/ext/drissionlib.py +277 -277
- pyxllib/ext/kq5034lib.py +12 -12
- pyxllib/ext/old.py +663 -663
- pyxllib/ext/qt.py +449 -449
- pyxllib/ext/robustprocfile.py +497 -497
- pyxllib/ext/seleniumlib.py +76 -76
- pyxllib/ext/tk.py +173 -173
- pyxllib/ext/unixlib.py +827 -827
- pyxllib/ext/utools.py +351 -351
- pyxllib/ext/webhook.py +124 -119
- pyxllib/ext/win32lib.py +40 -40
- pyxllib/ext/wjxlib.py +88 -88
- pyxllib/ext/wpsapi.py +124 -124
- pyxllib/ext/xlwork.py +9 -9
- pyxllib/ext/yuquelib.py +1105 -1105
- pyxllib/file/__init__.py +17 -17
- pyxllib/file/docxlib.py +761 -761
- pyxllib/file/gitlib.py +309 -309
- pyxllib/file/libreoffice.py +165 -165
- pyxllib/file/movielib.py +148 -148
- pyxllib/file/newbie.py +10 -10
- pyxllib/file/onenotelib.py +1469 -1469
- pyxllib/file/packlib/__init__.py +330 -330
- pyxllib/file/packlib/zipfile.py +2441 -2441
- pyxllib/file/pdflib.py +426 -426
- pyxllib/file/pupil.py +185 -185
- pyxllib/file/specialist/__init__.py +685 -685
- pyxllib/file/specialist/dirlib.py +799 -799
- pyxllib/file/specialist/download.py +193 -193
- pyxllib/file/specialist/filelib.py +2829 -2829
- pyxllib/file/xlsxlib.py +3131 -3131
- pyxllib/file/xlsyncfile.py +341 -341
- pyxllib/prog/__init__.py +5 -5
- pyxllib/prog/cachetools.py +64 -64
- pyxllib/prog/deprecatedlib.py +233 -233
- pyxllib/prog/filelock.py +42 -42
- pyxllib/prog/ipyexec.py +253 -253
- pyxllib/prog/multiprogs.py +940 -940
- pyxllib/prog/newbie.py +451 -451
- pyxllib/prog/pupil.py +1197 -1197
- pyxllib/prog/sitepackages.py +33 -33
- pyxllib/prog/specialist/__init__.py +391 -391
- pyxllib/prog/specialist/bc.py +203 -203
- pyxllib/prog/specialist/browser.py +497 -497
- pyxllib/prog/specialist/common.py +347 -347
- pyxllib/prog/specialist/datetime.py +198 -198
- pyxllib/prog/specialist/tictoc.py +240 -240
- pyxllib/prog/specialist/xllog.py +180 -180
- pyxllib/prog/xlosenv.py +108 -108
- pyxllib/stdlib/__init__.py +17 -17
- pyxllib/stdlib/tablepyxl/__init__.py +10 -10
- pyxllib/stdlib/tablepyxl/style.py +303 -303
- pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
- pyxllib/text/__init__.py +8 -8
- pyxllib/text/ahocorasick.py +39 -39
- pyxllib/text/airscript.js +744 -744
- pyxllib/text/charclasslib.py +121 -121
- pyxllib/text/jiebalib.py +267 -267
- pyxllib/text/jinjalib.py +32 -32
- pyxllib/text/jsa_ai_prompt.md +271 -271
- pyxllib/text/jscode.py +922 -922
- pyxllib/text/latex/__init__.py +158 -158
- pyxllib/text/levenshtein.py +303 -303
- pyxllib/text/nestenv.py +1215 -1215
- pyxllib/text/newbie.py +300 -300
- pyxllib/text/pupil/__init__.py +8 -8
- pyxllib/text/pupil/common.py +1121 -1121
- pyxllib/text/pupil/xlalign.py +326 -326
- pyxllib/text/pycode.py +47 -47
- pyxllib/text/specialist/__init__.py +8 -8
- pyxllib/text/specialist/common.py +112 -112
- pyxllib/text/specialist/ptag.py +186 -186
- pyxllib/text/spellchecker.py +172 -172
- pyxllib/text/templates/echart_base.html +10 -10
- pyxllib/text/templates/highlight_code.html +16 -16
- pyxllib/text/templates/latex_editor.html +102 -102
- pyxllib/text/vbacode.py +17 -17
- pyxllib/text/xmllib.py +747 -747
- pyxllib/xl.py +42 -39
- pyxllib/xlcv.py +17 -17
- {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/METADATA +1 -1
- pyxllib-0.3.200.dist-info/RECORD +126 -0
- {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/licenses/LICENSE +190 -190
- pyxllib-0.3.197.dist-info/RECORD +0 -126
- {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/WHEEL +0 -0
@@ -1,130 +1,130 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2020/06/02 19:57
|
6
|
-
|
7
|
-
|
8
|
-
# Do imports like python3 so our package works for 2 and 3
|
9
|
-
from __future__ import absolute_import
|
10
|
-
|
11
|
-
from lxml import html
|
12
|
-
|
13
|
-
from openpyxl import Workbook
|
14
|
-
from openpyxl.utils import get_column_letter
|
15
|
-
from premailer import Premailer
|
16
|
-
from pyxllib.stdlib.tablepyxl.style import Table
|
17
|
-
|
18
|
-
|
19
|
-
def string_to_int(s):
    """
    Parse a non-negative integer attribute value (e.g. ``colspan="3"``).

    :param s: attribute text
    :return: the parsed integer, or 0 when ``s`` (ignoring surrounding
        whitespace) is not made up purely of decimal digits
    """
    text = s.strip()
    # str.isdigit() also accepts characters such as superscript "²", which
    # int() rejects with ValueError; isdecimal() only accepts what int() parses.
    if text.isdecimal():
        return int(text)
    return 0
|
23
|
-
|
24
|
-
|
25
|
-
def get_Tables(doc):
    """
    Parse an HTML string and wrap every ``<table>`` element in a ``Table``.

    ``drop_tag()`` is called on every comment node of the parsed tree before
    the tables are collected.

    :param doc: HTML document as a string
    :return: list of ``Table`` objects, one per ``//table`` match
    """
    root = html.fromstring(doc)
    for comment_node in root.xpath('//comment()'):
        comment_node.drop_tag()

    tables = []
    for table_elem in root.xpath('//table'):
        tables.append(Table(table_elem))
    return tables
|
31
|
-
|
32
|
-
|
33
|
-
def write_rows(worksheet, elem, row, column=1):
    """
    Writes every tr child element of elem to a row in the worksheet

    Positions already covered by an earlier merge are skipped, colspan/rowspan
    attributes are turned into merged ranges, and for single-column cells the
    column width is grown to fit the text (clamped by the cell's
    min-width / max-width dimensions when those are set).

    :param worksheet: openpyxl worksheet to write into
    :param elem: object exposing ``rows``; every row exposes ``cells``
    :param row: 1-based worksheet row of the first table row
    :param column: 1-based worksheet column where every table row starts
    :return: the next row after all rows are written
    """
    from openpyxl.cell.cell import MergedCell

    initial_column = column
    for table_row in elem.rows:
        for table_cell in table_row.cells:
            cell = worksheet.cell(row=row, column=column)
            # Positions swallowed by a previous merge come back as MergedCell
            # placeholders; move right until a writable cell is found.
            while isinstance(cell, MergedCell):
                column += 1
                cell = worksheet.cell(row=row, column=column)

            # string_to_int() yields 0 for malformed values (and for a literal
            # "0"). A span below 1 would keep the column from advancing and
            # could hand merge_cells() an inverted range, so fall back to 1.
            colspan = max(string_to_int(table_cell.element.get("colspan", "1")), 1)
            rowspan = max(string_to_int(table_cell.element.get("rowspan", "1")), 1)
            if rowspan > 1 or colspan > 1:
                worksheet.merge_cells(start_row=row, start_column=column,
                                      end_row=row + rowspan - 1, end_column=column + colspan - 1)

            cell.value = table_cell.value
            table_cell.format(cell)
            min_width = table_cell.get_dimension('min-width')
            max_width = table_cell.get_dimension('max-width')

            if colspan == 1:
                # The stored column width may still be unset (falsy) the first
                # time a column is visited; afterwards it holds the width
                # chosen for the previous cell of the same column.
                dimension = worksheet.column_dimensions[get_column_letter(column)]
                width = max(dimension.width or 0, len(table_cell.value) + 2)
                if max_width and width > max_width:
                    width = max_width
                elif min_width and width < min_width:
                    width = min_width
                dimension.width = width
            column += colspan
        row += 1
        column = initial_column
    return row
|
75
|
-
|
76
|
-
|
77
|
-
def table_to_sheet(table, wb):
    """
    Write ``table`` onto a newly created sheet of workbook ``wb``.

    The sheet title is taken from the table element's ``name`` attribute, and
    the table is inserted starting at column 1, row 1.
    """
    sheet_title = table.element.get('name')
    sheet = wb.create_sheet(title=sheet_title)
    insert_table(table, sheet, 1, 1)
|
84
|
-
|
85
|
-
|
86
|
-
def document_to_workbook(doc, wb=None, base_url=None):
    """
    Convert every table of an HTML document string into its own worksheet.

    When ``wb`` is falsy a new Workbook is created and its initial active
    sheet is removed before any table sheet is added.

    :param doc: HTML document as a string
    :param wb: optional workbook to add the sheets to
    :param base_url: forwarded to Premailer
    :return: the workbook
    """
    if not wb:
        wb = Workbook()
        wb.remove(wb.active)

    # Premailer is a third-party library that expands the styles declared in
    # CSS onto the individual elements of the body as inline styles.
    inliner = Premailer(doc, base_url=base_url, remove_classes=False)
    inline_styles_doc = inliner.transform()

    # HTML -> Table objects (conversion written by the tablepyxl author),
    # then one sheet per table.
    for table in get_Tables(inline_styles_doc):
        table_to_sheet(table, wb)

    return wb
|
106
|
-
|
107
|
-
|
108
|
-
def document_to_xl(doc, filename, base_url=None):
    """
    Convert an HTML document string into a workbook (one sheet per table)
    and save that workbook to ``filename``.

    :param doc: HTML document as a string
    :param filename: destination passed to the workbook's ``save``
    :param base_url: forwarded to ``document_to_workbook``
    """
    workbook = document_to_workbook(doc, base_url=base_url)
    workbook.save(filename)
|
115
|
-
|
116
|
-
|
117
|
-
def insert_table(table, worksheet, column, row):
    """
    Write the head and then the body of ``table`` into ``worksheet``.

    Note the parameter order: ``column`` comes before ``row``. A section that
    is falsy is skipped; the body starts on the row returned after writing
    the head.
    """
    for section_name in ('head', 'body'):
        section = getattr(table, section_name)
        if section:
            row = write_rows(worksheet, section, row, column)
|
122
|
-
|
123
|
-
|
124
|
-
def insert_table_at_cell(table, cell):
    """
    Insert ``table`` into the worksheet that owns ``cell`` (``cell.parent``),
    starting at that cell's column and row.
    """
    insert_table(table, cell.parent, cell.column, cell.row)
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2020/06/02 19:57
|
6
|
+
|
7
|
+
|
8
|
+
# Do imports like python3 so our package works for 2 and 3
|
9
|
+
from __future__ import absolute_import
|
10
|
+
|
11
|
+
from lxml import html
|
12
|
+
|
13
|
+
from openpyxl import Workbook
|
14
|
+
from openpyxl.utils import get_column_letter
|
15
|
+
from premailer import Premailer
|
16
|
+
from pyxllib.stdlib.tablepyxl.style import Table
|
17
|
+
|
18
|
+
|
19
|
+
def string_to_int(s):
    """
    Parse a non-negative integer attribute value (e.g. ``colspan="3"``).

    :param s: attribute text
    :return: the parsed integer, or 0 when ``s`` (ignoring surrounding
        whitespace) is not made up purely of decimal digits
    """
    text = s.strip()
    # str.isdigit() also accepts characters such as superscript "²", which
    # int() rejects with ValueError; isdecimal() only accepts what int() parses.
    if text.isdecimal():
        return int(text)
    return 0
|
23
|
+
|
24
|
+
|
25
|
+
def get_Tables(doc):
    """
    Parse an HTML string and wrap every ``<table>`` element in a ``Table``.

    ``drop_tag()`` is called on every comment node of the parsed tree before
    the tables are collected.

    :param doc: HTML document as a string
    :return: list of ``Table`` objects, one per ``//table`` match
    """
    root = html.fromstring(doc)
    for comment_node in root.xpath('//comment()'):
        comment_node.drop_tag()

    tables = []
    for table_elem in root.xpath('//table'):
        tables.append(Table(table_elem))
    return tables
|
31
|
+
|
32
|
+
|
33
|
+
def write_rows(worksheet, elem, row, column=1):
    """
    Writes every tr child element of elem to a row in the worksheet

    Positions already covered by an earlier merge are skipped, colspan/rowspan
    attributes are turned into merged ranges, and for single-column cells the
    column width is grown to fit the text (clamped by the cell's
    min-width / max-width dimensions when those are set).

    :param worksheet: openpyxl worksheet to write into
    :param elem: object exposing ``rows``; every row exposes ``cells``
    :param row: 1-based worksheet row of the first table row
    :param column: 1-based worksheet column where every table row starts
    :return: the next row after all rows are written
    """
    from openpyxl.cell.cell import MergedCell

    initial_column = column
    for table_row in elem.rows:
        for table_cell in table_row.cells:
            cell = worksheet.cell(row=row, column=column)
            # Positions swallowed by a previous merge come back as MergedCell
            # placeholders; move right until a writable cell is found.
            while isinstance(cell, MergedCell):
                column += 1
                cell = worksheet.cell(row=row, column=column)

            # string_to_int() yields 0 for malformed values (and for a literal
            # "0"). A span below 1 would keep the column from advancing and
            # could hand merge_cells() an inverted range, so fall back to 1.
            colspan = max(string_to_int(table_cell.element.get("colspan", "1")), 1)
            rowspan = max(string_to_int(table_cell.element.get("rowspan", "1")), 1)
            if rowspan > 1 or colspan > 1:
                worksheet.merge_cells(start_row=row, start_column=column,
                                      end_row=row + rowspan - 1, end_column=column + colspan - 1)

            cell.value = table_cell.value
            table_cell.format(cell)
            min_width = table_cell.get_dimension('min-width')
            max_width = table_cell.get_dimension('max-width')

            if colspan == 1:
                # The stored column width may still be unset (falsy) the first
                # time a column is visited; afterwards it holds the width
                # chosen for the previous cell of the same column.
                dimension = worksheet.column_dimensions[get_column_letter(column)]
                width = max(dimension.width or 0, len(table_cell.value) + 2)
                if max_width and width > max_width:
                    width = max_width
                elif min_width and width < min_width:
                    width = min_width
                dimension.width = width
            column += colspan
        row += 1
        column = initial_column
    return row
|
75
|
+
|
76
|
+
|
77
|
+
def table_to_sheet(table, wb):
    """
    Write ``table`` onto a newly created sheet of workbook ``wb``.

    The sheet title is taken from the table element's ``name`` attribute, and
    the table is inserted starting at column 1, row 1.
    """
    sheet_title = table.element.get('name')
    sheet = wb.create_sheet(title=sheet_title)
    insert_table(table, sheet, 1, 1)
|
84
|
+
|
85
|
+
|
86
|
+
def document_to_workbook(doc, wb=None, base_url=None):
    """
    Convert every table of an HTML document string into its own worksheet.

    When ``wb`` is falsy a new Workbook is created and its initial active
    sheet is removed before any table sheet is added.

    :param doc: HTML document as a string
    :param wb: optional workbook to add the sheets to
    :param base_url: forwarded to Premailer
    :return: the workbook
    """
    if not wb:
        wb = Workbook()
        wb.remove(wb.active)

    # Premailer is a third-party library that expands the styles declared in
    # CSS onto the individual elements of the body as inline styles.
    inliner = Premailer(doc, base_url=base_url, remove_classes=False)
    inline_styles_doc = inliner.transform()

    # HTML -> Table objects (conversion written by the tablepyxl author),
    # then one sheet per table.
    for table in get_Tables(inline_styles_doc):
        table_to_sheet(table, wb)

    return wb
|
106
|
+
|
107
|
+
|
108
|
+
def document_to_xl(doc, filename, base_url=None):
    """
    Convert an HTML document string into a workbook (one sheet per table)
    and save that workbook to ``filename``.

    :param doc: HTML document as a string
    :param filename: destination passed to the workbook's ``save``
    :param base_url: forwarded to ``document_to_workbook``
    """
    workbook = document_to_workbook(doc, base_url=base_url)
    workbook.save(filename)
|
115
|
+
|
116
|
+
|
117
|
+
def insert_table(table, worksheet, column, row):
    """
    Write the head and then the body of ``table`` into ``worksheet``.

    Note the parameter order: ``column`` comes before ``row``. A section that
    is falsy is skipped; the body starts on the row returned after writing
    the head.
    """
    for section_name in ('head', 'body'):
        section = getattr(table, section_name)
        if section:
            row = write_rows(worksheet, section, row, column)
|
122
|
+
|
123
|
+
|
124
|
+
def insert_table_at_cell(table, cell):
    """
    Insert ``table`` into the worksheet that owns ``cell`` (``cell.parent``),
    starting at that cell's column and row.
    """
    insert_table(table, cell.parent, cell.column, cell.row)
|
pyxllib/text/__init__.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2020/12/08 15:33
|
6
|
-
|
7
|
-
""" 一些文本处理功能 """
|
8
|
-
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2020/12/08 15:33
|
6
|
+
|
7
|
+
""" 一些文本处理功能 """
|
8
|
+
|
pyxllib/text/ahocorasick.py
CHANGED
@@ -1,39 +1,39 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2021/06/06 16:57
|
6
|
-
|
7
|
-
from pyxllib.prog.pupil import check_install_package
|
8
|
-
|
9
|
-
check_install_package('ahocorasick', 'pyahocorasick')
|
10
|
-
|
11
|
-
from collections import Counter
|
12
|
-
import re
|
13
|
-
|
14
|
-
import ahocorasick
|
15
|
-
|
16
|
-
|
17
|
-
def make_automaton(words):
    """ Build an Aho-Corasick automaton from an iterable of pattern words.

    Every word is registered with the payload ``(position, word)``, where
    ``position`` is its index in ``words``.
    """
    automaton = ahocorasick.Automaton()
    for position, pattern in enumerate(words):
        payload = (position, pattern)
        automaton.add_word(pattern, payload)
    automaton.make_automaton()
    return automaton
|
24
|
-
|
25
|
-
|
26
|
-
def count_words(content, word, scope=2, exclude=None):
    """ Count the snippets of ``content`` that contain ``word``.

    A snippet is ``word`` plus up to ``scope`` characters on each side.
    ``word`` is placed into the regular expression as-is (it is not escaped).

    :param content: text to search
    :param word: pattern to look for
    :param scope: maximum number of context characters kept on each side
    :param exclude: optional words; snippets containing any of them are dropped
    :return: Counter mapping snippet -> number of occurrences
    """
    # 1 Count how often every snippet occurs
    pattern = f'.{{,{scope}}}{word}.{{,{scope}}}'
    counts = Counter(re.findall(pattern, content))
    if not exclude:
        return counts

    # 2 Drop the snippets that should not be handled. Every snippet is already
    #   a filtered match, so a simple containment check is enough here; no
    #   interval-set bookkeeping is needed.
    automaton = make_automaton(exclude)  # build the AC automaton
    kept = Counter()
    for snippet, times in counts.items():
        # Keep the snippet only when it matches none of the excluded words
        if not next(automaton.iter(snippet), None):
            kept[snippet] = times
    return kept
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2021/06/06 16:57
|
6
|
+
|
7
|
+
from pyxllib.prog.pupil import check_install_package
|
8
|
+
|
9
|
+
check_install_package('ahocorasick', 'pyahocorasick')
|
10
|
+
|
11
|
+
from collections import Counter
|
12
|
+
import re
|
13
|
+
|
14
|
+
import ahocorasick
|
15
|
+
|
16
|
+
|
17
|
+
def make_automaton(words):
    """ Build an Aho-Corasick automaton from an iterable of pattern words.

    Every word is registered with the payload ``(position, word)``, where
    ``position`` is its index in ``words``.
    """
    automaton = ahocorasick.Automaton()
    for position, pattern in enumerate(words):
        payload = (position, pattern)
        automaton.add_word(pattern, payload)
    automaton.make_automaton()
    return automaton
|
24
|
+
|
25
|
+
|
26
|
+
def count_words(content, word, scope=2, exclude=None):
    """ Count the snippets of ``content`` that contain ``word``.

    A snippet is ``word`` plus up to ``scope`` characters on each side.
    ``word`` is placed into the regular expression as-is (it is not escaped).

    :param content: text to search
    :param word: pattern to look for
    :param scope: maximum number of context characters kept on each side
    :param exclude: optional words; snippets containing any of them are dropped
    :return: Counter mapping snippet -> number of occurrences
    """
    # 1 Count how often every snippet occurs
    pattern = f'.{{,{scope}}}{word}.{{,{scope}}}'
    counts = Counter(re.findall(pattern, content))
    if not exclude:
        return counts

    # 2 Drop the snippets that should not be handled. Every snippet is already
    #   a filtered match, so a simple containment check is enough here; no
    #   interval-set bookkeeping is needed.
    automaton = make_automaton(exclude)  # build the AC automaton
    kept = Counter()
    for snippet, times in counts.items():
        # Keep the snippet only when it matches none of the excluded words
        if not next(automaton.iter(snippet), None):
            kept[snippet] = times
    return kept
|