pyxllib 0.3.197__py3-none-any.whl → 3.201.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +14 -21
- pyxllib/algo/__init__.py +8 -8
- pyxllib/algo/disjoint.py +54 -54
- pyxllib/algo/geo.py +537 -541
- pyxllib/algo/intervals.py +964 -964
- pyxllib/algo/matcher.py +389 -389
- pyxllib/algo/newbie.py +166 -166
- pyxllib/algo/pupil.py +629 -629
- pyxllib/algo/shapelylib.py +67 -67
- pyxllib/algo/specialist.py +241 -241
- pyxllib/algo/stat.py +494 -494
- pyxllib/algo/treelib.py +145 -149
- pyxllib/algo/unitlib.py +62 -66
- pyxllib/autogui/__init__.py +5 -5
- pyxllib/autogui/activewin.py +246 -246
- pyxllib/autogui/all.py +9 -9
- pyxllib/autogui/autogui.py +846 -852
- pyxllib/autogui/uiautolib.py +362 -362
- pyxllib/autogui/virtualkey.py +102 -102
- pyxllib/autogui/wechat.py +827 -827
- pyxllib/autogui/wechat_msg.py +421 -421
- pyxllib/autogui/wxautolib.py +84 -84
- pyxllib/cv/__init__.py +5 -5
- pyxllib/cv/expert.py +267 -267
- pyxllib/cv/imfile.py +159 -159
- pyxllib/cv/imhash.py +39 -39
- pyxllib/cv/pupil.py +9 -9
- pyxllib/cv/rgbfmt.py +1525 -1525
- pyxllib/cv/slidercaptcha.py +137 -137
- pyxllib/cv/trackbartools.py +251 -251
- pyxllib/cv/xlcvlib.py +1040 -1040
- pyxllib/cv/xlpillib.py +423 -423
- pyxllib/data/echarts.py +236 -240
- pyxllib/data/jsonlib.py +85 -89
- pyxllib/data/oss.py +72 -72
- pyxllib/data/pglib.py +1111 -1127
- pyxllib/data/sqlite.py +568 -568
- pyxllib/data/sqllib.py +297 -297
- pyxllib/ext/JLineViewer.py +505 -505
- pyxllib/ext/__init__.py +6 -6
- pyxllib/ext/demolib.py +251 -246
- pyxllib/ext/drissionlib.py +277 -277
- pyxllib/ext/kq5034lib.py +12 -12
- pyxllib/ext/qt.py +449 -449
- pyxllib/ext/robustprocfile.py +493 -497
- pyxllib/ext/seleniumlib.py +76 -76
- pyxllib/ext/tk.py +173 -173
- pyxllib/ext/unixlib.py +821 -827
- pyxllib/ext/utools.py +345 -351
- pyxllib/ext/webhook.py +124 -119
- pyxllib/ext/win32lib.py +40 -40
- pyxllib/ext/wjxlib.py +91 -88
- pyxllib/ext/wpsapi.py +124 -124
- pyxllib/ext/xlwork.py +9 -9
- pyxllib/ext/yuquelib.py +1110 -1105
- pyxllib/file/__init__.py +17 -17
- pyxllib/file/docxlib.py +757 -761
- pyxllib/file/gitlib.py +309 -309
- pyxllib/file/libreoffice.py +165 -165
- pyxllib/file/movielib.py +144 -148
- pyxllib/file/newbie.py +10 -10
- pyxllib/file/onenotelib.py +1469 -1469
- pyxllib/file/packlib/__init__.py +330 -330
- pyxllib/file/packlib/zipfile.py +2441 -2441
- pyxllib/file/pdflib.py +422 -426
- pyxllib/file/pupil.py +185 -185
- pyxllib/file/specialist/__init__.py +681 -685
- pyxllib/file/specialist/dirlib.py +799 -799
- pyxllib/file/specialist/download.py +193 -193
- pyxllib/file/specialist/filelib.py +2825 -2829
- pyxllib/file/xlsxlib.py +3122 -3131
- pyxllib/file/xlsyncfile.py +341 -341
- pyxllib/prog/__init__.py +5 -5
- pyxllib/prog/cachetools.py +58 -64
- pyxllib/prog/deprecatedlib.py +233 -233
- pyxllib/prog/filelock.py +42 -42
- pyxllib/prog/ipyexec.py +253 -253
- pyxllib/prog/multiprogs.py +940 -940
- pyxllib/prog/newbie.py +451 -451
- pyxllib/prog/pupil.py +1208 -1197
- pyxllib/prog/sitepackages.py +33 -33
- pyxllib/prog/specialist/__init__.py +348 -391
- pyxllib/prog/specialist/bc.py +203 -203
- pyxllib/prog/specialist/browser.py +497 -497
- pyxllib/prog/specialist/common.py +347 -347
- pyxllib/prog/specialist/datetime.py +198 -198
- pyxllib/prog/specialist/tictoc.py +240 -240
- pyxllib/prog/specialist/xllog.py +180 -180
- pyxllib/prog/xlosenv.py +110 -108
- pyxllib/stdlib/__init__.py +17 -17
- pyxllib/stdlib/tablepyxl/__init__.py +10 -10
- pyxllib/stdlib/tablepyxl/style.py +303 -303
- pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
- pyxllib/text/__init__.py +8 -8
- pyxllib/text/ahocorasick.py +36 -39
- pyxllib/text/airscript.js +754 -744
- pyxllib/text/charclasslib.py +121 -121
- pyxllib/text/jiebalib.py +267 -267
- pyxllib/text/jinjalib.py +27 -32
- pyxllib/text/jsa_ai_prompt.md +271 -271
- pyxllib/text/jscode.py +922 -922
- pyxllib/text/latex/__init__.py +158 -158
- pyxllib/text/levenshtein.py +303 -303
- pyxllib/text/nestenv.py +1215 -1215
- pyxllib/text/newbie.py +300 -300
- pyxllib/text/pupil/__init__.py +8 -8
- pyxllib/text/pupil/common.py +1121 -1121
- pyxllib/text/pupil/xlalign.py +326 -326
- pyxllib/text/pycode.py +47 -47
- pyxllib/text/specialist/__init__.py +8 -8
- pyxllib/text/specialist/common.py +112 -112
- pyxllib/text/specialist/ptag.py +186 -186
- pyxllib/text/spellchecker.py +172 -172
- pyxllib/text/templates/echart_base.html +10 -10
- pyxllib/text/templates/highlight_code.html +16 -16
- pyxllib/text/templates/latex_editor.html +102 -102
- pyxllib/text/vbacode.py +17 -17
- pyxllib/text/xmllib.py +741 -747
- pyxllib/xl.py +42 -39
- pyxllib/xlcv.py +17 -17
- pyxllib-3.201.1.dist-info/METADATA +296 -0
- pyxllib-3.201.1.dist-info/RECORD +125 -0
- {pyxllib-0.3.197.dist-info → pyxllib-3.201.1.dist-info}/licenses/LICENSE +190 -190
- pyxllib/ext/old.py +0 -663
- pyxllib-0.3.197.dist-info/METADATA +0 -48
- pyxllib-0.3.197.dist-info/RECORD +0 -126
- {pyxllib-0.3.197.dist-info → pyxllib-3.201.1.dist-info}/WHEEL +0 -0
@@ -1,303 +1,303 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2020/06/02 19:57
|
6
|
-
|
7
|
-
"""
|
8
|
-
tablepyxl.style的代码
|
9
|
-
坤泽进行了一些修改
|
10
|
-
"""
|
11
|
-
|
12
|
-
# This is where we handle translating css styles into openpyxl styles
|
13
|
-
# and cascading those from parent to child in the dom.
|
14
|
-
|
15
|
-
|
16
|
-
from lxml import html
|
17
|
-
|
18
|
-
from openpyxl.cell import cell
|
19
|
-
from openpyxl.styles import Font, Alignment, PatternFill, NamedStyle, Border, Side, Color
|
20
|
-
from openpyxl.styles.fills import FILL_SOLID
|
21
|
-
from openpyxl.styles.numbers import FORMAT_CURRENCY_USD_SIMPLE, FORMAT_PERCENTAGE
|
22
|
-
from openpyxl.styles.colors import BLACK
|
23
|
-
|
24
|
-
FORMAT_DATE_MMDDYYYY = 'mm/dd/yyyy'
|
25
|
-
|
26
|
-
|
27
|
-
def colormap(color):
|
28
|
-
"""
|
29
|
-
Convenience for looking up known colors
|
30
|
-
"""
|
31
|
-
cmap = {'black': BLACK}
|
32
|
-
return cmap.get(color, color)
|
33
|
-
|
34
|
-
|
35
|
-
def style_string_to_dict(style):
|
36
|
-
"""
|
37
|
-
Convert css style string to a python dictionary
|
38
|
-
"""
|
39
|
-
def clean_split(string, delim):
|
40
|
-
return (s.strip() for s in string.split(delim))
|
41
|
-
styles = [clean_split(s, ":") for s in style.split(";") if ":" in s]
|
42
|
-
return dict(styles)
|
43
|
-
|
44
|
-
|
45
|
-
def get_side(style, name):
|
46
|
-
return {'border_style': style.get('border-{}-style'.format(name)),
|
47
|
-
'color': colormap(style.get('border-{}-color'.format(name)))}
|
48
|
-
|
49
|
-
known_styles = {}
|
50
|
-
|
51
|
-
|
52
|
-
def style_dict_to_named_style(style_dict, number_format=None):
|
53
|
-
"""
|
54
|
-
Change css style (stored in a python dictionary) to openpyxl NamedStyle
|
55
|
-
"""
|
56
|
-
|
57
|
-
style_and_format_string = str({
|
58
|
-
'style_dict': style_dict,
|
59
|
-
'parent': style_dict.parent,
|
60
|
-
'number_format': number_format,
|
61
|
-
})
|
62
|
-
|
63
|
-
if style_and_format_string not in known_styles:
|
64
|
-
# Font
|
65
|
-
font = Font(bold=style_dict.get('font-weight') == 'bold',
|
66
|
-
color=style_dict.get_color('color', None),
|
67
|
-
size=style_dict.get('font-size'))
|
68
|
-
|
69
|
-
# Alignment
|
70
|
-
# 坤泽修改
|
71
|
-
vertical = style_dict.get('vertical-align', 'center')
|
72
|
-
if vertical not in {'bottom', 'justify', 'distributed', 'top', 'center'}: vertical = 'center'
|
73
|
-
alignment = Alignment(horizontal=style_dict.get('text-align', 'general'),
|
74
|
-
vertical=vertical,
|
75
|
-
wrap_text=style_dict.get('white-space', 'nowrap') == 'normal')
|
76
|
-
|
77
|
-
# Fill
|
78
|
-
bg_color = style_dict.get_color('background-color')
|
79
|
-
fg_color = style_dict.get_color('foreground-color', Color())
|
80
|
-
fill_type = style_dict.get('fill-type')
|
81
|
-
if bg_color and bg_color != 'transparent':
|
82
|
-
fill = PatternFill(fill_type=fill_type or FILL_SOLID,
|
83
|
-
start_color=bg_color,
|
84
|
-
end_color=fg_color)
|
85
|
-
else:
|
86
|
-
fill = PatternFill()
|
87
|
-
|
88
|
-
# Border
|
89
|
-
border = Border(left=Side(**get_side(style_dict, 'left')),
|
90
|
-
right=Side(**get_side(style_dict, 'right')),
|
91
|
-
top=Side(**get_side(style_dict, 'top')),
|
92
|
-
bottom=Side(**get_side(style_dict, 'bottom')),
|
93
|
-
diagonal=Side(**get_side(style_dict, 'diagonal')),
|
94
|
-
diagonal_direction=None,
|
95
|
-
outline=Side(**get_side(style_dict, 'outline')),
|
96
|
-
vertical=None,
|
97
|
-
horizontal=None)
|
98
|
-
|
99
|
-
name = 'Style {}'.format(len(known_styles) + 1)
|
100
|
-
|
101
|
-
pyxl_style = NamedStyle(name=name, font=font, fill=fill, alignment=alignment, border=border,
|
102
|
-
number_format=number_format)
|
103
|
-
|
104
|
-
known_styles[style_and_format_string] = pyxl_style
|
105
|
-
|
106
|
-
return known_styles[style_and_format_string]
|
107
|
-
|
108
|
-
|
109
|
-
class StyleDict(dict):
|
110
|
-
"""
|
111
|
-
It's like a dictionary, but it looks for items in the parent dictionary
|
112
|
-
"""
|
113
|
-
def __init__(self, *args, **kwargs):
|
114
|
-
self.parent = kwargs.pop('parent', None)
|
115
|
-
super(StyleDict, self).__init__(*args, **kwargs)
|
116
|
-
|
117
|
-
def __getitem__(self, item):
|
118
|
-
if item in self:
|
119
|
-
return super(StyleDict, self).__getitem__(item)
|
120
|
-
elif self.parent:
|
121
|
-
return self.parent[item]
|
122
|
-
else:
|
123
|
-
raise KeyError('{} not found'.format(item))
|
124
|
-
|
125
|
-
def __hash__(self):
|
126
|
-
return hash(tuple([(k, self.get(k)) for k in self._keys()]))
|
127
|
-
|
128
|
-
# Yielding the keys avoids creating unnecessary data structures
|
129
|
-
# and happily works with both python2 and python3 where the
|
130
|
-
# .keys() method is a dictionary_view in python3 and a list in python2.
|
131
|
-
def _keys(self):
|
132
|
-
yielded = set()
|
133
|
-
for k in self.keys():
|
134
|
-
yielded.add(k)
|
135
|
-
yield k
|
136
|
-
if self.parent:
|
137
|
-
for k in self.parent._keys():
|
138
|
-
if k not in yielded:
|
139
|
-
yielded.add(k)
|
140
|
-
yield k
|
141
|
-
|
142
|
-
def get(self, k, d=None):
|
143
|
-
try:
|
144
|
-
return self[k]
|
145
|
-
except KeyError:
|
146
|
-
return d
|
147
|
-
|
148
|
-
def get_color(self, k, d=None):
|
149
|
-
"""
|
150
|
-
Strip leading # off colors if necessary
|
151
|
-
"""
|
152
|
-
color = self.get(k, d)
|
153
|
-
if hasattr(color, 'startswith') and color.startswith('#'):
|
154
|
-
color = color[1:]
|
155
|
-
if len(color) == 3: # Premailers reduces colors like #00ff00 to #0f0, openpyxl doesn't like that
|
156
|
-
color = ''.join(2 * c for c in color)
|
157
|
-
return color
|
158
|
-
|
159
|
-
|
160
|
-
class Element(object):
|
161
|
-
"""
|
162
|
-
Our base class for representing an html element along with a cascading style.
|
163
|
-
The element is created along with a parent so that the StyleDict that we store
|
164
|
-
can point to the parent's StyleDict.
|
165
|
-
"""
|
166
|
-
def __init__(self, element, parent=None):
|
167
|
-
self.element = element
|
168
|
-
self.number_format = None
|
169
|
-
parent_style = parent.style_dict if parent else None
|
170
|
-
self.style_dict = StyleDict(style_string_to_dict(element.get('style', '')), parent=parent_style)
|
171
|
-
self._style_cache = None
|
172
|
-
|
173
|
-
def style(self):
|
174
|
-
"""
|
175
|
-
Turn the css styles for this element into an openpyxl NamedStyle.
|
176
|
-
"""
|
177
|
-
if not self._style_cache:
|
178
|
-
self._style_cache = style_dict_to_named_style(self.style_dict, number_format=self.number_format)
|
179
|
-
return self._style_cache
|
180
|
-
|
181
|
-
def get_dimension(self, dimension_key):
|
182
|
-
"""
|
183
|
-
Extracts the dimension from the style dict of the Element and returns it as a float.
|
184
|
-
"""
|
185
|
-
dimension = self.style_dict.get(dimension_key)
|
186
|
-
if dimension:
|
187
|
-
if dimension[-2:] in ['px', 'em', 'pt', 'in', 'cm']:
|
188
|
-
dimension = dimension[:-2]
|
189
|
-
dimension = float(dimension)
|
190
|
-
return dimension
|
191
|
-
|
192
|
-
|
193
|
-
class Table(Element):
|
194
|
-
"""
|
195
|
-
The concrete implementations of Elements are semantically named for the types of elements we are interested in.
|
196
|
-
This defines a very concrete tree structure for html tables that we expect to deal with. I prefer this compared to
|
197
|
-
allowing Element to have an arbitrary number of children and dealing with an abstract element tree.
|
198
|
-
|
199
|
-
"""
|
200
|
-
def __init__(self, table):
|
201
|
-
"""
|
202
|
-
takes an html table object (from lxml)
|
203
|
-
"""
|
204
|
-
super(Table, self).__init__(table)
|
205
|
-
table_head = table.find('thead')
|
206
|
-
self.head = TableHead(table_head, parent=self) if table_head is not None else None
|
207
|
-
table_body = table.find('tbody')
|
208
|
-
self.body = TableBody(table_body if table_body is not None else table, parent=self)
|
209
|
-
|
210
|
-
|
211
|
-
class TableHead(Element):
|
212
|
-
"""
|
213
|
-
This class maps to the `<th>` element of the html table.
|
214
|
-
"""
|
215
|
-
def __init__(self, head, parent=None):
|
216
|
-
super(TableHead, self).__init__(head, parent=parent)
|
217
|
-
self.rows = [TableRow(tr, parent=self) for tr in head.findall('tr')]
|
218
|
-
|
219
|
-
|
220
|
-
class TableBody(Element):
|
221
|
-
"""
|
222
|
-
This class maps to the `<tbody>` element of the html table.
|
223
|
-
"""
|
224
|
-
def __init__(self, body, parent=None):
|
225
|
-
super(TableBody, self).__init__(body, parent=parent)
|
226
|
-
self.rows = [TableRow(tr, parent=self) for tr in body.findall('tr')]
|
227
|
-
|
228
|
-
|
229
|
-
class TableRow(Element):
|
230
|
-
"""
|
231
|
-
This class maps to the `<tr>` element of the html table.
|
232
|
-
"""
|
233
|
-
def __init__(self, tr, parent=None):
|
234
|
-
super(TableRow, self).__init__(tr, parent=parent)
|
235
|
-
self.cells = [TableCell(cell, parent=self) for cell in tr.findall('th') + tr.findall('td')]
|
236
|
-
|
237
|
-
|
238
|
-
def element_to_string(el):
|
239
|
-
return _element_to_string(el).strip()
|
240
|
-
|
241
|
-
|
242
|
-
def _element_to_string(el):
|
243
|
-
string = ''
|
244
|
-
|
245
|
-
for x in el.iterchildren():
|
246
|
-
# 表格里的内容保持不变
|
247
|
-
# string += '\n' + _element_to_string(x)
|
248
|
-
string += html.tostring(x, encoding='unicode', with_tail=False)
|
249
|
-
|
250
|
-
text = el.text.strip() if el.text else ''
|
251
|
-
tail = el.tail.strip() if el.tail else ''
|
252
|
-
|
253
|
-
return text + string + '\n' + tail
|
254
|
-
|
255
|
-
|
256
|
-
class TableCell(Element):
|
257
|
-
"""
|
258
|
-
This class maps to the `<td>` element of the html table.
|
259
|
-
"""
|
260
|
-
CELL_TYPES = {'TYPE_STRING', 'TYPE_FORMULA', 'TYPE_NUMERIC', 'TYPE_BOOL', 'TYPE_CURRENCY', 'TYPE_PERCENTAGE',
|
261
|
-
'TYPE_NULL', 'TYPE_INLINE', 'TYPE_ERROR', 'TYPE_FORMULA_CACHE_STRING', 'TYPE_INTEGER'}
|
262
|
-
|
263
|
-
def __init__(self, cell, parent=None):
|
264
|
-
super(TableCell, self).__init__(cell, parent=parent)
|
265
|
-
self.value = element_to_string(cell)
|
266
|
-
self.number_format = self.get_number_format()
|
267
|
-
|
268
|
-
def data_type(self):
|
269
|
-
cell_types = self.CELL_TYPES & set(self.element.get('class', '').split())
|
270
|
-
if cell_types:
|
271
|
-
if 'TYPE_FORMULA' in cell_types:
|
272
|
-
# Make sure TYPE_FORMULA takes precedence over the other classes in the set.
|
273
|
-
cell_type = 'TYPE_FORMULA'
|
274
|
-
elif cell_types & {'TYPE_CURRENCY', 'TYPE_INTEGER', 'TYPE_PERCENTAGE'}:
|
275
|
-
cell_type = 'TYPE_NUMERIC'
|
276
|
-
else:
|
277
|
-
cell_type = cell_types.pop()
|
278
|
-
else:
|
279
|
-
cell_type = 'TYPE_STRING'
|
280
|
-
return getattr(cell, cell_type)
|
281
|
-
|
282
|
-
def get_number_format(self):
|
283
|
-
if 'TYPE_CURRENCY' in self.element.get('class', '').split():
|
284
|
-
return FORMAT_CURRENCY_USD_SIMPLE
|
285
|
-
if 'TYPE_INTEGER' in self.element.get('class', '').split():
|
286
|
-
return '#,##0'
|
287
|
-
if 'TYPE_PERCENTAGE' in self.element.get('class', '').split():
|
288
|
-
return FORMAT_PERCENTAGE
|
289
|
-
if 'TYPE_DATE' in self.element.get('class', '').split():
|
290
|
-
return FORMAT_DATE_MMDDYYYY
|
291
|
-
if self.data_type() == cell.TYPE_NUMERIC:
|
292
|
-
try:
|
293
|
-
int(self.value)
|
294
|
-
except ValueError:
|
295
|
-
return '#,##0.##'
|
296
|
-
else:
|
297
|
-
return '#,##0'
|
298
|
-
|
299
|
-
def format(self, cell):
|
300
|
-
cell.style = self.style()
|
301
|
-
data_type = self.data_type()
|
302
|
-
if data_type:
|
303
|
-
cell.data_type = data_type
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2020/06/02 19:57
|
6
|
+
|
7
|
+
"""
|
8
|
+
tablepyxl.style的代码
|
9
|
+
坤泽进行了一些修改
|
10
|
+
"""
|
11
|
+
|
12
|
+
# This is where we handle translating css styles into openpyxl styles
|
13
|
+
# and cascading those from parent to child in the dom.
|
14
|
+
|
15
|
+
|
16
|
+
from lxml import html
|
17
|
+
|
18
|
+
from openpyxl.cell import cell
|
19
|
+
from openpyxl.styles import Font, Alignment, PatternFill, NamedStyle, Border, Side, Color
|
20
|
+
from openpyxl.styles.fills import FILL_SOLID
|
21
|
+
from openpyxl.styles.numbers import FORMAT_CURRENCY_USD_SIMPLE, FORMAT_PERCENTAGE
|
22
|
+
from openpyxl.styles.colors import BLACK
|
23
|
+
|
24
|
+
# Number-format string (month/day/year) that TableCell.get_number_format
# returns for cells whose class attribute contains TYPE_DATE.
FORMAT_DATE_MMDDYYYY = 'mm/dd/yyyy'
|
25
|
+
|
26
|
+
|
27
|
+
def colormap(color):
    """
    Translate a known colour name into the matching openpyxl colour constant.

    Only ``'black'`` is recognised; any other value (including ``None``) is
    handed back unchanged.
    """
    named_colors = {'black': BLACK}
    try:
        return named_colors[color]
    except KeyError:
        # Not a name we know about: pass the caller's value straight through.
        return color
|
33
|
+
|
34
|
+
|
35
|
+
def style_string_to_dict(style):
    """
    Convert a css style string into a python dictionary.

    The string is split into declarations on ``;``. Each declaration is split
    into property and value on its FIRST ``:`` only, so values that themselves
    contain a colon (for example ``url(http://...)``) are kept intact instead
    of making the conversion fail. Declarations without a colon are ignored.
    Property names and values are stripped of surrounding whitespace; when a
    property appears more than once the last occurrence wins.

    :param style: the raw content of a ``style`` attribute
    :return: dict mapping property name to value
    """
    parsed = {}
    for declaration in style.split(';'):
        prop, separator, value = declaration.partition(':')
        if not separator:
            # No colon at all: not a "property: value" pair, skip it.
            continue
        parsed[prop.strip()] = value.strip()
    return parsed
|
43
|
+
|
44
|
+
|
45
|
+
def get_side(style, name):
    """
    Collect the keyword arguments describing one border side.

    Looks up ``border-<name>-style`` and ``border-<name>-color`` in ``style``
    (missing entries yield ``None``) and runs the colour through ``colormap``.

    :param style: mapping of css properties offering ``.get``
    :param name: side name inserted into the property names, e.g. ``'left'``
    :return: dict with the keys ``border_style`` and ``color``
    """
    style_key = 'border-{side}-style'.format(side=name)
    color_key = 'border-{side}-color'.format(side=name)

    side_kwargs = {}
    side_kwargs['border_style'] = style.get(style_key)
    side_kwargs['color'] = colormap(style.get(color_key))
    return side_kwargs
|
48
|
+
|
49
|
+
# Module-level cache: key string describing a cascading style -> NamedStyle.
# Entries are only ever added here, never removed.
known_styles = {}


def style_dict_to_named_style(style_dict, number_format=None):
    """
    Change css style (stored in a python dictionary) to openpyxl NamedStyle.

    Results are cached in ``known_styles``. The cache key covers the own items
    of ``style_dict`` and of EVERY ancestor reachable through ``.parent``,
    plus ``number_format``. Keying only on the dict and its direct parent is
    not enough: values inherited from further up the chain take part in the
    lookups below, so two elements that differ only in such inherited values
    would otherwise be handed the same cached style.

    :param style_dict: a StyleDict (needs ``.get``, ``.get_color``, ``.parent``)
    :param number_format: number format stored on the resulting NamedStyle
    :return: the cached or newly created NamedStyle
    """
    # Walk the whole parent chain so every value that can be inherited is
    # reflected in the key.
    style_chain = []
    node = style_dict
    while node is not None:
        style_chain.append(dict(node))
        node = getattr(node, 'parent', None)

    style_and_format_string = str({
        'style_chain': style_chain,
        'number_format': number_format,
    })

    if style_and_format_string not in known_styles:
        # Font
        font = Font(bold=style_dict.get('font-weight') == 'bold',
                    color=style_dict.get_color('color', None),
                    size=style_dict.get('font-size'))

        # Alignment
        # Modified by Kunze: vertical alignment defaults to 'center', and any
        # value outside the set below also falls back to 'center'.
        vertical = style_dict.get('vertical-align', 'center')
        if vertical not in {'bottom', 'justify', 'distributed', 'top', 'center'}:
            vertical = 'center'
        alignment = Alignment(horizontal=style_dict.get('text-align', 'general'),
                              vertical=vertical,
                              wrap_text=style_dict.get('white-space', 'nowrap') == 'normal')

        # Fill
        bg_color = style_dict.get_color('background-color')
        fg_color = style_dict.get_color('foreground-color', Color())
        fill_type = style_dict.get('fill-type')
        if bg_color and bg_color != 'transparent':
            # A solid fill is used unless the style names another fill type.
            fill = PatternFill(fill_type=fill_type or FILL_SOLID,
                               start_color=bg_color,
                               end_color=fg_color)
        else:
            fill = PatternFill()

        # Border
        border = Border(left=Side(**get_side(style_dict, 'left')),
                        right=Side(**get_side(style_dict, 'right')),
                        top=Side(**get_side(style_dict, 'top')),
                        bottom=Side(**get_side(style_dict, 'bottom')),
                        diagonal=Side(**get_side(style_dict, 'diagonal')),
                        diagonal_direction=None,
                        outline=Side(**get_side(style_dict, 'outline')),
                        vertical=None,
                        horizontal=None)

        # Names are numbered by the current cache size: 'Style 1', 'Style 2', ...
        name = 'Style {}'.format(len(known_styles) + 1)

        pyxl_style = NamedStyle(name=name, font=font, fill=fill, alignment=alignment, border=border,
                                number_format=number_format)

        known_styles[style_and_format_string] = pyxl_style

    return known_styles[style_and_format_string]
|
107
|
+
|
108
|
+
|
109
|
+
class StyleDict(dict):
    """
    It's like a dictionary, but it looks for items in the parent dictionary.

    A lookup that misses in this dict is delegated to ``parent`` (another
    StyleDict or ``None``), which in turn delegates to its own parent. The
    parent is tested with ``is not None`` rather than by truthiness: an EMPTY
    parent dict is falsy, and treating it as "no parent" would cut the chain
    and hide everything defined further up.
    """
    def __init__(self, *args, **kwargs):
        # 'parent' is consumed here; all other arguments build the dict itself.
        self.parent = kwargs.pop('parent', None)
        super().__init__(*args, **kwargs)

    def __getitem__(self, item):
        """Return the own value for ``item``, else the nearest ancestor's; raise KeyError if none has it."""
        if item in self:
            return super().__getitem__(item)
        elif self.parent is not None:
            return self.parent[item]
        else:
            raise KeyError('{} not found'.format(item))

    def __hash__(self):
        """Hash over the effective (key, value) pairs, inherited ones included."""
        return hash(tuple([(k, self.get(k)) for k in self._keys()]))

    # Yielding the keys avoids creating unnecessary data structures
    # and happily works with both python2 and python3 where the
    # .keys() method is a dictionary_view in python3 and a list in python2.
    def _keys(self):
        """Yield own keys first, then each ancestor's keys not yielded before."""
        yielded = set()
        for k in self.keys():
            yielded.add(k)
            yield k
        if self.parent is not None:
            for k in self.parent._keys():
                if k not in yielded:
                    yielded.add(k)
                    yield k

    def get(self, k, d=None):
        """Like ``dict.get``, but the lookup also consults the parent chain."""
        try:
            return self[k]
        except KeyError:
            return d

    def get_color(self, k, d=None):
        """
        Strip leading # off colors if necessary.

        Three-character values such as ``0f0`` are expanded to ``00ff00`` by
        doubling each character. Values without a ``startswith`` method
        (``None``, colour objects) are returned untouched.
        """
        color = self.get(k, d)
        if hasattr(color, 'startswith') and color.startswith('#'):
            color = color[1:]
            if len(color) == 3:  # Premailers reduces colors like #00ff00 to #0f0, openpyxl doesn't like that
                color = ''.join(2 * c for c in color)
        return color
|
158
|
+
|
159
|
+
|
160
|
+
class Element(object):
    """
    Base wrapper pairing an html element with its cascading style.

    Each wrapper owns a StyleDict parsed from the element's ``style``
    attribute; when a parent wrapper is supplied, that StyleDict is linked to
    the parent's so lookups can fall back to it.
    """
    def __init__(self, element, parent=None):
        self.element = element
        self.number_format = None

        inherited = None
        if parent:
            inherited = parent.style_dict
        own_styles = style_string_to_dict(element.get('style', ''))
        self.style_dict = StyleDict(own_styles, parent=inherited)

        # Filled in by the first call to style().
        self._style_cache = None

    def style(self):
        """
        Return the openpyxl NamedStyle for this element's css styles.

        The style is built on first use and remembered on the instance.
        """
        if self._style_cache:
            return self._style_cache
        self._style_cache = style_dict_to_named_style(self.style_dict, number_format=self.number_format)
        return self._style_cache

    def get_dimension(self, dimension_key):
        """
        Read a dimension from the element's style dict and return it as a float.

        A trailing two-letter unit (px, em, pt, in, cm) is dropped before the
        conversion. A missing or empty entry is returned as found.
        """
        raw = self.style_dict.get(dimension_key)
        if not raw:
            return raw
        if raw[-2:] in ('px', 'em', 'pt', 'in', 'cm'):
            raw = raw[:-2]
        return float(raw)
|
191
|
+
|
192
|
+
|
193
|
+
class Table(Element):
    """
    Root of the fixed wrapper tree used for html tables.

    The tree is deliberately concrete (table -> head/body -> rows -> cells)
    instead of a generic element with arbitrary children.
    """
    def __init__(self, table):
        """
        Wrap an html table element (from lxml).

        ``head`` is ``None`` when the table has no ``thead``. When there is no
        ``tbody`` the table element itself serves as the body.
        """
        super().__init__(table)

        thead = table.find('thead')
        if thead is None:
            self.head = None
        else:
            self.head = TableHead(thead, parent=self)

        tbody = table.find('tbody')
        if tbody is None:
            tbody = table
        self.body = TableBody(tbody, parent=self)
|
209
|
+
|
210
|
+
|
211
|
+
class TableHead(Element):
    """
    Wrapper for the ``<thead>`` element of the html table.

    ``rows`` holds one TableRow per ``tr`` found directly under the head.
    """
    def __init__(self, head, parent=None):
        super().__init__(head, parent=parent)
        wrapped_rows = []
        for tr in head.findall('tr'):
            wrapped_rows.append(TableRow(tr, parent=self))
        self.rows = wrapped_rows
|
218
|
+
|
219
|
+
|
220
|
+
class TableBody(Element):
    """
    Wrapper for the ``<tbody>`` element of the html table.

    ``rows`` holds one TableRow per ``tr`` found directly under the body.
    """
    def __init__(self, body, parent=None):
        super().__init__(body, parent=parent)
        wrapped_rows = []
        for tr in body.findall('tr'):
            wrapped_rows.append(TableRow(tr, parent=self))
        self.rows = wrapped_rows
|
227
|
+
|
228
|
+
|
229
|
+
class TableRow(Element):
    """
    This class maps to the `<tr>` element of the html table.

    ``cells`` holds one TableCell per ``th``/``td`` child, in document order.
    Collecting all ``th`` first and all ``td`` afterwards would move header
    cells to the front of the row even when the markup places them after
    data cells, shifting values into the wrong columns.
    """
    def __init__(self, tr, parent=None):
        super(TableRow, self).__init__(tr, parent=parent)
        # Comments and other non-cell children have a tag that matches
        # neither name, so they are skipped.
        self.cells = [TableCell(child, parent=self)
                      for child in tr.iterchildren()
                      if child.tag in ('th', 'td')]
|
236
|
+
|
237
|
+
|
238
|
+
def element_to_string(el):
    """Return the string form of ``el`` built by ``_element_to_string``, stripped of surrounding whitespace."""
    raw = _element_to_string(el)
    return raw.strip()
|
240
|
+
|
241
|
+
|
242
|
+
def _element_to_string(el):
|
243
|
+
string = ''
|
244
|
+
|
245
|
+
for x in el.iterchildren():
|
246
|
+
# 表格里的内容保持不变
|
247
|
+
# string += '\n' + _element_to_string(x)
|
248
|
+
string += html.tostring(x, encoding='unicode', with_tail=False)
|
249
|
+
|
250
|
+
text = el.text.strip() if el.text else ''
|
251
|
+
tail = el.tail.strip() if el.tail else ''
|
252
|
+
|
253
|
+
return text + string + '\n' + tail
|
254
|
+
|
255
|
+
|
256
|
+
class TableCell(Element):
    """
    Wrapper for a single cell element of the html table.

    Stores the cell's string content in ``value`` and derives the openpyxl
    data type and number format from ``TYPE_*`` names in the element's
    ``class`` attribute.
    """
    CELL_TYPES = {'TYPE_STRING', 'TYPE_FORMULA', 'TYPE_NUMERIC', 'TYPE_BOOL', 'TYPE_CURRENCY', 'TYPE_PERCENTAGE',
                  'TYPE_NULL', 'TYPE_INLINE', 'TYPE_ERROR', 'TYPE_FORMULA_CACHE_STRING', 'TYPE_INTEGER'}

    def __init__(self, cell, parent=None):
        super().__init__(cell, parent=parent)
        self.value = element_to_string(cell)
        self.number_format = self.get_number_format()

    def data_type(self):
        """
        Return the openpyxl cell-type constant selected by the class attribute.

        TYPE_FORMULA wins over everything else; currency, integer and
        percentage classes all map to TYPE_NUMERIC; with no recognised class
        the cell is TYPE_STRING.
        """
        declared = self.CELL_TYPES & set(self.element.get('class', '').split())
        if not declared:
            type_name = 'TYPE_STRING'
        elif 'TYPE_FORMULA' in declared:
            # Make sure TYPE_FORMULA takes precedence over the other classes in the set.
            type_name = 'TYPE_FORMULA'
        elif declared & {'TYPE_CURRENCY', 'TYPE_INTEGER', 'TYPE_PERCENTAGE'}:
            type_name = 'TYPE_NUMERIC'
        else:
            # Several remaining names: whichever one the set hands out is used.
            type_name = declared.pop()
        return getattr(cell, type_name)

    def get_number_format(self):
        """
        Pick the number format for this cell, or ``None`` when none applies.

        Explicit classes are checked in the order currency, integer,
        percentage, date. Otherwise a numeric cell gets an integer format
        when its value parses with ``int`` and a decimal format when not.
        """
        class_names = self.element.get('class', '').split()
        explicit_formats = (
            ('TYPE_CURRENCY', FORMAT_CURRENCY_USD_SIMPLE),
            ('TYPE_INTEGER', '#,##0'),
            ('TYPE_PERCENTAGE', FORMAT_PERCENTAGE),
            ('TYPE_DATE', FORMAT_DATE_MMDDYYYY),
        )
        for class_name, number_format in explicit_formats:
            if class_name in class_names:
                return number_format

        if self.data_type() != cell.TYPE_NUMERIC:
            return None
        try:
            int(self.value)
        except ValueError:
            return '#,##0.##'
        return '#,##0'

    def format(self, cell):
        """Apply this element's named style and data type to an openpyxl cell."""
        cell.style = self.style()
        resolved_type = self.data_type()
        if resolved_type:
            cell.data_type = resolved_type
|