pyxllib 0.3.197__py3-none-any.whl → 3.201.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. pyxllib/__init__.py +14 -21
  2. pyxllib/algo/__init__.py +8 -8
  3. pyxllib/algo/disjoint.py +54 -54
  4. pyxllib/algo/geo.py +537 -541
  5. pyxllib/algo/intervals.py +964 -964
  6. pyxllib/algo/matcher.py +389 -389
  7. pyxllib/algo/newbie.py +166 -166
  8. pyxllib/algo/pupil.py +629 -629
  9. pyxllib/algo/shapelylib.py +67 -67
  10. pyxllib/algo/specialist.py +241 -241
  11. pyxllib/algo/stat.py +494 -494
  12. pyxllib/algo/treelib.py +145 -149
  13. pyxllib/algo/unitlib.py +62 -66
  14. pyxllib/autogui/__init__.py +5 -5
  15. pyxllib/autogui/activewin.py +246 -246
  16. pyxllib/autogui/all.py +9 -9
  17. pyxllib/autogui/autogui.py +846 -852
  18. pyxllib/autogui/uiautolib.py +362 -362
  19. pyxllib/autogui/virtualkey.py +102 -102
  20. pyxllib/autogui/wechat.py +827 -827
  21. pyxllib/autogui/wechat_msg.py +421 -421
  22. pyxllib/autogui/wxautolib.py +84 -84
  23. pyxllib/cv/__init__.py +5 -5
  24. pyxllib/cv/expert.py +267 -267
  25. pyxllib/cv/imfile.py +159 -159
  26. pyxllib/cv/imhash.py +39 -39
  27. pyxllib/cv/pupil.py +9 -9
  28. pyxllib/cv/rgbfmt.py +1525 -1525
  29. pyxllib/cv/slidercaptcha.py +137 -137
  30. pyxllib/cv/trackbartools.py +251 -251
  31. pyxllib/cv/xlcvlib.py +1040 -1040
  32. pyxllib/cv/xlpillib.py +423 -423
  33. pyxllib/data/echarts.py +236 -240
  34. pyxllib/data/jsonlib.py +85 -89
  35. pyxllib/data/oss.py +72 -72
  36. pyxllib/data/pglib.py +1111 -1127
  37. pyxllib/data/sqlite.py +568 -568
  38. pyxllib/data/sqllib.py +297 -297
  39. pyxllib/ext/JLineViewer.py +505 -505
  40. pyxllib/ext/__init__.py +6 -6
  41. pyxllib/ext/demolib.py +251 -246
  42. pyxllib/ext/drissionlib.py +277 -277
  43. pyxllib/ext/kq5034lib.py +12 -12
  44. pyxllib/ext/qt.py +449 -449
  45. pyxllib/ext/robustprocfile.py +493 -497
  46. pyxllib/ext/seleniumlib.py +76 -76
  47. pyxllib/ext/tk.py +173 -173
  48. pyxllib/ext/unixlib.py +821 -827
  49. pyxllib/ext/utools.py +345 -351
  50. pyxllib/ext/webhook.py +124 -119
  51. pyxllib/ext/win32lib.py +40 -40
  52. pyxllib/ext/wjxlib.py +91 -88
  53. pyxllib/ext/wpsapi.py +124 -124
  54. pyxllib/ext/xlwork.py +9 -9
  55. pyxllib/ext/yuquelib.py +1110 -1105
  56. pyxllib/file/__init__.py +17 -17
  57. pyxllib/file/docxlib.py +757 -761
  58. pyxllib/file/gitlib.py +309 -309
  59. pyxllib/file/libreoffice.py +165 -165
  60. pyxllib/file/movielib.py +144 -148
  61. pyxllib/file/newbie.py +10 -10
  62. pyxllib/file/onenotelib.py +1469 -1469
  63. pyxllib/file/packlib/__init__.py +330 -330
  64. pyxllib/file/packlib/zipfile.py +2441 -2441
  65. pyxllib/file/pdflib.py +422 -426
  66. pyxllib/file/pupil.py +185 -185
  67. pyxllib/file/specialist/__init__.py +681 -685
  68. pyxllib/file/specialist/dirlib.py +799 -799
  69. pyxllib/file/specialist/download.py +193 -193
  70. pyxllib/file/specialist/filelib.py +2825 -2829
  71. pyxllib/file/xlsxlib.py +3122 -3131
  72. pyxllib/file/xlsyncfile.py +341 -341
  73. pyxllib/prog/__init__.py +5 -5
  74. pyxllib/prog/cachetools.py +58 -64
  75. pyxllib/prog/deprecatedlib.py +233 -233
  76. pyxllib/prog/filelock.py +42 -42
  77. pyxllib/prog/ipyexec.py +253 -253
  78. pyxllib/prog/multiprogs.py +940 -940
  79. pyxllib/prog/newbie.py +451 -451
  80. pyxllib/prog/pupil.py +1208 -1197
  81. pyxllib/prog/sitepackages.py +33 -33
  82. pyxllib/prog/specialist/__init__.py +348 -391
  83. pyxllib/prog/specialist/bc.py +203 -203
  84. pyxllib/prog/specialist/browser.py +497 -497
  85. pyxllib/prog/specialist/common.py +347 -347
  86. pyxllib/prog/specialist/datetime.py +198 -198
  87. pyxllib/prog/specialist/tictoc.py +240 -240
  88. pyxllib/prog/specialist/xllog.py +180 -180
  89. pyxllib/prog/xlosenv.py +110 -108
  90. pyxllib/stdlib/__init__.py +17 -17
  91. pyxllib/stdlib/tablepyxl/__init__.py +10 -10
  92. pyxllib/stdlib/tablepyxl/style.py +303 -303
  93. pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
  94. pyxllib/text/__init__.py +8 -8
  95. pyxllib/text/ahocorasick.py +36 -39
  96. pyxllib/text/airscript.js +754 -744
  97. pyxllib/text/charclasslib.py +121 -121
  98. pyxllib/text/jiebalib.py +267 -267
  99. pyxllib/text/jinjalib.py +27 -32
  100. pyxllib/text/jsa_ai_prompt.md +271 -271
  101. pyxllib/text/jscode.py +922 -922
  102. pyxllib/text/latex/__init__.py +158 -158
  103. pyxllib/text/levenshtein.py +303 -303
  104. pyxllib/text/nestenv.py +1215 -1215
  105. pyxllib/text/newbie.py +300 -300
  106. pyxllib/text/pupil/__init__.py +8 -8
  107. pyxllib/text/pupil/common.py +1121 -1121
  108. pyxllib/text/pupil/xlalign.py +326 -326
  109. pyxllib/text/pycode.py +47 -47
  110. pyxllib/text/specialist/__init__.py +8 -8
  111. pyxllib/text/specialist/common.py +112 -112
  112. pyxllib/text/specialist/ptag.py +186 -186
  113. pyxllib/text/spellchecker.py +172 -172
  114. pyxllib/text/templates/echart_base.html +10 -10
  115. pyxllib/text/templates/highlight_code.html +16 -16
  116. pyxllib/text/templates/latex_editor.html +102 -102
  117. pyxllib/text/vbacode.py +17 -17
  118. pyxllib/text/xmllib.py +741 -747
  119. pyxllib/xl.py +42 -39
  120. pyxllib/xlcv.py +17 -17
  121. pyxllib-3.201.1.dist-info/METADATA +296 -0
  122. pyxllib-3.201.1.dist-info/RECORD +125 -0
  123. {pyxllib-0.3.197.dist-info → pyxllib-3.201.1.dist-info}/licenses/LICENSE +190 -190
  124. pyxllib/ext/old.py +0 -663
  125. pyxllib-0.3.197.dist-info/METADATA +0 -48
  126. pyxllib-0.3.197.dist-info/RECORD +0 -126
  127. {pyxllib-0.3.197.dist-info → pyxllib-3.201.1.dist-info}/WHEEL +0 -0
@@ -1,303 +1,303 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2020/06/02 19:57
6
-
7
- """
8
- tablepyxl.style的代码
9
- 坤泽进行了一些修改
10
- """
11
-
12
- # This is where we handle translating css styles into openpyxl styles
13
- # and cascading those from parent to child in the dom.
14
-
15
-
16
- from lxml import html
17
-
18
- from openpyxl.cell import cell
19
- from openpyxl.styles import Font, Alignment, PatternFill, NamedStyle, Border, Side, Color
20
- from openpyxl.styles.fills import FILL_SOLID
21
- from openpyxl.styles.numbers import FORMAT_CURRENCY_USD_SIMPLE, FORMAT_PERCENTAGE
22
- from openpyxl.styles.colors import BLACK
23
-
24
- FORMAT_DATE_MMDDYYYY = 'mm/dd/yyyy'
25
-
26
-
27
- def colormap(color):
28
- """
29
- Convenience for looking up known colors
30
- """
31
- cmap = {'black': BLACK}
32
- return cmap.get(color, color)
33
-
34
-
35
- def style_string_to_dict(style):
36
- """
37
- Convert css style string to a python dictionary
38
- """
39
- def clean_split(string, delim):
40
- return (s.strip() for s in string.split(delim))
41
- styles = [clean_split(s, ":") for s in style.split(";") if ":" in s]
42
- return dict(styles)
43
-
44
-
45
- def get_side(style, name):
46
- return {'border_style': style.get('border-{}-style'.format(name)),
47
- 'color': colormap(style.get('border-{}-color'.format(name)))}
48
-
49
- known_styles = {}
50
-
51
-
52
- def style_dict_to_named_style(style_dict, number_format=None):
53
- """
54
- Change css style (stored in a python dictionary) to openpyxl NamedStyle
55
- """
56
-
57
- style_and_format_string = str({
58
- 'style_dict': style_dict,
59
- 'parent': style_dict.parent,
60
- 'number_format': number_format,
61
- })
62
-
63
- if style_and_format_string not in known_styles:
64
- # Font
65
- font = Font(bold=style_dict.get('font-weight') == 'bold',
66
- color=style_dict.get_color('color', None),
67
- size=style_dict.get('font-size'))
68
-
69
- # Alignment
70
- # 坤泽修改
71
- vertical = style_dict.get('vertical-align', 'center')
72
- if vertical not in {'bottom', 'justify', 'distributed', 'top', 'center'}: vertical = 'center'
73
- alignment = Alignment(horizontal=style_dict.get('text-align', 'general'),
74
- vertical=vertical,
75
- wrap_text=style_dict.get('white-space', 'nowrap') == 'normal')
76
-
77
- # Fill
78
- bg_color = style_dict.get_color('background-color')
79
- fg_color = style_dict.get_color('foreground-color', Color())
80
- fill_type = style_dict.get('fill-type')
81
- if bg_color and bg_color != 'transparent':
82
- fill = PatternFill(fill_type=fill_type or FILL_SOLID,
83
- start_color=bg_color,
84
- end_color=fg_color)
85
- else:
86
- fill = PatternFill()
87
-
88
- # Border
89
- border = Border(left=Side(**get_side(style_dict, 'left')),
90
- right=Side(**get_side(style_dict, 'right')),
91
- top=Side(**get_side(style_dict, 'top')),
92
- bottom=Side(**get_side(style_dict, 'bottom')),
93
- diagonal=Side(**get_side(style_dict, 'diagonal')),
94
- diagonal_direction=None,
95
- outline=Side(**get_side(style_dict, 'outline')),
96
- vertical=None,
97
- horizontal=None)
98
-
99
- name = 'Style {}'.format(len(known_styles) + 1)
100
-
101
- pyxl_style = NamedStyle(name=name, font=font, fill=fill, alignment=alignment, border=border,
102
- number_format=number_format)
103
-
104
- known_styles[style_and_format_string] = pyxl_style
105
-
106
- return known_styles[style_and_format_string]
107
-
108
-
109
- class StyleDict(dict):
110
- """
111
- It's like a dictionary, but it looks for items in the parent dictionary
112
- """
113
- def __init__(self, *args, **kwargs):
114
- self.parent = kwargs.pop('parent', None)
115
- super(StyleDict, self).__init__(*args, **kwargs)
116
-
117
- def __getitem__(self, item):
118
- if item in self:
119
- return super(StyleDict, self).__getitem__(item)
120
- elif self.parent:
121
- return self.parent[item]
122
- else:
123
- raise KeyError('{} not found'.format(item))
124
-
125
- def __hash__(self):
126
- return hash(tuple([(k, self.get(k)) for k in self._keys()]))
127
-
128
- # Yielding the keys avoids creating unnecessary data structures
129
- # and happily works with both python2 and python3 where the
130
- # .keys() method is a dictionary_view in python3 and a list in python2.
131
- def _keys(self):
132
- yielded = set()
133
- for k in self.keys():
134
- yielded.add(k)
135
- yield k
136
- if self.parent:
137
- for k in self.parent._keys():
138
- if k not in yielded:
139
- yielded.add(k)
140
- yield k
141
-
142
- def get(self, k, d=None):
143
- try:
144
- return self[k]
145
- except KeyError:
146
- return d
147
-
148
- def get_color(self, k, d=None):
149
- """
150
- Strip leading # off colors if necessary
151
- """
152
- color = self.get(k, d)
153
- if hasattr(color, 'startswith') and color.startswith('#'):
154
- color = color[1:]
155
- if len(color) == 3: # Premailers reduces colors like #00ff00 to #0f0, openpyxl doesn't like that
156
- color = ''.join(2 * c for c in color)
157
- return color
158
-
159
-
160
- class Element(object):
161
- """
162
- Our base class for representing an html element along with a cascading style.
163
- The element is created along with a parent so that the StyleDict that we store
164
- can point to the parent's StyleDict.
165
- """
166
- def __init__(self, element, parent=None):
167
- self.element = element
168
- self.number_format = None
169
- parent_style = parent.style_dict if parent else None
170
- self.style_dict = StyleDict(style_string_to_dict(element.get('style', '')), parent=parent_style)
171
- self._style_cache = None
172
-
173
- def style(self):
174
- """
175
- Turn the css styles for this element into an openpyxl NamedStyle.
176
- """
177
- if not self._style_cache:
178
- self._style_cache = style_dict_to_named_style(self.style_dict, number_format=self.number_format)
179
- return self._style_cache
180
-
181
- def get_dimension(self, dimension_key):
182
- """
183
- Extracts the dimension from the style dict of the Element and returns it as a float.
184
- """
185
- dimension = self.style_dict.get(dimension_key)
186
- if dimension:
187
- if dimension[-2:] in ['px', 'em', 'pt', 'in', 'cm']:
188
- dimension = dimension[:-2]
189
- dimension = float(dimension)
190
- return dimension
191
-
192
-
193
- class Table(Element):
194
- """
195
- The concrete implementations of Elements are semantically named for the types of elements we are interested in.
196
- This defines a very concrete tree structure for html tables that we expect to deal with. I prefer this compared to
197
- allowing Element to have an arbitrary number of children and dealing with an abstract element tree.
198
-
199
- """
200
- def __init__(self, table):
201
- """
202
- takes an html table object (from lxml)
203
- """
204
- super(Table, self).__init__(table)
205
- table_head = table.find('thead')
206
- self.head = TableHead(table_head, parent=self) if table_head is not None else None
207
- table_body = table.find('tbody')
208
- self.body = TableBody(table_body if table_body is not None else table, parent=self)
209
-
210
-
211
- class TableHead(Element):
212
- """
213
- This class maps to the `<th>` element of the html table.
214
- """
215
- def __init__(self, head, parent=None):
216
- super(TableHead, self).__init__(head, parent=parent)
217
- self.rows = [TableRow(tr, parent=self) for tr in head.findall('tr')]
218
-
219
-
220
- class TableBody(Element):
221
- """
222
- This class maps to the `<tbody>` element of the html table.
223
- """
224
- def __init__(self, body, parent=None):
225
- super(TableBody, self).__init__(body, parent=parent)
226
- self.rows = [TableRow(tr, parent=self) for tr in body.findall('tr')]
227
-
228
-
229
- class TableRow(Element):
230
- """
231
- This class maps to the `<tr>` element of the html table.
232
- """
233
- def __init__(self, tr, parent=None):
234
- super(TableRow, self).__init__(tr, parent=parent)
235
- self.cells = [TableCell(cell, parent=self) for cell in tr.findall('th') + tr.findall('td')]
236
-
237
-
238
- def element_to_string(el):
239
- return _element_to_string(el).strip()
240
-
241
-
242
- def _element_to_string(el):
243
- string = ''
244
-
245
- for x in el.iterchildren():
246
- # 表格里的内容保持不变
247
- # string += '\n' + _element_to_string(x)
248
- string += html.tostring(x, encoding='unicode', with_tail=False)
249
-
250
- text = el.text.strip() if el.text else ''
251
- tail = el.tail.strip() if el.tail else ''
252
-
253
- return text + string + '\n' + tail
254
-
255
-
256
- class TableCell(Element):
257
- """
258
- This class maps to the `<td>` element of the html table.
259
- """
260
- CELL_TYPES = {'TYPE_STRING', 'TYPE_FORMULA', 'TYPE_NUMERIC', 'TYPE_BOOL', 'TYPE_CURRENCY', 'TYPE_PERCENTAGE',
261
- 'TYPE_NULL', 'TYPE_INLINE', 'TYPE_ERROR', 'TYPE_FORMULA_CACHE_STRING', 'TYPE_INTEGER'}
262
-
263
- def __init__(self, cell, parent=None):
264
- super(TableCell, self).__init__(cell, parent=parent)
265
- self.value = element_to_string(cell)
266
- self.number_format = self.get_number_format()
267
-
268
- def data_type(self):
269
- cell_types = self.CELL_TYPES & set(self.element.get('class', '').split())
270
- if cell_types:
271
- if 'TYPE_FORMULA' in cell_types:
272
- # Make sure TYPE_FORMULA takes precedence over the other classes in the set.
273
- cell_type = 'TYPE_FORMULA'
274
- elif cell_types & {'TYPE_CURRENCY', 'TYPE_INTEGER', 'TYPE_PERCENTAGE'}:
275
- cell_type = 'TYPE_NUMERIC'
276
- else:
277
- cell_type = cell_types.pop()
278
- else:
279
- cell_type = 'TYPE_STRING'
280
- return getattr(cell, cell_type)
281
-
282
- def get_number_format(self):
283
- if 'TYPE_CURRENCY' in self.element.get('class', '').split():
284
- return FORMAT_CURRENCY_USD_SIMPLE
285
- if 'TYPE_INTEGER' in self.element.get('class', '').split():
286
- return '#,##0'
287
- if 'TYPE_PERCENTAGE' in self.element.get('class', '').split():
288
- return FORMAT_PERCENTAGE
289
- if 'TYPE_DATE' in self.element.get('class', '').split():
290
- return FORMAT_DATE_MMDDYYYY
291
- if self.data_type() == cell.TYPE_NUMERIC:
292
- try:
293
- int(self.value)
294
- except ValueError:
295
- return '#,##0.##'
296
- else:
297
- return '#,##0'
298
-
299
- def format(self, cell):
300
- cell.style = self.style()
301
- data_type = self.data_type()
302
- if data_type:
303
- cell.data_type = data_type
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2020/06/02 19:57
6
+
7
+ """
8
+ tablepyxl.style的代码
9
+ 坤泽进行了一些修改
10
+ """
11
+
12
+ # This is where we handle translating css styles into openpyxl styles
13
+ # and cascading those from parent to child in the dom.
14
+
15
+
16
+ from lxml import html
17
+
18
+ from openpyxl.cell import cell
19
+ from openpyxl.styles import Font, Alignment, PatternFill, NamedStyle, Border, Side, Color
20
+ from openpyxl.styles.fills import FILL_SOLID
21
+ from openpyxl.styles.numbers import FORMAT_CURRENCY_USD_SIMPLE, FORMAT_PERCENTAGE
22
+ from openpyxl.styles.colors import BLACK
23
+
24
+ FORMAT_DATE_MMDDYYYY = 'mm/dd/yyyy'
25
+
26
+
27
+ def colormap(color):
28
+ """
29
+ Convenience for looking up known colors
30
+ """
31
+ cmap = {'black': BLACK}
32
+ return cmap.get(color, color)
33
+
34
+
35
+ def style_string_to_dict(style):
36
+ """
37
+ Convert css style string to a python dictionary
38
+ """
39
+ def clean_split(string, delim):
40
+ return (s.strip() for s in string.split(delim))
41
+ styles = [clean_split(s, ":") for s in style.split(";") if ":" in s]
42
+ return dict(styles)
43
+
44
+
45
+ def get_side(style, name):
46
+ return {'border_style': style.get('border-{}-style'.format(name)),
47
+ 'color': colormap(style.get('border-{}-color'.format(name)))}
48
+
49
+ known_styles = {}
50
+
51
+
52
+ def style_dict_to_named_style(style_dict, number_format=None):
53
+ """
54
+ Change css style (stored in a python dictionary) to openpyxl NamedStyle
55
+ """
56
+
57
+ style_and_format_string = str({
58
+ 'style_dict': style_dict,
59
+ 'parent': style_dict.parent,
60
+ 'number_format': number_format,
61
+ })
62
+
63
+ if style_and_format_string not in known_styles:
64
+ # Font
65
+ font = Font(bold=style_dict.get('font-weight') == 'bold',
66
+ color=style_dict.get_color('color', None),
67
+ size=style_dict.get('font-size'))
68
+
69
+ # Alignment
70
+ # 坤泽修改
71
+ vertical = style_dict.get('vertical-align', 'center')
72
+ if vertical not in {'bottom', 'justify', 'distributed', 'top', 'center'}: vertical = 'center'
73
+ alignment = Alignment(horizontal=style_dict.get('text-align', 'general'),
74
+ vertical=vertical,
75
+ wrap_text=style_dict.get('white-space', 'nowrap') == 'normal')
76
+
77
+ # Fill
78
+ bg_color = style_dict.get_color('background-color')
79
+ fg_color = style_dict.get_color('foreground-color', Color())
80
+ fill_type = style_dict.get('fill-type')
81
+ if bg_color and bg_color != 'transparent':
82
+ fill = PatternFill(fill_type=fill_type or FILL_SOLID,
83
+ start_color=bg_color,
84
+ end_color=fg_color)
85
+ else:
86
+ fill = PatternFill()
87
+
88
+ # Border
89
+ border = Border(left=Side(**get_side(style_dict, 'left')),
90
+ right=Side(**get_side(style_dict, 'right')),
91
+ top=Side(**get_side(style_dict, 'top')),
92
+ bottom=Side(**get_side(style_dict, 'bottom')),
93
+ diagonal=Side(**get_side(style_dict, 'diagonal')),
94
+ diagonal_direction=None,
95
+ outline=Side(**get_side(style_dict, 'outline')),
96
+ vertical=None,
97
+ horizontal=None)
98
+
99
+ name = 'Style {}'.format(len(known_styles) + 1)
100
+
101
+ pyxl_style = NamedStyle(name=name, font=font, fill=fill, alignment=alignment, border=border,
102
+ number_format=number_format)
103
+
104
+ known_styles[style_and_format_string] = pyxl_style
105
+
106
+ return known_styles[style_and_format_string]
107
+
108
+
109
+ class StyleDict(dict):
110
+ """
111
+ It's like a dictionary, but it looks for items in the parent dictionary
112
+ """
113
+ def __init__(self, *args, **kwargs):
114
+ self.parent = kwargs.pop('parent', None)
115
+ super(StyleDict, self).__init__(*args, **kwargs)
116
+
117
+ def __getitem__(self, item):
118
+ if item in self:
119
+ return super(StyleDict, self).__getitem__(item)
120
+ elif self.parent:
121
+ return self.parent[item]
122
+ else:
123
+ raise KeyError('{} not found'.format(item))
124
+
125
+ def __hash__(self):
126
+ return hash(tuple([(k, self.get(k)) for k in self._keys()]))
127
+
128
+ # Yielding the keys avoids creating unnecessary data structures
129
+ # and happily works with both python2 and python3 where the
130
+ # .keys() method is a dictionary_view in python3 and a list in python2.
131
+ def _keys(self):
132
+ yielded = set()
133
+ for k in self.keys():
134
+ yielded.add(k)
135
+ yield k
136
+ if self.parent:
137
+ for k in self.parent._keys():
138
+ if k not in yielded:
139
+ yielded.add(k)
140
+ yield k
141
+
142
+ def get(self, k, d=None):
143
+ try:
144
+ return self[k]
145
+ except KeyError:
146
+ return d
147
+
148
+ def get_color(self, k, d=None):
149
+ """
150
+ Strip leading # off colors if necessary
151
+ """
152
+ color = self.get(k, d)
153
+ if hasattr(color, 'startswith') and color.startswith('#'):
154
+ color = color[1:]
155
+ if len(color) == 3: # Premailers reduces colors like #00ff00 to #0f0, openpyxl doesn't like that
156
+ color = ''.join(2 * c for c in color)
157
+ return color
158
+
159
+
160
+ class Element(object):
161
+ """
162
+ Our base class for representing an html element along with a cascading style.
163
+ The element is created along with a parent so that the StyleDict that we store
164
+ can point to the parent's StyleDict.
165
+ """
166
+ def __init__(self, element, parent=None):
167
+ self.element = element
168
+ self.number_format = None
169
+ parent_style = parent.style_dict if parent else None
170
+ self.style_dict = StyleDict(style_string_to_dict(element.get('style', '')), parent=parent_style)
171
+ self._style_cache = None
172
+
173
+ def style(self):
174
+ """
175
+ Turn the css styles for this element into an openpyxl NamedStyle.
176
+ """
177
+ if not self._style_cache:
178
+ self._style_cache = style_dict_to_named_style(self.style_dict, number_format=self.number_format)
179
+ return self._style_cache
180
+
181
+ def get_dimension(self, dimension_key):
182
+ """
183
+ Extracts the dimension from the style dict of the Element and returns it as a float.
184
+ """
185
+ dimension = self.style_dict.get(dimension_key)
186
+ if dimension:
187
+ if dimension[-2:] in ['px', 'em', 'pt', 'in', 'cm']:
188
+ dimension = dimension[:-2]
189
+ dimension = float(dimension)
190
+ return dimension
191
+
192
+
193
+ class Table(Element):
194
+ """
195
+ The concrete implementations of Elements are semantically named for the types of elements we are interested in.
196
+ This defines a very concrete tree structure for html tables that we expect to deal with. I prefer this compared to
197
+ allowing Element to have an arbitrary number of children and dealing with an abstract element tree.
198
+
199
+ """
200
+ def __init__(self, table):
201
+ """
202
+ takes an html table object (from lxml)
203
+ """
204
+ super(Table, self).__init__(table)
205
+ table_head = table.find('thead')
206
+ self.head = TableHead(table_head, parent=self) if table_head is not None else None
207
+ table_body = table.find('tbody')
208
+ self.body = TableBody(table_body if table_body is not None else table, parent=self)
209
+
210
+
211
+ class TableHead(Element):
212
+ """
213
+ This class maps to the `<th>` element of the html table.
214
+ """
215
+ def __init__(self, head, parent=None):
216
+ super(TableHead, self).__init__(head, parent=parent)
217
+ self.rows = [TableRow(tr, parent=self) for tr in head.findall('tr')]
218
+
219
+
220
+ class TableBody(Element):
221
+ """
222
+ This class maps to the `<tbody>` element of the html table.
223
+ """
224
+ def __init__(self, body, parent=None):
225
+ super(TableBody, self).__init__(body, parent=parent)
226
+ self.rows = [TableRow(tr, parent=self) for tr in body.findall('tr')]
227
+
228
+
229
+ class TableRow(Element):
230
+ """
231
+ This class maps to the `<tr>` element of the html table.
232
+ """
233
+ def __init__(self, tr, parent=None):
234
+ super(TableRow, self).__init__(tr, parent=parent)
235
+ self.cells = [TableCell(cell, parent=self) for cell in tr.findall('th') + tr.findall('td')]
236
+
237
+
238
+ def element_to_string(el):
239
+ return _element_to_string(el).strip()
240
+
241
+
242
+ def _element_to_string(el):
243
+ string = ''
244
+
245
+ for x in el.iterchildren():
246
+ # 表格里的内容保持不变
247
+ # string += '\n' + _element_to_string(x)
248
+ string += html.tostring(x, encoding='unicode', with_tail=False)
249
+
250
+ text = el.text.strip() if el.text else ''
251
+ tail = el.tail.strip() if el.tail else ''
252
+
253
+ return text + string + '\n' + tail
254
+
255
+
256
+ class TableCell(Element):
257
+ """
258
+ This class maps to the `<td>` element of the html table.
259
+ """
260
+ CELL_TYPES = {'TYPE_STRING', 'TYPE_FORMULA', 'TYPE_NUMERIC', 'TYPE_BOOL', 'TYPE_CURRENCY', 'TYPE_PERCENTAGE',
261
+ 'TYPE_NULL', 'TYPE_INLINE', 'TYPE_ERROR', 'TYPE_FORMULA_CACHE_STRING', 'TYPE_INTEGER'}
262
+
263
+ def __init__(self, cell, parent=None):
264
+ super(TableCell, self).__init__(cell, parent=parent)
265
+ self.value = element_to_string(cell)
266
+ self.number_format = self.get_number_format()
267
+
268
+ def data_type(self):
269
+ cell_types = self.CELL_TYPES & set(self.element.get('class', '').split())
270
+ if cell_types:
271
+ if 'TYPE_FORMULA' in cell_types:
272
+ # Make sure TYPE_FORMULA takes precedence over the other classes in the set.
273
+ cell_type = 'TYPE_FORMULA'
274
+ elif cell_types & {'TYPE_CURRENCY', 'TYPE_INTEGER', 'TYPE_PERCENTAGE'}:
275
+ cell_type = 'TYPE_NUMERIC'
276
+ else:
277
+ cell_type = cell_types.pop()
278
+ else:
279
+ cell_type = 'TYPE_STRING'
280
+ return getattr(cell, cell_type)
281
+
282
+ def get_number_format(self):
283
+ if 'TYPE_CURRENCY' in self.element.get('class', '').split():
284
+ return FORMAT_CURRENCY_USD_SIMPLE
285
+ if 'TYPE_INTEGER' in self.element.get('class', '').split():
286
+ return '#,##0'
287
+ if 'TYPE_PERCENTAGE' in self.element.get('class', '').split():
288
+ return FORMAT_PERCENTAGE
289
+ if 'TYPE_DATE' in self.element.get('class', '').split():
290
+ return FORMAT_DATE_MMDDYYYY
291
+ if self.data_type() == cell.TYPE_NUMERIC:
292
+ try:
293
+ int(self.value)
294
+ except ValueError:
295
+ return '#,##0.##'
296
+ else:
297
+ return '#,##0'
298
+
299
+ def format(self, cell):
300
+ cell.style = self.style()
301
+ data_type = self.data_type()
302
+ if data_type:
303
+ cell.data_type = data_type