aspose-cells-foss 25.12.1__py3-none-any.whl → 26.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. aspose_cells/__init__.py +88 -0
  2. aspose_cells/auto_filter.py +527 -0
  3. aspose_cells/cell.py +483 -0
  4. aspose_cells/cell_value_handler.py +319 -0
  5. aspose_cells/cells.py +779 -0
  6. aspose_cells/cfb_handler.py +445 -0
  7. aspose_cells/cfb_writer.py +659 -0
  8. aspose_cells/cfb_writer_minimal.py +337 -0
  9. aspose_cells/comment_xml.py +475 -0
  10. aspose_cells/conditional_format.py +1185 -0
  11. aspose_cells/csv_handler.py +690 -0
  12. aspose_cells/data_validation.py +911 -0
  13. aspose_cells/document_properties.py +356 -0
  14. aspose_cells/encryption_crypto.py +247 -0
  15. aspose_cells/encryption_params.py +138 -0
  16. aspose_cells/hyperlink.py +372 -0
  17. aspose_cells/json_handler.py +185 -0
  18. aspose_cells/markdown_handler.py +583 -0
  19. aspose_cells/shared_strings.py +101 -0
  20. aspose_cells/style.py +841 -0
  21. aspose_cells/workbook.py +499 -0
  22. aspose_cells/workbook_hash_password.py +68 -0
  23. aspose_cells/workbook_properties.py +712 -0
  24. aspose_cells/worksheet.py +570 -0
  25. aspose_cells/worksheet_properties.py +1239 -0
  26. aspose_cells/xlsx_encryptor.py +403 -0
  27. aspose_cells/xml_autofilter_loader.py +195 -0
  28. aspose_cells/xml_autofilter_saver.py +173 -0
  29. aspose_cells/xml_conditional_format_loader.py +215 -0
  30. aspose_cells/xml_conditional_format_saver.py +351 -0
  31. aspose_cells/xml_datavalidation_loader.py +239 -0
  32. aspose_cells/xml_datavalidation_saver.py +245 -0
  33. aspose_cells/xml_hyperlink_handler.py +323 -0
  34. aspose_cells/xml_loader.py +986 -0
  35. aspose_cells/xml_properties_loader.py +512 -0
  36. aspose_cells/xml_properties_saver.py +607 -0
  37. aspose_cells/xml_saver.py +1306 -0
  38. aspose_cells_foss-26.2.2.dist-info/METADATA +190 -0
  39. aspose_cells_foss-26.2.2.dist-info/RECORD +41 -0
  40. {aspose_cells_foss-25.12.1.dist-info → aspose_cells_foss-26.2.2.dist-info}/WHEEL +1 -1
  41. aspose_cells_foss-26.2.2.dist-info/top_level.txt +1 -0
  42. aspose/__init__.py +0 -14
  43. aspose/cells/__init__.py +0 -31
  44. aspose/cells/cell.py +0 -350
  45. aspose/cells/constants.py +0 -44
  46. aspose/cells/converters/__init__.py +0 -13
  47. aspose/cells/converters/csv_converter.py +0 -55
  48. aspose/cells/converters/json_converter.py +0 -46
  49. aspose/cells/converters/markdown_converter.py +0 -453
  50. aspose/cells/drawing/__init__.py +0 -17
  51. aspose/cells/drawing/anchor.py +0 -172
  52. aspose/cells/drawing/collection.py +0 -233
  53. aspose/cells/drawing/image.py +0 -338
  54. aspose/cells/formats.py +0 -80
  55. aspose/cells/formula/__init__.py +0 -10
  56. aspose/cells/formula/evaluator.py +0 -360
  57. aspose/cells/formula/functions.py +0 -433
  58. aspose/cells/formula/tokenizer.py +0 -340
  59. aspose/cells/io/__init__.py +0 -27
  60. aspose/cells/io/csv/__init__.py +0 -8
  61. aspose/cells/io/csv/reader.py +0 -88
  62. aspose/cells/io/csv/writer.py +0 -98
  63. aspose/cells/io/factory.py +0 -138
  64. aspose/cells/io/interfaces.py +0 -48
  65. aspose/cells/io/json/__init__.py +0 -8
  66. aspose/cells/io/json/reader.py +0 -126
  67. aspose/cells/io/json/writer.py +0 -119
  68. aspose/cells/io/md/__init__.py +0 -8
  69. aspose/cells/io/md/reader.py +0 -161
  70. aspose/cells/io/md/writer.py +0 -334
  71. aspose/cells/io/models.py +0 -64
  72. aspose/cells/io/xlsx/__init__.py +0 -9
  73. aspose/cells/io/xlsx/constants.py +0 -312
  74. aspose/cells/io/xlsx/image_writer.py +0 -311
  75. aspose/cells/io/xlsx/reader.py +0 -284
  76. aspose/cells/io/xlsx/writer.py +0 -931
  77. aspose/cells/plugins/__init__.py +0 -6
  78. aspose/cells/plugins/docling_backend/__init__.py +0 -7
  79. aspose/cells/plugins/docling_backend/backend.py +0 -535
  80. aspose/cells/plugins/markitdown_plugin/__init__.py +0 -15
  81. aspose/cells/plugins/markitdown_plugin/plugin.py +0 -128
  82. aspose/cells/range.py +0 -210
  83. aspose/cells/style.py +0 -287
  84. aspose/cells/utils/__init__.py +0 -54
  85. aspose/cells/utils/coordinates.py +0 -68
  86. aspose/cells/utils/exceptions.py +0 -43
  87. aspose/cells/utils/validation.py +0 -102
  88. aspose/cells/workbook.py +0 -352
  89. aspose/cells/worksheet.py +0 -670
  90. aspose_cells_foss-25.12.1.dist-info/METADATA +0 -189
  91. aspose_cells_foss-25.12.1.dist-info/RECORD +0 -53
  92. aspose_cells_foss-25.12.1.dist-info/entry_points.txt +0 -2
  93. aspose_cells_foss-25.12.1.dist-info/top_level.txt +0 -1
@@ -0,0 +1,986 @@
1
+ """
2
+ Aspose.Cells for Python - XML Loader Module
3
+
4
+ This module provides XML loading functionality for Excel workbooks.
5
+ It handles parsing of workbook XML files and loading data into Workbook objects.
6
+
7
+ Compatible with Aspose.Cells for .NET API structure.
8
+ ECMA-376 Compliant cell value import.
9
+ """
10
+
11
+ import xml.etree.ElementTree as ET
12
+ from .cell_value_handler import CellValueHandler
13
+ from .comment_xml import CommentXMLReader
14
+ from .xml_autofilter_loader import AutoFilterXMLLoader
15
+ from .xml_conditional_format_loader import ConditionalFormatXMLLoader
16
+ from .xml_properties_loader import WorkbookPropertiesXMLLoader, WorksheetPropertiesXMLLoader
17
+ from .xml_hyperlink_handler import HyperlinkXMLLoader
18
+ from .xml_datavalidation_loader import DataValidationXmlLoader
19
+
20
+
21
+ class XMLLoader:
22
+ """
23
+ Handles loading of Excel workbook XML files.
24
+
25
+ This class provides methods to parse various XML components of an Excel workbook
26
+ including workbook structure, shared strings, styles, and worksheet data.
27
+ """
28
+
29
def __init__(self, workbook):
    """
    Binds the loader to a workbook and builds the per-feature sub-loaders.

    Args:
        workbook (Workbook): Destination workbook that parsed data is written into.
    """
    namespaces = {
        'main': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
        'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
    }
    self.workbook = workbook
    self.ns = namespaces

    # Each worksheet/workbook feature is delegated to a dedicated helper object.
    self._comment_reader = CommentXMLReader()
    self._autofilter_loader = AutoFilterXMLLoader(namespaces)
    self._cf_loader = ConditionalFormatXMLLoader(namespaces, workbook)
    self._hyperlink_loader = HyperlinkXMLLoader(namespaces)
    # The data validation loader is handed the bare namespace URI, not the prefix map.
    self._dv_loader = DataValidationXmlLoader(namespaces['main'])
    self._wb_props_loader = WorkbookPropertiesXMLLoader(namespaces)
    self._ws_props_loader = WorksheetPropertiesXMLLoader(namespaces)
60
+
61
def load_workbook(self, zipf):
    """
    Populates the bound workbook from an opened workbook package.

    The stages run in a fixed order: workbook properties, document
    properties, sheet list, shared strings, styles, and finally the
    per-sheet data.

    Args:
        zipf: A ZipFile-like object exposing ``read(name)`` for package parts.
    """
    root = ET.fromstring(zipf.read('xl/workbook.xml'))

    # Stages driven by the parsed workbook.xml root.
    self._load_workbook_properties(root)
    # docProps/core.xml and docProps/app.xml
    self._load_document_properties(zipf)
    self._load_worksheet_info(root)

    # Stages that read further parts out of the package.
    self._load_shared_strings(zipf)
    self._load_styles(zipf)
    self._load_worksheets_data(zipf)
89
+
90
+ def _load_workbook_properties(self, workbook_root):
91
+ """
92
+ Loads workbook properties from workbook XML.
93
+
94
+ Args:
95
+ workbook_root: The XML root element of workbook.xml.
96
+ """
97
+ props = self.workbook.properties
98
+
99
+ # Load file version
100
+ self._wb_props_loader.load_file_version(props.file_version, workbook_root)
101
+
102
+ # Load workbook properties
103
+ self._wb_props_loader.load_workbook_pr(props.workbook_pr, workbook_root)
104
+
105
+ # Load workbook protection
106
+ self._wb_props_loader.load_workbook_protection(props.protection, workbook_root)
107
+
108
+ # Load book views
109
+ self._wb_props_loader.load_book_views(props.view, workbook_root)
110
+
111
+ # Load calculation properties
112
+ self._wb_props_loader.load_calc_pr(props.calculation, workbook_root)
113
+
114
+ # Load defined names
115
+ self._wb_props_loader.load_defined_names(props.defined_names, workbook_root)
116
+
117
+ def _load_worksheet_properties(self, worksheet, worksheet_root):
118
+ """
119
+ Loads worksheet properties from worksheet XML.
120
+
121
+ Args:
122
+ worksheet (Worksheet): The worksheet to load properties into.
123
+ worksheet_root: The XML root element of the worksheet.
124
+ """
125
+ props = worksheet.properties
126
+
127
+ # Load sheet views (includes selection and pane)
128
+ self._ws_props_loader.load_sheet_views(props, worksheet_root)
129
+
130
+ # Load sheet format properties
131
+ self._ws_props_loader.load_sheet_format_pr(props, worksheet_root)
132
+
133
+ # Load sheet protection
134
+ self._ws_props_loader.load_sheet_protection(props, worksheet_root)
135
+
136
+ # Load print options
137
+ self._ws_props_loader.load_print_options(props, worksheet_root)
138
+
139
+ # Load page margins
140
+ self._ws_props_loader.load_page_margins(props, worksheet_root)
141
+
142
+ # Load page setup
143
+ self._ws_props_loader.load_page_setup(props, worksheet_root)
144
+
145
+ # Load header/footer
146
+ self._ws_props_loader.load_header_footer(props, worksheet_root)
147
+
148
+ def _load_data_validations(self, worksheet, worksheet_root):
149
+ """
150
+ Loads data validations from worksheet XML.
151
+
152
+ Args:
153
+ worksheet (Worksheet): The worksheet to load validations into.
154
+ worksheet_root: The XML root element of the worksheet.
155
+ """
156
+ validations = self._dv_loader.load_data_validations(worksheet_root)
157
+ worksheet._data_validations = validations
158
+
159
def _load_worksheet_info(self, workbook_root):
    """
    Creates one Worksheet per ``<sheet>`` entry of workbook.xml.

    Args:
        workbook_root: The XML root element of workbook.xml.
    """
    from .worksheet import Worksheet

    # Maps the XML ``state`` attribute to the value stored in ``_visible``;
    # any other state (or none) leaves the worksheet's default untouched.
    visibility_by_state = {'hidden': False, 'veryHidden': 'veryHidden'}

    for sheet_elem in workbook_root.findall('.//main:sheet', namespaces=self.ns):
        new_sheet = Worksheet(sheet_elem.get('name'))

        state = sheet_elem.get('state')
        if state in visibility_by_state:
            new_sheet._visible = visibility_by_state[state]

        self.workbook._worksheets.append(new_sheet)
181
+
182
+ def _load_shared_strings(self, zipf):
183
+ """
184
+ Loads shared strings from the workbook.
185
+
186
+ Args:
187
+ zipf: A ZipFile object containing the workbook data.
188
+ """
189
+ try:
190
+ shared_strings_content = zipf.read('xl/sharedStrings.xml')
191
+ shared_strings_root = ET.fromstring(shared_strings_content)
192
+ self.workbook._shared_strings = []
193
+ for si in shared_strings_root.findall('.//main:si', namespaces=self.ns):
194
+ text_parts = [
195
+ t.text if t.text is not None else ''
196
+ for t in si.findall('.//main:t', namespaces=self.ns)
197
+ ]
198
+ self.workbook._shared_strings.append(''.join(text_parts))
199
+ except KeyError:
200
+ self.workbook._shared_strings = []
201
+
202
+ def _load_styles(self, zipf):
203
+ """
204
+ Loads styles from the workbook.
205
+
206
+ Args:
207
+ zipf: A ZipFile object containing the workbook data.
208
+ """
209
+ try:
210
+ styles_content = zipf.read('xl/styles.xml')
211
+ styles_root = ET.fromstring(styles_content)
212
+ self._load_styles_xml(styles_root)
213
+ # Load differential formatting (dxf) for conditional formatting
214
+ self._load_dxf_styles(styles_root)
215
+ except KeyError:
216
+ # Use default styles
217
+ from .xml_saver import XMLSaver
218
+ saver = XMLSaver(self.workbook)
219
+ saver.register_default_styles()
220
+ self.workbook._dxf_styles = []
221
+
222
+ def _load_worksheets_data(self, zipf):
223
+ """
224
+ Loads data for all worksheets.
225
+
226
+ Args:
227
+ zipf: A ZipFile object containing the workbook data.
228
+ """
229
+ for i, worksheet in enumerate(self.workbook._worksheets):
230
+ try:
231
+ worksheet_content = zipf.read(f'xl/worksheets/sheet{i+1}.xml')
232
+ worksheet_root = ET.fromstring(worksheet_content)
233
+ self._load_worksheet_data(worksheet, worksheet_root)
234
+
235
+ # Load comments for this worksheet
236
+ self._comment_reader.load_comments(zipf, worksheet, i+1)
237
+
238
+ # Load hyperlinks for this worksheet
239
+ self._hyperlink_loader.load_hyperlinks(worksheet, worksheet_root, zipf, i+1)
240
+ except KeyError:
241
+ # Worksheet file not found, skip
242
+ pass
243
+
244
def _load_worksheet_data(self, worksheet, worksheet_root):
    """
    Loads dimension, properties, layout, filters, formatting, validations
    and cell data from a worksheet XML root.

    Cell values stored in ``<v>`` are decoded by ``CellValueHandler``.
    Inline strings (``t="inlineStr"``) keep their text in ``<is>`` instead
    of ``<v>`` and are read from there (plain ``<t>`` plus rich-text
    ``<r>/<t>`` runs).

    Args:
        worksheet (Worksheet): The worksheet object to load data into.
        worksheet_root: The XML root element of the worksheet.
    """
    # Local imports, resolved once per call rather than once per cell.
    from .cell import Cell
    from .cells import Cells

    ns = self.ns

    # Used range (<dimension ref="A1:C3"/> or a single cell reference).
    dim_elem = worksheet_root.find('main:dimension', namespaces=ns)
    if dim_elem is not None:
        ref = dim_elem.get('ref')
        if ref:
            if ':' in ref:
                start_ref, end_ref = ref.split(':', 1)
            else:
                start_ref = end_ref = ref
            try:
                min_row, min_col = Cells.coordinate_from_string(start_ref)
                max_row, max_col = Cells.coordinate_from_string(end_ref)
                worksheet._dimension = (min_row, min_col, max_row, max_col)
            except ValueError:
                # An unparsable dimension is ignored; cells are still loaded.
                pass

    self._load_worksheet_properties(worksheet, worksheet_root)

    # Column widths and row heights
    self._load_column_dimensions(worksheet, worksheet_root)
    self._load_row_heights(worksheet, worksheet_root)

    # Auto filter (ECMA-376 Section 18.3.1.2)
    self._autofilter_loader.load_auto_filter(worksheet, worksheet_root)

    # Conditional formatting (ECMA-376 Section 18.3.1.18)
    self._cf_loader.load_conditional_formatting(worksheet, worksheet_root)

    # Data validations (ECMA-376 Section 18.3.1.30, 18.3.1.31)
    self._load_data_validations(worksheet, worksheet_root)

    shared_strings = self.workbook._shared_strings

    for row_elem in worksheet_root.findall('.//main:row', namespaces=ns):
        for cell_elem in row_elem.findall('main:c', namespaces=ns):
            cell_ref = cell_elem.get('r')
            cell_type = cell_elem.get('t', 'n')  # Default to numeric per ECMA-376

            # Formulas are stored without the leading '='; add it back.
            f_elem = cell_elem.find('main:f', namespaces=ns)
            formula = f_elem.text if f_elem is not None else None
            if formula is not None and not formula.startswith('='):
                formula = '=' + formula

            s_attr = cell_elem.get('s')
            style_idx = int(s_attr) if s_attr is not None else 0

            value = None
            v_elem = cell_elem.find('main:v', namespaces=ns)
            if v_elem is not None and v_elem.text is not None:
                value = CellValueHandler.parse_value_from_xml(
                    v_elem.text,
                    cell_type,
                    shared_strings
                )
            elif cell_type == 'inlineStr':
                # Inline strings have no <v>; their text lives under <is>.
                is_elem = cell_elem.find('main:is', namespaces=ns)
                if is_elem is not None:
                    text_nodes = (is_elem.findall('main:t', ns)
                                  + is_elem.findall('main:r/main:t', ns))
                    value = ''.join(node.text or '' for node in text_nodes)

            cell = Cell(value, formula)

            # Index 0 is the default format and needs no explicit styling.
            if style_idx > 0:
                self._apply_cell_style(cell, style_idx)

            worksheet.cells[cell_ref] = cell
328
+
329
+ def _load_column_dimensions(self, worksheet, worksheet_root):
330
+ """
331
+ Loads column width settings from worksheet XML.
332
+ """
333
+ cols_elem = worksheet_root.find('main:cols', namespaces=self.ns)
334
+ if cols_elem is None:
335
+ return
336
+
337
+ if not hasattr(worksheet, '_column_widths'):
338
+ worksheet._column_widths = {}
339
+ if not hasattr(worksheet, '_hidden_columns'):
340
+ worksheet._hidden_columns = set()
341
+
342
+ for col_elem in cols_elem.findall('main:col', namespaces=self.ns):
343
+ min_val = col_elem.get('min')
344
+ max_val = col_elem.get('max')
345
+ width_val = col_elem.get('width')
346
+ hidden_val = col_elem.get('hidden')
347
+ if min_val is None or max_val is None:
348
+ raise ValueError("Invalid column definition: missing min or max")
349
+ try:
350
+ min_col = int(min_val)
351
+ max_col = int(max_val)
352
+ except ValueError as exc:
353
+ raise ValueError("Invalid column definition values") from exc
354
+ if min_col < 1 or max_col < min_col:
355
+ raise ValueError("Invalid column definition range")
356
+ width = None
357
+ if width_val is not None:
358
+ try:
359
+ width = float(width_val)
360
+ except ValueError as exc:
361
+ raise ValueError("Invalid column width value") from exc
362
+ if width <= 0:
363
+ raise ValueError("Column width must be > 0")
364
+
365
+ for col_idx in range(min_col, max_col + 1):
366
+ if width is not None:
367
+ worksheet._column_widths[col_idx] = width
368
+ if hidden_val in ('1', 'true', 'True'):
369
+ worksheet._hidden_columns.add(col_idx)
370
+
371
+ def _load_row_heights(self, worksheet, worksheet_root):
372
+ """
373
+ Loads row height settings from worksheet XML.
374
+ """
375
+ if not hasattr(worksheet, '_row_heights'):
376
+ worksheet._row_heights = {}
377
+ if not hasattr(worksheet, '_hidden_rows'):
378
+ worksheet._hidden_rows = set()
379
+
380
+ for row_elem in worksheet_root.findall('.//main:row', namespaces=self.ns):
381
+ ht = row_elem.get('ht')
382
+ hidden_val = row_elem.get('hidden')
383
+ if ht is None:
384
+ if hidden_val not in ('1', 'true', 'True'):
385
+ continue
386
+ row_num = row_elem.get('r')
387
+ if row_num is None:
388
+ raise ValueError("Row definition missing row index")
389
+ try:
390
+ row_idx = int(row_num)
391
+ except ValueError as exc:
392
+ raise ValueError("Invalid row height definition values") from exc
393
+ if row_idx < 1:
394
+ raise ValueError("Row index must be >= 1")
395
+ if ht is not None:
396
+ try:
397
+ height = float(ht)
398
+ except ValueError as exc:
399
+ raise ValueError("Invalid row height value") from exc
400
+ if height <= 0:
401
+ raise ValueError("Row height must be > 0")
402
+ worksheet._row_heights[row_idx] = height
403
+ if hidden_val in ('1', 'true', 'True'):
404
+ worksheet._hidden_rows.add(row_idx)
405
+
406
+ def _apply_cell_style(self, cell, style_idx):
407
+ """
408
+ Applies a style to a cell based on style index.
409
+
410
+ Args:
411
+ cell (Cell): The cell to apply style to.
412
+ style_idx (int): The style index to apply.
413
+ """
414
+ cell_style_key = None
415
+ if hasattr(self.workbook, '_cell_xf_by_index'):
416
+ cell_style_key = self.workbook._cell_xf_by_index.get(style_idx)
417
+ if cell_style_key is None:
418
+ for style_key, cell_style_idx in self.workbook._cell_styles.items():
419
+ if cell_style_idx == style_idx:
420
+ cell_style_key = style_key
421
+ break
422
+
423
+ if cell_style_key is None:
424
+ return
425
+
426
+ font_key, fill_key, border_key, num_fmt_key, alignment_key, protection_key = cell_style_key
427
+
428
+ # Apply font
429
+ if font_key in self.workbook._font_styles:
430
+ font_data = self.workbook._font_styles[font_key]
431
+ cell.style.font.name = font_data['name']
432
+ cell.style.font.size = font_data['size']
433
+ cell.style.font.color = font_data['color']
434
+ cell.style.font.bold = font_data['bold']
435
+ cell.style.font.italic = font_data['italic']
436
+ cell.style.font.underline = font_data['underline']
437
+ cell.style.font.strikethrough = font_data['strikethrough']
438
+
439
+ # Apply fill
440
+ if fill_key in self.workbook._fill_styles:
441
+ fill_data = self.workbook._fill_styles[fill_key]
442
+ cell.style.fill.pattern_type = fill_data['pattern_type']
443
+ cell.style.fill.foreground_color = fill_data['fg_color']
444
+ cell.style.fill.background_color = fill_data['bg_color']
445
+
446
+ # Apply border
447
+ if border_key in self.workbook._border_styles:
448
+ border_data = self.workbook._border_styles[border_key]
449
+ cell.style.borders.top.line_style = border_data['top']['style']
450
+ cell.style.borders.top.color = border_data['top']['color']
451
+ cell.style.borders.bottom.line_style = border_data['bottom']['style']
452
+ cell.style.borders.bottom.color = border_data['bottom']['color']
453
+ cell.style.borders.left.line_style = border_data['left']['style']
454
+ cell.style.borders.left.color = border_data['left']['color']
455
+ cell.style.borders.right.line_style = border_data['right']['style']
456
+ cell.style.borders.right.color = border_data['right']['color']
457
+
458
+ # Apply number format
459
+ if num_fmt_key in self.workbook._num_formats:
460
+ cell.style.number_format = self.workbook._num_formats[num_fmt_key]
461
+
462
+ # Apply alignment
463
+ if alignment_key in self.workbook._alignment_styles:
464
+ align_data = self.workbook._alignment_styles[alignment_key]
465
+ cell.style.alignment.horizontal = align_data['horizontal']
466
+ cell.style.alignment.vertical = align_data['vertical']
467
+ cell.style.alignment.wrap_text = align_data['wrap_text']
468
+ cell.style.alignment.indent = align_data['indent']
469
+ cell.style.alignment.text_rotation = align_data['text_rotation']
470
+ cell.style.alignment.shrink_to_fit = align_data['shrink_to_fit']
471
+ cell.style.alignment.reading_order = align_data['reading_order']
472
+ cell.style.alignment.relative_indent = align_data['relative_indent']
473
+
474
+ # Apply protection
475
+ if protection_key in self.workbook._protection_styles:
476
+ prot_data = self.workbook._protection_styles[protection_key]
477
+ cell.style.protection.locked = prot_data['locked']
478
+ cell.style.protection.hidden = prot_data['hidden']
479
+
480
+ def _load_styles_xml(self, styles_root):
481
+ """
482
+ Loads styles from styles XML.
483
+
484
+ Args:
485
+ styles_root: The XML root element of styles.
486
+ """
487
+ # Register built-in number formats
488
+ builtin_formats = {
489
+ 0: 'General',
490
+ 1: '0',
491
+ 2: '0.00',
492
+ 3: '#,##0',
493
+ 4: '#,##0.00',
494
+ 5: '$#,##0_);($#,##0)',
495
+ 6: '$#,##0_);[Red]($#,##0)',
496
+ 7: '$#,##0.00_);($#,##0.00)',
497
+ 8: '$#,##0.00_);[Red]($#,##0.00)',
498
+ 9: '0%',
499
+ 10: '0.00%',
500
+ 11: '0.00E+00',
501
+ 12: '# ?/?',
502
+ 13: '# ??/??',
503
+ 14: 'mm-dd-yy',
504
+ 15: 'd-mmm-yy',
505
+ 16: 'd-mmm',
506
+ 17: 'mmm-yy',
507
+ 18: 'h:mm AM/PM',
508
+ 19: 'h:mm:ss AM/PM',
509
+ 20: 'h:mm',
510
+ 21: 'h:mm:ss',
511
+ 22: 'm/d/yy h:mm',
512
+ 37: '#,##0_);(#,##0)',
513
+ 38: '#,##0_);[Red](#,##0)',
514
+ 39: '#,##0.00_);(#,##0.00)',
515
+ 40: '#,##0.00_);[Red](#,##0.00)',
516
+ 45: 'mm:ss',
517
+ 46: '[h]:mm:ss',
518
+ 47: 'mm:ss.0',
519
+ 48: '##0.0E+0',
520
+ 49: '@'
521
+ }
522
+ self.workbook._num_formats.update(builtin_formats)
523
+
524
+ # Load custom number formats
525
+ num_fmts = styles_root.findall('.//main:numFmt', namespaces=self.ns)
526
+ for num_fmt in num_fmts:
527
+ num_fmt_id = int(num_fmt.get('numFmtId'))
528
+ format_code = num_fmt.get('formatCode')
529
+ self.workbook._num_formats[num_fmt_id] = format_code
530
+
531
+ # Load fonts
532
+ self._load_fonts(styles_root)
533
+
534
+ # Load fills
535
+ self._load_fills(styles_root)
536
+
537
+ # Load borders
538
+ self._load_borders(styles_root)
539
+
540
+ # Load cellXfs
541
+ self._load_cell_xfs(styles_root)
542
+
543
+ def _load_fonts(self, styles_root):
544
+ """
545
+ Loads font styles from styles XML.
546
+
547
+ Args:
548
+ styles_root: The XML root element of styles.
549
+ """
550
+ fonts = styles_root.findall('.//main:font', namespaces=self.ns)
551
+ for i, font_elem in enumerate(fonts):
552
+ if i == 0:
553
+ continue # Skip default font
554
+ sz_elem = font_elem.find('main:sz', namespaces=self.ns)
555
+ color_elem = font_elem.find('main:color', namespaces=self.ns)
556
+ name_elem = font_elem.find('main:name', namespaces=self.ns)
557
+ b_elem = font_elem.find('main:b', namespaces=self.ns)
558
+ i_elem = font_elem.find('main:i', namespaces=self.ns)
559
+ u_elem = font_elem.find('main:u', namespaces=self.ns)
560
+ strike_elem = font_elem.find('main:strike', namespaces=self.ns)
561
+
562
+ font_data = {
563
+ 'name': name_elem.get('val') if name_elem is not None else 'Calibri',
564
+ 'size': int(sz_elem.get('val', 11)) if sz_elem is not None else 11,
565
+ 'color': color_elem.get('rgb', color_elem.get('theme', 'FF000000')) if color_elem is not None else 'FF000000',
566
+ 'bold': b_elem is not None,
567
+ 'italic': i_elem is not None,
568
+ 'underline': u_elem is not None,
569
+ 'strikethrough': strike_elem is not None
570
+ }
571
+ self.workbook._font_styles[i] = font_data
572
+
573
+ def _load_fills(self, styles_root):
574
+ """
575
+ Loads fill styles from styles XML.
576
+
577
+ Args:
578
+ styles_root: The XML root element of styles.
579
+ """
580
+ fills = styles_root.findall('.//main:fill', namespaces=self.ns)
581
+ for i, fill_elem in enumerate(fills):
582
+ if i < 2:
583
+ continue # Skip default fills (none and gray125)
584
+ pattern_elem = fill_elem.find('main:patternFill', namespaces=self.ns)
585
+ fg_color_elem = pattern_elem.find('main:fgColor', namespaces=self.ns) if pattern_elem is not None else None
586
+ bg_color_elem = pattern_elem.find('main:bgColor', namespaces=self.ns) if pattern_elem is not None else None
587
+
588
+ fill_data = {
589
+ 'pattern_type': pattern_elem.get('patternType', 'none') if pattern_elem is not None else 'none',
590
+ 'fg_color': fg_color_elem.get('rgb', 'FFFFFFFF') if fg_color_elem is not None else 'FFFFFFFF',
591
+ 'bg_color': bg_color_elem.get('rgb', 'FFFFFFFF') if bg_color_elem is not None else 'FFFFFFFF'
592
+ }
593
+ self.workbook._fill_styles[i] = fill_data
594
+
595
+ def _load_borders(self, styles_root):
596
+ """
597
+ Loads border styles from styles XML.
598
+
599
+ Args:
600
+ styles_root: The XML root element of styles.
601
+ """
602
+ borders = styles_root.findall('.//main:border', namespaces=self.ns)
603
+ for i, border_elem in enumerate(borders):
604
+ if i == 0:
605
+ continue # Skip default border
606
+ left_elem = border_elem.find('main:left', namespaces=self.ns)
607
+ right_elem = border_elem.find('main:right', namespaces=self.ns)
608
+ top_elem = border_elem.find('main:top', namespaces=self.ns)
609
+ bottom_elem = border_elem.find('main:bottom', namespaces=self.ns)
610
+
611
+ # Load left border
612
+ left_style = 'none'
613
+ left_color = 'FF000000'
614
+ if left_elem is not None:
615
+ left_style = left_elem.get('style', 'none')
616
+ left_color_elem = left_elem.find('main:color', namespaces=self.ns)
617
+ if left_color_elem is not None:
618
+ left_color = left_color_elem.get('rgb', 'FF000000')
619
+
620
+ # Load right border
621
+ right_style = 'none'
622
+ right_color = 'FF000000'
623
+ if right_elem is not None:
624
+ right_style = right_elem.get('style', 'none')
625
+ right_color_elem = right_elem.find('main:color', namespaces=self.ns)
626
+ if right_color_elem is not None:
627
+ right_color = right_color_elem.get('rgb', 'FF000000')
628
+
629
+ # Load top border
630
+ top_style = 'none'
631
+ top_color = 'FF000000'
632
+ if top_elem is not None:
633
+ top_style = top_elem.get('style', 'none')
634
+ top_color_elem = top_elem.find('main:color', namespaces=self.ns)
635
+ if top_color_elem is not None:
636
+ top_color = top_color_elem.get('rgb', 'FF000000')
637
+
638
+ # Load bottom border
639
+ bottom_style = 'none'
640
+ bottom_color = 'FF000000'
641
+ if bottom_elem is not None:
642
+ bottom_style = bottom_elem.get('style', 'none')
643
+ bottom_color_elem = bottom_elem.find('main:color', namespaces=self.ns)
644
+ if bottom_color_elem is not None:
645
+ bottom_color = bottom_color_elem.get('rgb', 'FF000000')
646
+
647
+ border_data = {
648
+ 'top': {'style': top_style, 'color': top_color},
649
+ 'bottom': {'style': bottom_style, 'color': bottom_color},
650
+ 'left': {'style': left_style, 'color': left_color},
651
+ 'right': {'style': right_style, 'color': right_color}
652
+ }
653
+ self.workbook._border_styles[i] = border_data
654
+
655
+ def _load_cell_xfs(self, styles_root):
656
+ """
657
+ Loads cell XF records from styles XML.
658
+
659
+ Args:
660
+ styles_root: The XML root element of styles.
661
+ """
662
+ if not hasattr(self.workbook, '_cell_xf_by_index'):
663
+ self.workbook._cell_xf_by_index = {}
664
+
665
+ cell_xfs = styles_root.findall('.//main:cellXfs/main:xf', namespaces=self.ns)
666
+ for i, xf_elem in enumerate(cell_xfs):
667
+ if i == 0:
668
+ continue # Skip default cellXf
669
+ font_idx = int(xf_elem.get('fontId', 0))
670
+ fill_idx = int(xf_elem.get('fillId', 0))
671
+ border_idx = int(xf_elem.get('borderId', 0))
672
+ num_fmt_idx = int(xf_elem.get('numFmtId', 0))
673
+
674
+ # Load alignment if present
675
+ alignment_idx = 0
676
+ alignment_elem = xf_elem.find('main:alignment', namespaces=self.ns)
677
+ if alignment_elem is not None:
678
+ # Check if this alignment already exists
679
+ horizontal = alignment_elem.get('horizontal', 'general')
680
+ vertical = alignment_elem.get('vertical', 'bottom')
681
+ text_rotation = int(alignment_elem.get('textRotation', 0))
682
+ wrap_text = alignment_elem.get('wrapText') == '1'
683
+ shrink_to_fit = alignment_elem.get('shrinkToFit') == '1'
684
+ indent = int(alignment_elem.get('indent', 0))
685
+ reading_order = int(alignment_elem.get('readingOrder', 0))
686
+ relative_indent = int(alignment_elem.get('relativeIndent', 0))
687
+
688
+ # Check if this alignment already exists
689
+ for idx, align_data in self.workbook._alignment_styles.items():
690
+ if (align_data['horizontal'] == horizontal and
691
+ align_data['vertical'] == vertical and
692
+ align_data['wrap_text'] == wrap_text and
693
+ align_data['indent'] == indent and
694
+ align_data['text_rotation'] == text_rotation and
695
+ align_data['shrink_to_fit'] == shrink_to_fit and
696
+ align_data['reading_order'] == reading_order and
697
+ align_data['relative_indent'] == relative_indent):
698
+ alignment_idx = idx
699
+ break
700
+
701
+ # If not found, create new alignment style
702
+ if alignment_idx == 0:
703
+ alignment_idx = len(self.workbook._alignment_styles)
704
+ self.workbook._alignment_styles[alignment_idx] = {
705
+ 'horizontal': horizontal,
706
+ 'vertical': vertical,
707
+ 'wrap_text': wrap_text,
708
+ 'indent': indent,
709
+ 'text_rotation': text_rotation,
710
+ 'shrink_to_fit': shrink_to_fit,
711
+ 'reading_order': reading_order,
712
+ 'relative_indent': relative_indent
713
+ }
714
+
715
+ # Load protection if present
716
+ protection_idx = 0
717
+ protection_elem = xf_elem.find('main:protection', namespaces=self.ns)
718
+ if protection_elem is not None:
719
+ # Get protection attributes (default: locked=1, hidden=0)
720
+ locked = protection_elem.get('locked', '1') == '1'
721
+ hidden = protection_elem.get('hidden', '0') == '1'
722
+
723
+ # Check if this protection already exists
724
+ for idx, prot_data in self.workbook._protection_styles.items():
725
+ if (prot_data['locked'] == locked and
726
+ prot_data['hidden'] == hidden):
727
+ protection_idx = idx
728
+ break
729
+
730
+ # If not found, create new protection style
731
+ if protection_idx == 0 and not (locked and not hidden): # Skip if it's the default
732
+ protection_idx = len(self.workbook._protection_styles)
733
+ self.workbook._protection_styles[protection_idx] = {
734
+ 'locked': locked,
735
+ 'hidden': hidden
736
+ }
737
+
738
+ cell_style_key = (font_idx, fill_idx, border_idx, num_fmt_idx, alignment_idx, protection_idx)
739
+ self.workbook._cell_styles[cell_style_key] = i # Store the actual cellXf index
740
+ self.workbook._cell_xf_by_index[i] = cell_style_key
741
+
742
+ def _load_dxf_styles(self, styles_root):
743
+ """
744
+ Loads differential formatting (dxf) styles from styles XML.
745
+
746
+ These are used for conditional formatting.
747
+
748
+ Args:
749
+ styles_root: The XML root element of styles.
750
+ """
751
+ self.workbook._dxf_styles = []
752
+
753
+ dxfs_elem = styles_root.find('.//main:dxfs', namespaces=self.ns)
754
+ if dxfs_elem is None:
755
+ return
756
+
757
+ for dxf_elem in dxfs_elem.findall('main:dxf', namespaces=self.ns):
758
+ dxf_data = {}
759
+
760
+ # Load font
761
+ font_elem = dxf_elem.find('main:font', namespaces=self.ns)
762
+ if font_elem is not None:
763
+ font_data = {}
764
+ b_elem = font_elem.find('main:b', namespaces=self.ns)
765
+ if b_elem is not None:
766
+ font_data['bold'] = b_elem.get('val', '1') != '0'
767
+ i_elem = font_elem.find('main:i', namespaces=self.ns)
768
+ if i_elem is not None:
769
+ font_data['italic'] = i_elem.get('val', '1') != '0'
770
+ u_elem = font_elem.find('main:u', namespaces=self.ns)
771
+ if u_elem is not None:
772
+ font_data['underline'] = True
773
+ strike_elem = font_elem.find('main:strike', namespaces=self.ns)
774
+ if strike_elem is not None:
775
+ font_data['strikethrough'] = True
776
+ color_elem = font_elem.find('main:color', namespaces=self.ns)
777
+ if color_elem is not None:
778
+ font_data['color'] = color_elem.get('rgb', 'FF000000')
779
+ if font_data:
780
+ dxf_data['font'] = font_data
781
+
782
+ # Load fill
783
+ fill_elem = dxf_elem.find('main:fill', namespaces=self.ns)
784
+ if fill_elem is not None:
785
+ pattern_elem = fill_elem.find('main:patternFill', namespaces=self.ns)
786
+ if pattern_elem is not None:
787
+ fill_data = {
788
+ 'pattern_type': pattern_elem.get('patternType', 'solid')
789
+ }
790
+ fg_elem = pattern_elem.find('main:fgColor', namespaces=self.ns)
791
+ if fg_elem is not None:
792
+ fill_data['fg_color'] = fg_elem.get('rgb', 'FFFFFFFF')
793
+ bg_elem = pattern_elem.find('main:bgColor', namespaces=self.ns)
794
+ if bg_elem is not None:
795
+ fill_data['bg_color'] = bg_elem.get('rgb', 'FFFFFFFF')
796
+ dxf_data['fill'] = fill_data
797
+
798
+ # Load border (simplified - just check if any border is present)
799
+ border_elem = dxf_elem.find('main:border', namespaces=self.ns)
800
+ if border_elem is not None:
801
+ # Check any side for style
802
+ for side in ['left', 'right', 'top', 'bottom']:
803
+ side_elem = border_elem.find(f'main:{side}', namespaces=self.ns)
804
+ if side_elem is not None:
805
+ style = side_elem.get('style', 'thin')
806
+ color = 'FF000000'
807
+ color_elem = side_elem.find('main:color', namespaces=self.ns)
808
+ if color_elem is not None:
809
+ color = color_elem.get('rgb', 'FF000000')
810
+ dxf_data['border'] = {'style': style, 'color': color}
811
+ break
812
+
813
+ self.workbook._dxf_styles.append(dxf_data)
814
+
815
+ def _load_document_properties(self, zipf):
816
+ """
817
+ Loads document properties from docProps/core.xml and docProps/app.xml.
818
+
819
+ ECMA-376 Part 2, Section 11 - Core Properties
820
+ ECMA-376 Part 1, Section 22.2 - Extended Properties
821
+
822
+ Args:
823
+ zipf: A ZipFile object containing the workbook data.
824
+ """
825
+ # Load core properties
826
+ self._load_core_properties(zipf)
827
+
828
+ # Load extended/app properties
829
+ self._load_app_properties(zipf)
830
+
831
+ def _load_core_properties(self, zipf):
832
+ """
833
+ Loads core document properties from docProps/core.xml.
834
+
835
+ Args:
836
+ zipf: A ZipFile object containing the workbook data.
837
+ """
838
+ try:
839
+ core_xml_content = zipf.read('docProps/core.xml')
840
+ core_root = ET.fromstring(core_xml_content)
841
+
842
+ # Namespaces for core properties
843
+ ns = {
844
+ 'cp': 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties',
845
+ 'dc': 'http://purl.org/dc/elements/1.1/',
846
+ 'dcterms': 'http://purl.org/dc/terms/'
847
+ }
848
+
849
+ # Ensure document_properties exists
850
+ if not hasattr(self.workbook, 'document_properties') or self.workbook.document_properties is None:
851
+ from .document_properties import DocumentProperties
852
+ self.workbook._document_properties = DocumentProperties()
853
+
854
+ core = self.workbook.document_properties.core
855
+
856
+ # Load Dublin Core properties
857
+ title = core_root.find('dc:title', ns)
858
+ if title is not None and title.text:
859
+ core._title = title.text
860
+
861
+ subject = core_root.find('dc:subject', ns)
862
+ if subject is not None and subject.text:
863
+ core._subject = subject.text
864
+
865
+ creator = core_root.find('dc:creator', ns)
866
+ if creator is not None and creator.text:
867
+ core._creator = creator.text
868
+
869
+ description = core_root.find('dc:description', ns)
870
+ if description is not None and description.text:
871
+ core._description = description.text
872
+
873
+ # Load OPC Core Properties
874
+ keywords = core_root.find('cp:keywords', ns)
875
+ if keywords is not None and keywords.text:
876
+ core._keywords = keywords.text
877
+
878
+ last_modified_by = core_root.find('cp:lastModifiedBy', ns)
879
+ if last_modified_by is not None and last_modified_by.text:
880
+ core._last_modified_by = last_modified_by.text
881
+
882
+ revision = core_root.find('cp:revision', ns)
883
+ if revision is not None and revision.text:
884
+ core._revision = revision.text
885
+
886
+ category = core_root.find('cp:category', ns)
887
+ if category is not None and category.text:
888
+ core._category = category.text
889
+
890
+ content_status = core_root.find('cp:contentStatus', ns)
891
+ if content_status is not None and content_status.text:
892
+ core._content_status = content_status.text
893
+
894
+ # Load dates
895
+ created = core_root.find('dcterms:created', ns)
896
+ if created is not None and created.text:
897
+ core._created = self._parse_datetime(created.text)
898
+
899
+ modified = core_root.find('dcterms:modified', ns)
900
+ if modified is not None and modified.text:
901
+ core._modified = self._parse_datetime(modified.text)
902
+
903
+ except KeyError:
904
+ # docProps/core.xml not found, skip
905
+ pass
906
+
907
+ def _load_app_properties(self, zipf):
908
+ """
909
+ Loads extended/application properties from docProps/app.xml.
910
+
911
+ Args:
912
+ zipf: A ZipFile object containing the workbook data.
913
+ """
914
+ try:
915
+ app_xml_content = zipf.read('docProps/app.xml')
916
+ app_root = ET.fromstring(app_xml_content)
917
+
918
+ # Ensure document_properties exists
919
+ if not hasattr(self.workbook, 'document_properties') or self.workbook.document_properties is None:
920
+ from .document_properties import DocumentProperties
921
+ self.workbook._document_properties = DocumentProperties()
922
+
923
+ ext = self.workbook.document_properties.extended
924
+
925
+ # Load properties (note: default namespace, so no prefix)
926
+ application = app_root.find('{http://schemas.openxmlformats.org/officeDocument/2006/extended-properties}Application')
927
+ if application is not None and application.text:
928
+ ext._application = application.text
929
+
930
+ app_version = app_root.find('{http://schemas.openxmlformats.org/officeDocument/2006/extended-properties}AppVersion')
931
+ if app_version is not None and app_version.text:
932
+ ext._app_version = app_version.text
933
+
934
+ company = app_root.find('{http://schemas.openxmlformats.org/officeDocument/2006/extended-properties}Company')
935
+ if company is not None and company.text:
936
+ ext._company = company.text
937
+
938
+ manager = app_root.find('{http://schemas.openxmlformats.org/officeDocument/2006/extended-properties}Manager')
939
+ if manager is not None and manager.text:
940
+ ext._manager = manager.text
941
+
942
+ hyperlink_base = app_root.find('{http://schemas.openxmlformats.org/officeDocument/2006/extended-properties}HyperlinkBase')
943
+ if hyperlink_base is not None and hyperlink_base.text:
944
+ ext._hyperlink_base = hyperlink_base.text
945
+
946
+ doc_security = app_root.find('{http://schemas.openxmlformats.org/officeDocument/2006/extended-properties}DocSecurity')
947
+ if doc_security is not None and doc_security.text:
948
+ ext._doc_security = int(doc_security.text)
949
+
950
+ scale_crop = app_root.find('{http://schemas.openxmlformats.org/officeDocument/2006/extended-properties}ScaleCrop')
951
+ if scale_crop is not None and scale_crop.text:
952
+ ext._scale_crop = scale_crop.text.lower() == 'true'
953
+
954
+ links_up_to_date = app_root.find('{http://schemas.openxmlformats.org/officeDocument/2006/extended-properties}LinksUpToDate')
955
+ if links_up_to_date is not None and links_up_to_date.text:
956
+ ext._links_up_to_date = links_up_to_date.text.lower() == 'true'
957
+
958
+ shared_doc = app_root.find('{http://schemas.openxmlformats.org/officeDocument/2006/extended-properties}SharedDoc')
959
+ if shared_doc is not None and shared_doc.text:
960
+ ext._shared_doc = shared_doc.text.lower() == 'true'
961
+
962
+ except KeyError:
963
+ # docProps/app.xml not found, skip
964
+ pass
965
+
966
+ def _parse_datetime(self, date_str):
967
+ """
968
+ Parses a W3CDTF datetime string.
969
+
970
+ Args:
971
+ date_str: A datetime string in W3CDTF format (e.g., '2024-01-15T10:30:00Z')
972
+
973
+ Returns:
974
+ datetime object or the original string if parsing fails.
975
+ """
976
+ from datetime import datetime
977
+
978
+ try:
979
+ # Try ISO format with Z suffix
980
+ if date_str.endswith('Z'):
981
+ return datetime.fromisoformat(date_str.replace('Z', '+00:00'))
982
+ # Try ISO format
983
+ return datetime.fromisoformat(date_str)
984
+ except (ValueError, AttributeError):
985
+ # Return the string if parsing fails
986
+ return date_str