aspose-cells-foss 25.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. aspose/__init__.py +14 -0
  2. aspose/cells/__init__.py +31 -0
  3. aspose/cells/cell.py +350 -0
  4. aspose/cells/constants.py +44 -0
  5. aspose/cells/converters/__init__.py +13 -0
  6. aspose/cells/converters/csv_converter.py +55 -0
  7. aspose/cells/converters/json_converter.py +46 -0
  8. aspose/cells/converters/markdown_converter.py +453 -0
  9. aspose/cells/drawing/__init__.py +17 -0
  10. aspose/cells/drawing/anchor.py +172 -0
  11. aspose/cells/drawing/collection.py +233 -0
  12. aspose/cells/drawing/image.py +338 -0
  13. aspose/cells/formats.py +80 -0
  14. aspose/cells/formula/__init__.py +10 -0
  15. aspose/cells/formula/evaluator.py +360 -0
  16. aspose/cells/formula/functions.py +433 -0
  17. aspose/cells/formula/tokenizer.py +340 -0
  18. aspose/cells/io/__init__.py +27 -0
  19. aspose/cells/io/csv/__init__.py +8 -0
  20. aspose/cells/io/csv/reader.py +88 -0
  21. aspose/cells/io/csv/writer.py +98 -0
  22. aspose/cells/io/factory.py +138 -0
  23. aspose/cells/io/interfaces.py +48 -0
  24. aspose/cells/io/json/__init__.py +8 -0
  25. aspose/cells/io/json/reader.py +126 -0
  26. aspose/cells/io/json/writer.py +119 -0
  27. aspose/cells/io/md/__init__.py +8 -0
  28. aspose/cells/io/md/reader.py +161 -0
  29. aspose/cells/io/md/writer.py +334 -0
  30. aspose/cells/io/models.py +64 -0
  31. aspose/cells/io/xlsx/__init__.py +9 -0
  32. aspose/cells/io/xlsx/constants.py +312 -0
  33. aspose/cells/io/xlsx/image_writer.py +311 -0
  34. aspose/cells/io/xlsx/reader.py +284 -0
  35. aspose/cells/io/xlsx/writer.py +931 -0
  36. aspose/cells/plugins/__init__.py +6 -0
  37. aspose/cells/plugins/docling_backend/__init__.py +7 -0
  38. aspose/cells/plugins/docling_backend/backend.py +535 -0
  39. aspose/cells/plugins/markitdown_plugin/__init__.py +15 -0
  40. aspose/cells/plugins/markitdown_plugin/plugin.py +128 -0
  41. aspose/cells/range.py +210 -0
  42. aspose/cells/style.py +287 -0
  43. aspose/cells/utils/__init__.py +54 -0
  44. aspose/cells/utils/coordinates.py +68 -0
  45. aspose/cells/utils/exceptions.py +43 -0
  46. aspose/cells/utils/validation.py +102 -0
  47. aspose/cells/workbook.py +352 -0
  48. aspose/cells/worksheet.py +670 -0
  49. aspose_cells_foss-25.12.1.dist-info/METADATA +189 -0
  50. aspose_cells_foss-25.12.1.dist-info/RECORD +53 -0
  51. aspose_cells_foss-25.12.1.dist-info/WHEEL +5 -0
  52. aspose_cells_foss-25.12.1.dist-info/entry_points.txt +2 -0
  53. aspose_cells_foss-25.12.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,284 @@
1
+ """Excel XLSX file reader with full OOXML implementation."""
2
+
3
+ import zipfile
4
+ import xml.etree.ElementTree as ET
5
+ from typing import Dict, List, Optional, TYPE_CHECKING
6
+ from pathlib import Path
7
+
8
+ from ...utils import FileFormatError, coordinate_to_tuple
9
+ from .constants import XlsxConstants
10
+
11
+ if TYPE_CHECKING:
12
+ from ...workbook import Workbook
13
+ from ...worksheet import Worksheet
14
+
15
+
16
class XlsxReader:
    """Excel XLSX file reader with OOXML protocol support.

    The reader opens the workbook package as a ZIP archive, parses the
    shared-string table, the sheet list and the workbook relationships,
    and then populates one worksheet object per ``<sheet>`` entry with
    cell values, formulas, merged ranges and hyperlinks.
    """

    def __init__(self):
        # Prefix -> namespace-URI map used for every ElementTree lookup.
        # The methods below rely on the 'main', 'r' and 'pkg' prefixes.
        self.namespaces = XlsxConstants.NAMESPACES

    def read(self, file_path: str, **kwargs) -> 'Workbook':
        """Read an Excel file and return a newly created workbook.

        Args:
            file_path: Path of the ``.xlsx`` file to read.
            **kwargs: Accepted for interface compatibility; currently unused.

        Returns:
            A ``Workbook`` populated from the file.

        Raises:
            FileFormatError: If the file cannot be read as an XLSX package.
        """
        # Imported lazily; the module only imports Workbook under
        # TYPE_CHECKING at the top of the file.
        from ...workbook import Workbook

        workbook = Workbook()
        self.load_workbook(workbook, file_path)
        return workbook

    def load_workbook(self, workbook: 'Workbook', filename: str):
        """Load an Excel file into an existing workbook object.

        Existing worksheets of ``workbook`` are discarded, the shared
        strings are stored on it, and the first loaded sheet becomes the
        active sheet.

        Args:
            workbook: Workbook object to populate.
            filename: Path of the ``.xlsx`` file to read.

        Raises:
            FileFormatError: If the file is not a valid ZIP archive or any
                other error occurs while reading it.
        """
        try:
            with zipfile.ZipFile(filename, 'r') as zip_file:
                # Read core files
                shared_strings = self._read_shared_strings(zip_file)
                workbook_data = self._read_workbook_structure(zip_file)
                relationships = self._read_workbook_relationships(zip_file)

                # Clear existing worksheets
                workbook._worksheets.clear()
                workbook._shared_strings = shared_strings

                # Load worksheets with proper relationship mapping
                for sheet_info in workbook_data['sheets']:
                    self._load_worksheet(
                        zip_file, workbook, sheet_info,
                        shared_strings, relationships,
                    )

                # Set active sheet
                if workbook._worksheets:
                    workbook._active_sheet = next(
                        iter(workbook._worksheets.values())
                    )

        except zipfile.BadZipFile as err:
            raise FileFormatError(f"Invalid ZIP file: {filename}") from err
        except FileFormatError:
            # Already the error type callers expect; do not wrap it twice.
            raise
        except Exception as err:
            raise FileFormatError(f"Failed to read Excel file: {err}") from err

    @staticmethod
    def _resolve_part_path(base_dir: str, target: str) -> str:
        """Resolve a relationship target to a ZIP member name.

        Targets starting with ``/`` are treated as relative to the package
        root; any other target is resolved against ``base_dir``. ``.`` and
        ``..`` segments are collapsed. The result never has a leading slash.
        """
        if target.startswith('/'):
            segments = target.split('/')
        else:
            segments = f"{base_dir}/{target}".split('/')

        resolved: List[str] = []
        for segment in segments:
            if segment in ('', '.'):
                continue
            if segment == '..':
                if resolved:
                    resolved.pop()
                continue
            resolved.append(segment)
        return '/'.join(resolved)

    @staticmethod
    def _worksheet_rels_path(sheet_path: Optional[str], sheet_id) -> str:
        """Return the relationships part name for a worksheet part.

        The name is derived from ``sheet_path`` (``<dir>/_rels/<file>.rels``)
        when it is known; otherwise it falls back to the
        ``sheet<sheet_id>.xml`` naming convention.
        """
        if not sheet_path:
            return f"xl/worksheets/_rels/sheet{sheet_id}.xml.rels"
        directory, _, file_name = sheet_path.rpartition('/')
        if directory:
            return f"{directory}/_rels/{file_name}.rels"
        return f"_rels/{file_name}.rels"

    def _string_item_text(self, item: ET.Element) -> str:
        """Return the text of a string item (``<si>`` or ``<is>``).

        A direct ``<t>`` child is used when present. Otherwise the text of
        all rich-text runs (``<r><t>``) is concatenated. Text inside other
        children (for example phonetic runs) is not included. An item with
        no text yields an empty string.
        """
        text_elem = item.find('main:t', self.namespaces)
        if text_elem is not None:
            return text_elem.text or ""
        return "".join(
            run_text.text or ""
            for run_text in item.findall('main:r/main:t', self.namespaces)
        )

    def _parse_relationships(self, rels_root: ET.Element) -> Dict[str, str]:
        """Build an ``Id -> Target`` map from a relationships document."""
        relationships = {}
        for rel in rels_root.findall('.//pkg:Relationship', self.namespaces):
            rel_id = rel.get('Id')
            target = rel.get('Target')
            if rel_id and target:
                relationships[rel_id] = target
        return relationships

    def _read_shared_strings(self, zip_file: zipfile.ZipFile) -> List[str]:
        """Read the shared strings table.

        Returns:
            One string per ``<si>`` item, in document order, so a cell's
            shared-string index can be used directly as a list index.
            An empty list is returned when the package has no
            ``xl/sharedStrings.xml`` part.
        """
        try:
            content = zip_file.read('xl/sharedStrings.xml')
        except KeyError:
            # No shared strings file
            return []

        root = ET.fromstring(content)
        return [
            self._string_item_text(si)
            for si in root.findall('.//main:si', self.namespaces)
        ]

    def _read_workbook_structure(self, zip_file: zipfile.ZipFile) -> Dict:
        """Read workbook structure and sheet information.

        Returns:
            ``{'sheets': [...]}`` where every entry is a dict with the keys
            ``'name'``, ``'sheet_id'`` and ``'r_id'`` (``None`` when the
            sheet element carries no relationship id).

        Raises:
            FileFormatError: If ``xl/workbook.xml`` is missing.
        """
        try:
            content = zip_file.read('xl/workbook.xml')
        except KeyError as err:
            raise FileFormatError(
                "Invalid workbook structure - missing workbook.xml"
            ) from err

        root = ET.fromstring(content)
        # The relationship id attribute lives in the 'r' namespace.
        r_id_attr = f'{{{self.namespaces["r"]}}}id'

        sheets = [
            {
                'name': sheet.get('name', 'Sheet1'),
                'sheet_id': sheet.get('sheetId', '1'),
                'r_id': sheet.get(r_id_attr),
            }
            for sheet in root.findall('.//main:sheet', self.namespaces)
        ]
        return {'sheets': sheets}

    def _read_workbook_relationships(self, zip_file: zipfile.ZipFile) -> Dict[str, str]:
        """Read workbook relationships to map relationship IDs to targets.

        Returns:
            ``Id -> Target`` map, or an empty dict when the package has no
            ``xl/_rels/workbook.xml.rels`` part.
        """
        try:
            content = zip_file.read('xl/_rels/workbook.xml.rels')
        except KeyError:
            # No relationships file, return empty dict
            return {}
        return self._parse_relationships(ET.fromstring(content))

    def _load_worksheet(self, zip_file: zipfile.ZipFile, workbook: 'Workbook',
                        sheet_info: Dict, shared_strings: List[str],
                        relationships: Dict[str, str]):
        """Load individual worksheet data.

        The worksheet is created and registered on the workbook first, so a
        sheet whose XML part cannot be located stays in the workbook as an
        empty sheet.

        Args:
            zip_file: Open workbook package.
            workbook: Workbook receiving the worksheet.
            sheet_info: Entry produced by ``_read_workbook_structure``.
            shared_strings: Shared strings table of the workbook.
            relationships: Workbook-level ``Id -> Target`` map.
        """
        from ...worksheet import Worksheet

        # Create worksheet
        worksheet = Worksheet(workbook, sheet_info['name'])
        workbook._worksheets[sheet_info['name']] = worksheet

        try:
            # Build the member-name set once instead of per lookup.
            part_names = set(zip_file.namelist())

            # Determine worksheet path using relationships
            sheet_path = None
            r_id = sheet_info.get('r_id')
            if r_id and r_id in relationships:
                sheet_path = self._resolve_part_path('xl', relationships[r_id])

            # Fallback to naming convention
            if not sheet_path or sheet_path not in part_names:
                sheet_path = f"xl/worksheets/sheet{sheet_info['sheet_id']}.xml"

            # Final fallback - but don't use sheet1.xml for all sheets!
            if sheet_path not in part_names:
                # No part found for this sheet; leave it empty
                return

            root = ET.fromstring(zip_file.read(sheet_path))

            # Process sheet data
            sheet_data = root.find('.//main:sheetData', self.namespaces)
            if sheet_data is not None:
                self._process_sheet_data(worksheet, sheet_data, shared_strings)

            # Process merged cells
            merge_cells = root.find('.//main:mergeCells', self.namespaces)
            if merge_cells is not None:
                for merge_cell in merge_cells.findall('main:mergeCell', self.namespaces):
                    ref = merge_cell.get('ref')
                    if ref:
                        worksheet._merged_ranges.add(ref)

            # Process hyperlinks
            self._process_hyperlinks(
                zip_file, worksheet, root, sheet_info['sheet_id'],
                sheet_path=sheet_path,
            )

        except KeyError:
            # Best effort: keep whatever was loaded for this worksheet
            pass

    def _process_sheet_data(self, worksheet: 'Worksheet', sheet_data: ET.Element,
                            shared_strings: List[str]):
        """Process sheet data and populate cells.

        Cells without a usable ``r`` reference are skipped. For formula
        cells the formula text is stored and the cached ``<v>`` value is
        kept as the calculated value; for all other cells the parsed value
        is assigned directly.
        """
        namespaces = self.namespaces
        for row in sheet_data.findall('main:row', namespaces):
            for cell_elem in row.findall('main:c', namespaces):
                # Get cell reference
                cell_ref = cell_elem.get('r')
                if not cell_ref:
                    continue

                try:
                    row_idx, col_idx = coordinate_to_tuple(cell_ref)
                except (ValueError, TypeError, AttributeError):
                    # Skip invalid cell references
                    continue

                # Get cell value and formula
                cell_type = cell_elem.get('t', 'n')  # Default to number
                value_elem = cell_elem.find('main:v', namespaces)
                formula_elem = cell_elem.find('main:f', namespaces)

                # Create cell first
                cell = worksheet.cell(row_idx, col_idx)

                # Handle formula if present
                if formula_elem is not None and formula_elem.text:
                    formula_text = formula_elem.text
                    if not formula_text.startswith('='):
                        formula_text = '=' + formula_text
                    cell._formula = formula_text
                    cell._data_type = 'formula'
                    cell._value = cell._formula

                # Locate the raw value text
                raw_value = None
                if value_elem is not None:
                    raw_value = value_elem.text or ""
                elif cell_type == 'inlineStr':
                    # Inline strings keep their text in <is>, not in <v>
                    inline_elem = cell_elem.find('main:is', namespaces)
                    if inline_elem is not None:
                        raw_value = self._string_item_text(inline_elem)

                if raw_value is None:
                    continue

                parsed_value = self._parse_cell_value(
                    raw_value, cell_type, shared_strings
                )
                if cell.is_formula():
                    # Store calculated result for formula cells
                    cell._calculated_value = parsed_value
                else:
                    # Regular cell value
                    cell.value = parsed_value

                # NOTE: the cell style index (attribute 's') is not applied;
                # that would require reading styles.xml.

    def _process_hyperlinks(self, zip_file: zipfile.ZipFile, worksheet: 'Worksheet',
                            worksheet_root: ET.Element, sheet_id,
                            sheet_path: Optional[str] = None):
        """Process hyperlinks for the worksheet.

        Only hyperlinks that reference an entry of the worksheet's
        relationships part are applied; the relationship target is stored
        on the cell.

        Args:
            zip_file: Open workbook package.
            worksheet: Worksheet whose cells receive the hyperlinks.
            worksheet_root: Root element of the worksheet XML.
            sheet_id: Sheet id, used for the relationships part name only
                when ``sheet_path`` is not given.
            sheet_path: ZIP member name of the worksheet part. When given,
                the relationships part name is derived from it.
        """
        # Find hyperlinks in the worksheet XML
        hyperlinks_elem = worksheet_root.find('.//main:hyperlinks', self.namespaces)
        if hyperlinks_elem is None:
            return

        # Read worksheet relationships to get hyperlink targets
        rels_path = self._worksheet_rels_path(sheet_path, sheet_id)
        try:
            rels_content = zip_file.read(rels_path)
        except KeyError:
            # No relationships file found
            return

        # Parse the raw bytes so the XML declaration decides the encoding.
        relationships = self._parse_relationships(ET.fromstring(rels_content))

        # The relationship id attribute lives in the 'r' namespace.
        r_id_attr = f'{{{self.namespaces["r"]}}}id'

        # Apply hyperlinks to cells
        for hyperlink in hyperlinks_elem.findall('main:hyperlink', self.namespaces):
            cell_ref = hyperlink.get('ref')
            rel_id = hyperlink.get(r_id_attr)

            if not (cell_ref and rel_id and rel_id in relationships):
                continue

            # A hyperlink may cover a range ("A1:B2"); anchor it to the
            # first cell of the reference.
            anchor_ref = cell_ref.split(':', 1)[0]
            try:
                row_idx, col_idx = coordinate_to_tuple(anchor_ref)
                cell = worksheet.cell(row_idx, col_idx)
                cell._hyperlink = relationships[rel_id]
            except (ValueError, TypeError, AttributeError, KeyError):
                # Skip invalid cell references or missing relationships
                continue

    def _parse_cell_value(self, raw_value: str, cell_type: str, shared_strings: List[str]):
        """Parse a raw cell value according to the cell type attribute.

        Args:
            raw_value: Text of the cell value.
            cell_type: Value of the cell's ``t`` attribute.
            shared_strings: Shared strings table of the workbook.

        Returns:
            * ``'s'``: the shared string at that index, or ``raw_value``
              when the index is not a valid integer or is out of range.
            * ``'n'``: ``float`` when the text contains ``.`` or an
              exponent, otherwise ``int``; ``raw_value`` when it cannot be
              converted.
            * ``'b'``: ``True`` when the text is ``'1'``, else ``False``.
            * any other type: ``raw_value`` unchanged.
        """
        if cell_type == 's':  # Shared string
            try:
                index = int(raw_value)
            except ValueError:
                return raw_value
            if 0 <= index < len(shared_strings):
                return shared_strings[index]
            return raw_value

        if cell_type == 'n':  # Number
            try:
                # Decimal point or exponent means float, otherwise int
                if '.' in raw_value or 'e' in raw_value.lower():
                    return float(raw_value)
                return int(raw_value)
            except ValueError:
                return raw_value

        if cell_type == 'b':  # Boolean
            return raw_value == '1'

        # 'str' (formula string), 'inlineStr' and all other types
        return raw_value