aspose-cells-foss 25.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aspose/__init__.py +14 -0
- aspose/cells/__init__.py +31 -0
- aspose/cells/cell.py +350 -0
- aspose/cells/constants.py +44 -0
- aspose/cells/converters/__init__.py +13 -0
- aspose/cells/converters/csv_converter.py +55 -0
- aspose/cells/converters/json_converter.py +46 -0
- aspose/cells/converters/markdown_converter.py +453 -0
- aspose/cells/drawing/__init__.py +17 -0
- aspose/cells/drawing/anchor.py +172 -0
- aspose/cells/drawing/collection.py +233 -0
- aspose/cells/drawing/image.py +338 -0
- aspose/cells/formats.py +80 -0
- aspose/cells/formula/__init__.py +10 -0
- aspose/cells/formula/evaluator.py +360 -0
- aspose/cells/formula/functions.py +433 -0
- aspose/cells/formula/tokenizer.py +340 -0
- aspose/cells/io/__init__.py +27 -0
- aspose/cells/io/csv/__init__.py +8 -0
- aspose/cells/io/csv/reader.py +88 -0
- aspose/cells/io/csv/writer.py +98 -0
- aspose/cells/io/factory.py +138 -0
- aspose/cells/io/interfaces.py +48 -0
- aspose/cells/io/json/__init__.py +8 -0
- aspose/cells/io/json/reader.py +126 -0
- aspose/cells/io/json/writer.py +119 -0
- aspose/cells/io/md/__init__.py +8 -0
- aspose/cells/io/md/reader.py +161 -0
- aspose/cells/io/md/writer.py +334 -0
- aspose/cells/io/models.py +64 -0
- aspose/cells/io/xlsx/__init__.py +9 -0
- aspose/cells/io/xlsx/constants.py +312 -0
- aspose/cells/io/xlsx/image_writer.py +311 -0
- aspose/cells/io/xlsx/reader.py +284 -0
- aspose/cells/io/xlsx/writer.py +931 -0
- aspose/cells/plugins/__init__.py +6 -0
- aspose/cells/plugins/docling_backend/__init__.py +7 -0
- aspose/cells/plugins/docling_backend/backend.py +535 -0
- aspose/cells/plugins/markitdown_plugin/__init__.py +15 -0
- aspose/cells/plugins/markitdown_plugin/plugin.py +128 -0
- aspose/cells/range.py +210 -0
- aspose/cells/style.py +287 -0
- aspose/cells/utils/__init__.py +54 -0
- aspose/cells/utils/coordinates.py +68 -0
- aspose/cells/utils/exceptions.py +43 -0
- aspose/cells/utils/validation.py +102 -0
- aspose/cells/workbook.py +352 -0
- aspose/cells/worksheet.py +670 -0
- aspose_cells_foss-25.12.1.dist-info/METADATA +189 -0
- aspose_cells_foss-25.12.1.dist-info/RECORD +53 -0
- aspose_cells_foss-25.12.1.dist-info/WHEEL +5 -0
- aspose_cells_foss-25.12.1.dist-info/entry_points.txt +2 -0
- aspose_cells_foss-25.12.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
"""Excel XLSX file reader with full OOXML implementation."""
|
|
2
|
+
|
|
3
|
+
import zipfile
|
|
4
|
+
import xml.etree.ElementTree as ET
|
|
5
|
+
from typing import Dict, List, Optional, TYPE_CHECKING
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from ...utils import FileFormatError, coordinate_to_tuple
|
|
9
|
+
from .constants import XlsxConstants
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from ...workbook import Workbook
|
|
13
|
+
from ...worksheet import Worksheet
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class XlsxReader:
    """Excel XLSX file reader with OOXML protocol support.

    Opens the XLSX package as a ZIP archive and populates a ``Workbook`` with
    worksheets, cell values, formulas (with their cached results), merged
    ranges and relationship-based hyperlinks.  Cell styles are not loaded.
    """

    def __init__(self):
        # Prefix -> namespace URI map used by every ElementTree query below.
        self.namespaces = XlsxConstants.NAMESPACES

    def read(self, file_path: str, **kwargs) -> 'Workbook':
        """Read an Excel file and return a newly created workbook.

        Args:
            file_path: Path of the XLSX file to open.
            **kwargs: Accepted for interface compatibility; currently unused.

        Returns:
            A new ``Workbook`` populated from the file.

        Raises:
            FileFormatError: If the file cannot be read as an XLSX package.
        """
        # Imported lazily to avoid a circular import with the workbook module.
        from ...workbook import Workbook

        workbook = Workbook()
        self.load_workbook(workbook, file_path)
        return workbook

    def load_workbook(self, workbook: 'Workbook', filename: str):
        """Load an Excel file into an existing workbook object.

        Existing worksheets of ``workbook`` are discarded.  The first loaded
        worksheet, if any, becomes the active sheet.

        Args:
            workbook: Workbook instance to populate.
            filename: Path of the XLSX file to open.

        Raises:
            FileFormatError: If the file is not a valid ZIP archive or any
                other error occurs while reading it.  The original exception
                is chained as ``__cause__``.
        """
        try:
            with zipfile.ZipFile(filename, 'r') as zip_file:
                # Read core package parts.
                shared_strings = self._read_shared_strings(zip_file)
                workbook_data = self._read_workbook_structure(zip_file)
                relationships = self._read_workbook_relationships(zip_file)

                # Replace whatever the workbook held before.
                workbook._worksheets.clear()
                workbook._shared_strings = shared_strings

                # Load worksheets with proper relationship mapping.
                for sheet_info in workbook_data['sheets']:
                    self._load_worksheet(zip_file, workbook, sheet_info,
                                         shared_strings, relationships)

                # The first worksheet becomes the active one.
                if workbook._worksheets:
                    workbook._active_sheet = next(iter(workbook._worksheets.values()))

        except zipfile.BadZipFile as exc:
            raise FileFormatError(f"Invalid ZIP file: {filename}") from exc
        except Exception as exc:
            # Every other failure is reported as a format error; chaining keeps
            # the original traceback available for debugging.
            raise FileFormatError(f"Failed to read Excel file: {exc}") from exc

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _resolve_rel_target(base_dir: str, target: str) -> str:
        """Resolve a relationship ``Target`` to a ZIP member name.

        Targets starting with ``/`` are relative to the package root; all
        others are relative to ``base_dir``.  ``.`` / ``..`` segments are
        normalized away.
        """
        # Local import: ZIP member names always use forward slashes, so the
        # POSIX flavour is required regardless of the host platform.
        import posixpath

        if target.startswith('/'):
            return posixpath.normpath(target.lstrip('/'))
        return posixpath.normpath(posixpath.join(base_dir, target))

    @staticmethod
    def _rels_path_for(part_path: str) -> str:
        """Return the relationships part name belonging to ``part_path``.

        Example: ``xl/worksheets/sheet1.xml`` ->
        ``xl/worksheets/_rels/sheet1.xml.rels``.
        """
        directory, _, file_name = part_path.rpartition('/')
        prefix = f"{directory}/" if directory else ""
        return f"{prefix}_rels/{file_name}.rels"

    @staticmethod
    def _parse_relationships(content) -> Dict[str, str]:
        """Parse a ``.rels`` document into an ``Id -> Target`` mapping.

        Entries lacking either attribute are ignored.
        """
        root = ET.fromstring(content)
        tag = f'{{{XlsxConstants.NAMESPACES["pkg"]}}}Relationship'

        mapping = {}
        for rel in root.findall(f'.//{tag}'):
            rel_id = rel.get('Id')
            target = rel.get('Target')
            if rel_id and target:
                mapping[rel_id] = target
        return mapping

    def _string_item_text(self, item: ET.Element) -> str:
        """Return the text of a string item (``<si>`` or ``<is>`` element).

        Plain strings keep their text in a direct ``<t>`` child; rich text
        splits it over ``<r><t>`` runs, which are concatenated in document
        order.  Phonetic runs (``<rPh>``) are deliberately not included.
        """
        ns = self.namespaces
        pieces = [t.text or "" for t in item.findall('main:t', ns)]
        pieces.extend(t.text or "" for t in item.findall('main:r/main:t', ns))
        return "".join(pieces)

    # ------------------------------------------------------------------
    # Package parts
    # ------------------------------------------------------------------

    def _read_shared_strings(self, zip_file: zipfile.ZipFile) -> List[str]:
        """Read the shared strings table.

        Returns:
            One string per ``<si>`` entry, in file order (rich-text entries
            are flattened to plain text).  An empty list when the package has
            no ``xl/sharedStrings.xml``.
        """
        try:
            content = zip_file.read('xl/sharedStrings.xml')
        except KeyError:
            # No shared strings file.
            return []

        root = ET.fromstring(content)
        return [self._string_item_text(si)
                for si in root.findall('.//main:si', self.namespaces)]

    def _read_workbook_structure(self, zip_file: zipfile.ZipFile) -> Dict:
        """Read workbook structure and sheet information.

        Returns:
            ``{'sheets': [...]}`` where each entry is a dict with the keys
            ``name``, ``sheet_id`` and ``r_id`` taken from the ``<sheet>``
            elements of ``xl/workbook.xml``.

        Raises:
            FileFormatError: If ``xl/workbook.xml`` is missing.
        """
        try:
            content = zip_file.read('xl/workbook.xml')
        except KeyError:
            raise FileFormatError("Invalid workbook structure - missing workbook.xml")

        root = ET.fromstring(content)
        r_id_attr = f'{{{XlsxConstants.NAMESPACES["r"]}}}id'

        sheets = [
            {
                'name': sheet.get('name', 'Sheet1'),
                'sheet_id': sheet.get('sheetId', '1'),
                'r_id': sheet.get(r_id_attr),
            }
            for sheet in root.findall('.//main:sheet', self.namespaces)
        ]
        return {'sheets': sheets}

    def _read_workbook_relationships(self, zip_file: zipfile.ZipFile) -> Dict[str, str]:
        """Read workbook relationships to map relationship IDs to targets.

        Returns:
            ``Id -> Target`` mapping from ``xl/_rels/workbook.xml.rels``, or
            an empty dict when that part does not exist.
        """
        try:
            content = zip_file.read('xl/_rels/workbook.xml.rels')
        except KeyError:
            # No relationships file, return empty dict.
            return {}
        return self._parse_relationships(content)

    # ------------------------------------------------------------------
    # Worksheets
    # ------------------------------------------------------------------

    def _load_worksheet(self, zip_file: zipfile.ZipFile, workbook: 'Workbook',
                        sheet_info: Dict, shared_strings: List[str],
                        relationships: Dict[str, str]):
        """Load individual worksheet data.

        The worksheet is always created and registered in the workbook under
        its name; when its XML part cannot be located it simply stays empty.

        Args:
            zip_file: Open XLSX archive.
            workbook: Workbook receiving the worksheet.
            sheet_info: Entry produced by ``_read_workbook_structure``.
            shared_strings: Shared strings table of the workbook.
            relationships: Workbook-level ``Id -> Target`` mapping.
        """
        # Imported lazily to avoid a circular import with the worksheet module.
        from ...worksheet import Worksheet

        worksheet = Worksheet(workbook, sheet_info['name'])
        workbook._worksheets[sheet_info['name']] = worksheet

        try:
            archive_names = set(zip_file.namelist())

            # Preferred: the part referenced by the sheet's relationship.
            sheet_path = None
            r_id = sheet_info.get('r_id')
            if r_id and r_id in relationships:
                # Targets are relative to the workbook part's folder ("xl")
                # unless they start with "/".
                sheet_path = self._resolve_rel_target('xl', relationships[r_id])

            # Fallback to naming convention.
            if not sheet_path or sheet_path not in archive_names:
                sheet_path = f"xl/worksheets/sheet{sheet_info['sheet_id']}.xml"

            # Never fall back to sheet1.xml for every sheet: if the part is
            # still not found, leave the worksheet empty.
            if sheet_path not in archive_names:
                return

            root = ET.fromstring(zip_file.read(sheet_path))

            # Process sheet data.
            sheet_data = root.find('.//main:sheetData', self.namespaces)
            if sheet_data is not None:
                self._process_sheet_data(worksheet, sheet_data, shared_strings)

            # Process merged cells.
            merge_cells = root.find('.//main:mergeCells', self.namespaces)
            if merge_cells is not None:
                for merge_cell in merge_cells.findall('main:mergeCell', self.namespaces):
                    ref = merge_cell.get('ref')
                    if ref:
                        worksheet._merged_ranges.add(ref)

            # Process hyperlinks; the real part path is passed so the matching
            # .rels part is found even when file name and sheetId differ.
            self._process_hyperlinks(zip_file, worksheet, root,
                                     sheet_info['sheet_id'], sheet_path=sheet_path)

        except KeyError:
            # Best effort: a missing part or key leaves the worksheet with
            # whatever was loaded up to this point.
            pass

    def _process_sheet_data(self, worksheet: 'Worksheet', sheet_data: ET.Element,
                            shared_strings: List[str]):
        """Process sheet data and populate cells.

        For every ``<c>`` element with a valid reference the cell is created,
        its formula (if any) is stored, and its value is set.  For formula
        cells the stored value is kept as the cached calculation result.
        Inline strings (``t="inlineStr"``) are read from the ``<is>`` child
        when no ``<v>`` element is present.  Style indexes are ignored.
        """
        ns = self.namespaces

        for row in sheet_data.findall('main:row', ns):
            for cell_elem in row.findall('main:c', ns):
                cell_ref = cell_elem.get('r')
                if not cell_ref:
                    continue

                try:
                    row_idx, col_idx = coordinate_to_tuple(cell_ref)
                except (ValueError, TypeError, AttributeError):
                    # Skip invalid cell references.
                    continue

                cell_type = cell_elem.get('t', 'n')  # Default to number
                cell = worksheet.cell(row_idx, col_idx)

                # Handle formula if present.
                formula_elem = cell_elem.find('main:f', ns)
                if formula_elem is not None and formula_elem.text:
                    formula_text = formula_elem.text
                    if not formula_text.startswith('='):
                        formula_text = '=' + formula_text
                    cell._formula = formula_text
                    cell._data_type = 'formula'
                    cell._value = cell._formula

                # Locate the stored value: normally <v>, for inline strings
                # the text lives in <is> instead.
                raw_value = None
                value_elem = cell_elem.find('main:v', ns)
                if value_elem is not None:
                    raw_value = value_elem.text or ""
                elif cell_type == 'inlineStr':
                    inline_elem = cell_elem.find('main:is', ns)
                    if inline_elem is not None:
                        raw_value = self._string_item_text(inline_elem)

                if raw_value is None:
                    continue

                calculated_value = self._parse_cell_value(raw_value, cell_type, shared_strings)
                if cell.is_formula():
                    # Store calculated result for formula cells.
                    cell._calculated_value = calculated_value
                else:
                    # Regular cell value.
                    cell.value = calculated_value

    def _process_hyperlinks(self, zip_file: zipfile.ZipFile, worksheet: 'Worksheet',
                            worksheet_root: ET.Element, sheet_id: str,
                            sheet_path: Optional[str] = None):
        """Process hyperlinks for the worksheet.

        Only hyperlinks that reference a relationship (``r:id``) are applied;
        the relationship target is stored on the cell's ``_hyperlink``.

        Args:
            zip_file: Open XLSX archive.
            worksheet: Worksheet whose cells receive the hyperlinks.
            worksheet_root: Root element of the worksheet XML.
            sheet_id: ``sheetId`` of the sheet; used to guess the rels part
                name only when ``sheet_path`` is not given.
            sheet_path: ZIP member name of the worksheet part.  When given,
                the relationships part is derived from it.
        """
        hyperlinks_elem = worksheet_root.find('.//main:hyperlinks', self.namespaces)
        if hyperlinks_elem is None:
            return

        # Read worksheet relationships to get hyperlink targets.
        if sheet_path:
            rels_path = self._rels_path_for(sheet_path)
        else:
            rels_path = f"xl/worksheets/_rels/sheet{sheet_id}.xml.rels"

        try:
            # Bytes are handed to the parser as-is so that the encoding
            # declared in the XML prolog is honoured.
            rels_content = zip_file.read(rels_path)
        except KeyError:
            # No relationships file found.
            return
        relationships = self._parse_relationships(rels_content)

        # Apply hyperlinks to cells.
        r_id_attr = f'{{{XlsxConstants.NAMESPACES["r"]}}}id'
        for hyperlink in hyperlinks_elem.findall('main:hyperlink', self.namespaces):
            cell_ref = hyperlink.get('ref')
            rel_id = hyperlink.get(r_id_attr)

            if not (cell_ref and rel_id and rel_id in relationships):
                continue

            try:
                row_idx, col_idx = coordinate_to_tuple(cell_ref)
                cell = worksheet.cell(row_idx, col_idx)
                cell._hyperlink = relationships[rel_id]
            except (ValueError, TypeError, AttributeError, KeyError):
                # Skip invalid cell references or missing relationships.
                continue

    def _parse_cell_value(self, raw_value: str, cell_type: str, shared_strings: List[str]):
        """Parse cell value based on type.

        Args:
            raw_value: Text of the stored cell value.
            cell_type: Value of the cell's ``t`` attribute.
            shared_strings: Shared strings table used for type ``'s'``.

        Returns:
            * ``'s'``: the shared string at that index, or ``raw_value`` when
              the index is not an integer or is out of range.
            * ``'n'``: ``float`` when the text contains ``.`` or an exponent,
              otherwise ``int``; ``raw_value`` when it cannot be converted.
            * ``'b'``: ``True`` only for ``'1'``.
            * anything else (``'str'``, ``'inlineStr'``, ...): ``raw_value``.
        """
        if cell_type == 's':  # Shared string
            try:
                index = int(raw_value)
            except ValueError:
                return raw_value
            if 0 <= index < len(shared_strings):
                return shared_strings[index]
            return raw_value

        if cell_type == 'n':  # Number
            try:
                if '.' in raw_value or 'e' in raw_value.lower():
                    return float(raw_value)
                return int(raw_value)
            except ValueError:
                return raw_value

        if cell_type == 'b':  # Boolean
            return raw_value == '1'

        # Formula strings, inline strings and unknown types are kept verbatim.
        return raw_value
|