aspose-cells-foss 25.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aspose/__init__.py +14 -0
- aspose/cells/__init__.py +31 -0
- aspose/cells/cell.py +350 -0
- aspose/cells/constants.py +44 -0
- aspose/cells/converters/__init__.py +13 -0
- aspose/cells/converters/csv_converter.py +55 -0
- aspose/cells/converters/json_converter.py +46 -0
- aspose/cells/converters/markdown_converter.py +453 -0
- aspose/cells/drawing/__init__.py +17 -0
- aspose/cells/drawing/anchor.py +172 -0
- aspose/cells/drawing/collection.py +233 -0
- aspose/cells/drawing/image.py +338 -0
- aspose/cells/formats.py +80 -0
- aspose/cells/formula/__init__.py +10 -0
- aspose/cells/formula/evaluator.py +360 -0
- aspose/cells/formula/functions.py +433 -0
- aspose/cells/formula/tokenizer.py +340 -0
- aspose/cells/io/__init__.py +27 -0
- aspose/cells/io/csv/__init__.py +8 -0
- aspose/cells/io/csv/reader.py +88 -0
- aspose/cells/io/csv/writer.py +98 -0
- aspose/cells/io/factory.py +138 -0
- aspose/cells/io/interfaces.py +48 -0
- aspose/cells/io/json/__init__.py +8 -0
- aspose/cells/io/json/reader.py +126 -0
- aspose/cells/io/json/writer.py +119 -0
- aspose/cells/io/md/__init__.py +8 -0
- aspose/cells/io/md/reader.py +161 -0
- aspose/cells/io/md/writer.py +334 -0
- aspose/cells/io/models.py +64 -0
- aspose/cells/io/xlsx/__init__.py +9 -0
- aspose/cells/io/xlsx/constants.py +312 -0
- aspose/cells/io/xlsx/image_writer.py +311 -0
- aspose/cells/io/xlsx/reader.py +284 -0
- aspose/cells/io/xlsx/writer.py +931 -0
- aspose/cells/plugins/__init__.py +6 -0
- aspose/cells/plugins/docling_backend/__init__.py +7 -0
- aspose/cells/plugins/docling_backend/backend.py +535 -0
- aspose/cells/plugins/markitdown_plugin/__init__.py +15 -0
- aspose/cells/plugins/markitdown_plugin/plugin.py +128 -0
- aspose/cells/range.py +210 -0
- aspose/cells/style.py +287 -0
- aspose/cells/utils/__init__.py +54 -0
- aspose/cells/utils/coordinates.py +68 -0
- aspose/cells/utils/exceptions.py +43 -0
- aspose/cells/utils/validation.py +102 -0
- aspose/cells/workbook.py +352 -0
- aspose/cells/worksheet.py +670 -0
- aspose_cells_foss-25.12.1.dist-info/METADATA +189 -0
- aspose_cells_foss-25.12.1.dist-info/RECORD +53 -0
- aspose_cells_foss-25.12.1.dist-info/WHEEL +5 -0
- aspose_cells_foss-25.12.1.dist-info/entry_points.txt +2 -0
- aspose_cells_foss-25.12.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Markdown file writer for saving workbook data to Markdown table format.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import List, Optional, TYPE_CHECKING
|
|
6
|
+
from ...formats import CellValue
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from ...workbook import Workbook
|
|
10
|
+
from ...worksheet import Worksheet
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MarkdownWriter:
    """Writer that renders row data or workbook sheets as Markdown pipe tables."""

    def __init__(self):
        """Create a writer; the writer keeps no state between calls."""

    def write(self, file_path: str, data: 'List[List[CellValue]]', **kwargs) -> None:
        """Render ``data`` as a Markdown table and write it to ``file_path``.

        Args:
            file_path: Destination path; the file is opened in text mode ``'w'``.
            data: Rows of values (or cell-like objects) to render.
            **kwargs: Optional settings:
                ``encoding`` (default ``'utf-8'``),
                ``include_headers`` (default ``True``),
                ``table_alignment`` (default ``'left'``),
                ``max_col_width`` (default ``50``).

        Raises:
            ValueError: If conversion or writing fails; the original exception
                is attached as ``__cause__``.
        """
        encoding = kwargs.get('encoding', 'utf-8')
        include_headers = kwargs.get('include_headers', True)
        table_alignment = kwargs.get('table_alignment', 'left')
        max_col_width = kwargs.get('max_col_width', 50)

        try:
            markdown_content = self._convert_data_to_markdown(
                data, include_headers, table_alignment, max_col_width
            )

            with open(file_path, 'w', encoding=encoding) as file:
                file.write(markdown_content)

        except Exception as e:
            # Chain the cause so the underlying traceback is not lost.
            raise ValueError(f"Error writing Markdown file: {e}") from e

    def write_workbook(self, file_path: str, workbook: 'Workbook', **kwargs) -> None:
        """Render one or more worksheets of ``workbook`` to a Markdown file.

        Sheet selection, in priority order:
          1. ``sheet_name`` if it is a key of ``workbook._worksheets``;
          2. every worksheet when ``all_sheets`` is truthy;
          3. otherwise ``workbook.active``.

        Note that a ``sheet_name`` that is not present in the workbook does not
        raise; selection silently falls through to rule 2 or 3.

        Args:
            file_path: Destination path; the file is opened in text mode ``'w'``.
            workbook: Source workbook.
            **kwargs: ``sheet_name``, ``all_sheets`` (default ``False``),
                ``include_headers`` (default ``True``), ``table_alignment``
                (default ``'left'``), ``max_col_width`` (default ``50``),
                ``encoding`` (default ``'utf-8'``).

        Raises:
            ValueError: If the file cannot be written; the original exception
                is attached as ``__cause__``.
        """
        sheet_name = kwargs.get('sheet_name')
        include_headers = kwargs.get('include_headers', True)
        table_alignment = kwargs.get('table_alignment', 'left')
        max_col_width = kwargs.get('max_col_width', 50)
        all_sheets = kwargs.get('all_sheets', False)
        encoding = kwargs.get('encoding', 'utf-8')

        result_parts = []

        if sheet_name and sheet_name in workbook._worksheets:
            # Convert specific sheet
            worksheet = workbook._worksheets[sheet_name]
            sheet_md = self._convert_single_sheet(worksheet, include_headers, table_alignment, max_col_width)
            if sheet_md:
                result_parts.append(sheet_md)
        elif all_sheets:
            # Convert all sheets with headers
            for worksheet in workbook._worksheets.values():
                sheet_md = self._convert_single_sheet(worksheet, include_headers, table_alignment, max_col_width)
                if sheet_md:
                    result_parts.append(sheet_md)
                    result_parts.append("")  # Add empty line between sheets
        else:
            # Convert active sheet only
            worksheet = workbook.active
            sheet_md = self._convert_single_sheet(worksheet, include_headers, table_alignment, max_col_width)
            if sheet_md:
                result_parts.append(sheet_md)

        markdown_content = "\n".join(result_parts).strip()

        try:
            with open(file_path, 'w', encoding=encoding) as file:
                file.write(markdown_content)
        except Exception as e:
            # Chain the cause so the underlying traceback is not lost.
            raise ValueError(f"Error writing Markdown file: {e}") from e

    def _convert_single_sheet(self, worksheet: 'Worksheet', include_headers: bool,
                              table_alignment: str, max_col_width: int) -> str:
        """Convert one worksheet to a ``# <name>`` heading followed by its table.

        Returns an empty string when ``worksheet`` is falsy or has no cells.
        """
        if not worksheet or not worksheet._cells:
            return ""

        sheet_parts = []

        # Add worksheet title with markdown header
        sheet_parts.append(f"# {worksheet.name}")
        sheet_parts.append("")  # Empty line after header

        # Convert worksheet to data first
        data = self._worksheet_to_data(worksheet)
        # Heuristically trim leading non-tabular/title rows so header aligns with table
        if data:
            best_idx = self._detect_table_start_index(data)
            if best_idx > 0:
                data = data[best_idx:]

        # Convert data to markdown table
        table_md = self._convert_data_to_markdown(
            data, include_headers, table_alignment, max_col_width
        )
        if table_md:
            sheet_parts.append(table_md)

        return "\n".join(sheet_parts)

    def _worksheet_to_data(self, worksheet: 'Worksheet') -> List[List]:
        """Build a dense row/column grid of cell objects from ``worksheet``.

        The grid spans rows ``1..max_row`` and columns ``1..max_column``; each
        entry is the object stored in ``worksheet._cells`` under ``(row, col)``
        or ``None`` when that entry is missing or falsy. Cell objects (rather
        than bare values) are kept so hyperlinks can be rendered later.
        """
        max_row = worksheet.max_row
        max_col = worksheet.max_column

        if max_row == 0 or max_col == 0:
            return []

        table_data = []
        for row in range(1, max_row + 1):
            row_data = []
            for col in range(1, max_col + 1):
                cell = worksheet._cells.get((row, col))
                row_data.append(cell if cell else None)
            table_data.append(row_data)

        return table_data

    def _convert_data_to_markdown(self, data: List[List], include_headers: bool,
                                  alignment: str, max_width: int) -> str:
        """Render rows as a Markdown pipe table.

        Args:
            data: Rows of values or cell-like objects.
            include_headers: When true, the first row becomes the header and a
                separator line follows it; when false, only data lines are
                emitted (no separator line).
            alignment: ``'center'``, ``'right'`` or anything else for left.
            max_width: Maximum rendered width of a cell before truncation.

        Returns:
            The table text, or ``""`` when there are no rows.
        """
        if not data:
            return ""

        # Format every cell exactly once. Formatting is not idempotent (it
        # escapes "|"), so rows must not be passed through the formatter again.
        formatted_data = [
            [self._format_cell_for_markdown(item, max_width) for item in row_data]
            for row_data in data
        ]

        if not formatted_data:
            return ""

        # Pad ragged rows so every line has the same number of cells as the
        # separator line.
        max_col = max(len(row) for row in formatted_data)
        for row in formatted_data:
            row.extend([""] * (max_col - len(row)))

        # Each column is as wide as its widest formatted cell.
        col_widths = [
            max(len(row[i]) for row in formatted_data) for i in range(max_col)
        ]

        def render(row):
            return "| " + " | ".join(
                text.ljust(col_widths[i]) for i, text in enumerate(row)
            ) + " |"

        if not include_headers:
            # No headers, treat all rows as data
            return "\n".join(render(row) for row in formatted_data)

        align_char = self._get_alignment_chars(alignment)
        separator_line = "| " + " | ".join(
            align_char[0] + "-" * max(1, width - 2) + align_char[1]
            for width in col_widths
        ) + " |"

        result = [render(formatted_data[0]), separator_line]
        result.extend(render(row) for row in formatted_data[1:])
        return "\n".join(result)

    def _get_alignment_chars(self, alignment: str) -> tuple:
        """Return the (first, last) characters of a separator cell.

        ``'center'`` gives ``(":", ":")``, ``'right'`` gives ``("-", ":")``;
        any other value is treated as left and gives ``("-", "-")``.
        """
        if alignment == 'center':
            return (":", ":")
        elif alignment == 'right':
            return ("-", ":")
        else:  # left
            return ("-", "-")

    def _format_cell_for_markdown(self, cell_or_value, max_width: int) -> str:
        """Format a value or cell-like object as the text of one table cell.

        An object exposing both ``value`` and ``hyperlink`` attributes is
        treated as a cell; anything else is treated as a plain value. ``None``
        renders as ``""`` and booleans as ``TRUE``/``FALSE``. Pipes are
        escaped, newlines become spaces and carriage returns are removed. When
        a hyperlink is present the text is wrapped as ``[text](url)``.

        Text longer than ``max_width`` is shortened with a trailing ``...``.
        For hyperlinks only the label is shortened, never the URL; if the URL
        alone leaves no room for a label, the label is limited to
        ``max_width`` on its own instead of being cut down to nothing.
        """
        # Handle cell objects vs direct values
        if hasattr(cell_or_value, 'value') and hasattr(cell_or_value, 'hyperlink'):
            value = cell_or_value.value
            hyperlink = cell_or_value.hyperlink
        else:
            value = cell_or_value
            hyperlink = None

        if value is None:
            return ""

        # Convert to string
        if isinstance(value, bool):
            text = "TRUE" if value else "FALSE"
        else:
            text = str(value)

        # Escape characters that would break the table row
        text = text.replace("|", "\\|")
        text = text.replace("\n", " ")
        text = text.replace("\r", "")

        if not hyperlink:
            if len(text) > max_width:
                # Clamp so a tiny max_width never produces a negative slice.
                text = text[:max(max_width - 3, 0)] + "..."
            return text

        # Escape hyperlink URL for markdown
        escaped_url = hyperlink.replace(")", "\\)")
        linked = f"[{text}]({escaped_url})"
        if len(linked) <= max_width:
            return linked

        # Room left for the label once "[", "](" and ")" plus the URL are
        # accounted for. The already-escaped label is reused so pipes and
        # newlines stay safe in the truncated form too.
        budget = max_width - len(escaped_url) - 4
        if budget < 4:
            # The URL alone exhausts the width; cap the label by itself.
            budget = max_width
        if len(text) > budget:
            text = text[:max(budget - 3, 0)] + "..."
        return f"[{text}]({escaped_url})"

    def _detect_table_start_index(self, data: List[List]) -> int:
        """Return the index of the row that scores highest as a table start.

        Every row is scored with ``_score_row_as_table_start``; the first row
        with the strictly highest score wins. Index 0 is returned when no row
        scores above -1.

        NOTE(review): the scan covers all rows, not just leading ones, so a
        later row that outscores the real header causes every earlier row to
        be dropped by the caller. Confirm this is intended.
        """
        if not data:
            return 0

        best_idx = 0
        best_score = -1

        for idx, row in enumerate(data):
            score = self._score_row_as_table_start(row)
            if score > best_score:
                best_score = score
                best_idx = idx

        return best_idx

    def _score_row_as_table_start(self, row: List) -> float:
        """Score a row's likelihood of being the actual table start.

        Cells that are ``None`` or blank after stripping are ignored. A row
        with no remaining cells scores 0. Otherwise the score is built from:
        a penalty when more than half the cells start with ``"Unnamed"``, a
        reward proportional to the share of other cells, a bonus based on
        average text length, and a bonus for having at least two cells when
        fewer than half are ``"Unnamed"``.
        """
        non_empty = 0
        unnamed_count = 0
        total_chars = 0

        for cell_or_value in row:
            # Handle both cell objects and direct values
            if hasattr(cell_or_value, 'value'):
                value = cell_or_value.value
            else:
                value = cell_or_value

            if value is None:
                continue

            value_str = str(value).strip()
            if value_str == "":
                continue

            non_empty += 1
            total_chars += len(value_str)

            # Check for pandas-style "Unnamed" columns
            if value_str.startswith("Unnamed"):
                unnamed_count += 1

        if non_empty == 0:
            return 0

        meaningful_content = non_empty - unnamed_count
        unnamed_ratio = unnamed_count / non_empty
        meaningful_ratio = meaningful_content / non_empty
        avg_content_length = total_chars / non_empty

        score = 0

        # Penalize unnamed columns heavily
        if unnamed_ratio > 0.5:
            score -= 100 * unnamed_ratio

        # Reward meaningful content
        score += 50 * meaningful_ratio

        # Reward reasonable content length
        if 2 <= avg_content_length <= 20:
            score += 20
        elif avg_content_length > 20:
            score += 10

        # Reward having multiple non-empty cells (but not too many unnamed ones)
        if non_empty >= 2 and unnamed_ratio < 0.5:
            score += min(non_empty * 5, 25)

        return score

    def _format_cell_value(self, value: 'CellValue', max_width: int) -> str:
        """Legacy method for backward compatibility."""
        return self._format_cell_for_markdown(value, max_width)

    def save_workbook(self, workbook: 'Workbook', file_path: str, **options) -> None:
        """Save workbook to Markdown file - unified interface method."""
        self.write_workbook(file_path, workbook, **options)
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unified data models for cross-format workbook operations.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Dict, Any, Optional, TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from ..worksheet import Worksheet
|
|
10
|
+
from ..workbook import Workbook
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class WorkbookData:
    """Format-neutral container holding a workbook's sheets and metadata."""

    # Worksheets keyed by the name they were registered under.
    worksheets: Dict[str, 'Worksheet'] = field(default_factory=dict)
    # Name of the sheet treated as active; None until one is known.
    active_sheet_name: Optional[str] = None
    # Free-form properties; copied into the Workbook's ``_properties``.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def add_worksheet(self, name: str, worksheet: 'Worksheet') -> None:
        """Register ``worksheet`` under ``name``.

        The first sheet added while no active sheet is recorded becomes the
        active one.
        """
        self.worksheets[name] = worksheet
        if self.active_sheet_name is not None:
            return
        self.active_sheet_name = name

    def to_workbook(self) -> 'Workbook':
        """Build a Workbook that shares this model's worksheet objects.

        The Workbook is allocated without running its ``__init__`` and its
        private attributes are populated directly. Each worksheet's
        ``_parent`` is re-pointed at the new Workbook.
        """
        from ..workbook import Workbook

        # Allocate the instance only; Workbook.__init__ is deliberately skipped.
        book = Workbook.__new__(Workbook)
        book._worksheets = {}
        book._active_sheet = None
        book._shared_strings = []
        book._properties = self.metadata.copy()
        book._filename = None

        # Attach every sheet and make it point back at the new workbook.
        sheets = book._worksheets
        for sheet_name, sheet in self.worksheets.items():
            sheets[sheet_name] = sheet
            sheet._parent = book

        # Prefer the recorded active sheet; otherwise fall back to the first.
        wanted = self.active_sheet_name
        if wanted and wanted in sheets:
            book._active_sheet = sheets[wanted]
        elif sheets:
            book._active_sheet = next(iter(sheets.values()))

        return book

    @classmethod
    def from_workbook(cls, workbook: 'Workbook') -> 'WorkbookData':
        """Snapshot a Workbook into the unified model.

        The worksheet mapping and the properties mapping are shallow-copied;
        the worksheet objects themselves are shared with ``workbook``. A
        workbook without a ``_properties`` attribute yields empty metadata.
        """
        current = workbook._active_sheet
        active_name = current.name if current else None

        properties = getattr(workbook, '_properties', {})
        metadata = properties.copy()

        sheets = workbook._worksheets.copy()
        return cls(worksheets=sheets, active_sheet_name=active_name, metadata=metadata)
|