aspose-cells-foss 25.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. aspose/__init__.py +14 -0
  2. aspose/cells/__init__.py +31 -0
  3. aspose/cells/cell.py +350 -0
  4. aspose/cells/constants.py +44 -0
  5. aspose/cells/converters/__init__.py +13 -0
  6. aspose/cells/converters/csv_converter.py +55 -0
  7. aspose/cells/converters/json_converter.py +46 -0
  8. aspose/cells/converters/markdown_converter.py +453 -0
  9. aspose/cells/drawing/__init__.py +17 -0
  10. aspose/cells/drawing/anchor.py +172 -0
  11. aspose/cells/drawing/collection.py +233 -0
  12. aspose/cells/drawing/image.py +338 -0
  13. aspose/cells/formats.py +80 -0
  14. aspose/cells/formula/__init__.py +10 -0
  15. aspose/cells/formula/evaluator.py +360 -0
  16. aspose/cells/formula/functions.py +433 -0
  17. aspose/cells/formula/tokenizer.py +340 -0
  18. aspose/cells/io/__init__.py +27 -0
  19. aspose/cells/io/csv/__init__.py +8 -0
  20. aspose/cells/io/csv/reader.py +88 -0
  21. aspose/cells/io/csv/writer.py +98 -0
  22. aspose/cells/io/factory.py +138 -0
  23. aspose/cells/io/interfaces.py +48 -0
  24. aspose/cells/io/json/__init__.py +8 -0
  25. aspose/cells/io/json/reader.py +126 -0
  26. aspose/cells/io/json/writer.py +119 -0
  27. aspose/cells/io/md/__init__.py +8 -0
  28. aspose/cells/io/md/reader.py +161 -0
  29. aspose/cells/io/md/writer.py +334 -0
  30. aspose/cells/io/models.py +64 -0
  31. aspose/cells/io/xlsx/__init__.py +9 -0
  32. aspose/cells/io/xlsx/constants.py +312 -0
  33. aspose/cells/io/xlsx/image_writer.py +311 -0
  34. aspose/cells/io/xlsx/reader.py +284 -0
  35. aspose/cells/io/xlsx/writer.py +931 -0
  36. aspose/cells/plugins/__init__.py +6 -0
  37. aspose/cells/plugins/docling_backend/__init__.py +7 -0
  38. aspose/cells/plugins/docling_backend/backend.py +535 -0
  39. aspose/cells/plugins/markitdown_plugin/__init__.py +15 -0
  40. aspose/cells/plugins/markitdown_plugin/plugin.py +128 -0
  41. aspose/cells/range.py +210 -0
  42. aspose/cells/style.py +287 -0
  43. aspose/cells/utils/__init__.py +54 -0
  44. aspose/cells/utils/coordinates.py +68 -0
  45. aspose/cells/utils/exceptions.py +43 -0
  46. aspose/cells/utils/validation.py +102 -0
  47. aspose/cells/workbook.py +352 -0
  48. aspose/cells/worksheet.py +670 -0
  49. aspose_cells_foss-25.12.1.dist-info/METADATA +189 -0
  50. aspose_cells_foss-25.12.1.dist-info/RECORD +53 -0
  51. aspose_cells_foss-25.12.1.dist-info/WHEEL +5 -0
  52. aspose_cells_foss-25.12.1.dist-info/entry_points.txt +2 -0
  53. aspose_cells_foss-25.12.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,334 @@
1
+ """
2
+ Markdown file writer for saving workbook data to Markdown table format.
3
+ """
4
+
5
+ from typing import List, Optional, TYPE_CHECKING
6
+ from ...formats import CellValue
7
+
8
+ if TYPE_CHECKING:
9
+ from ...workbook import Workbook
10
+ from ...worksheet import Worksheet
11
+
12
+
13
class MarkdownWriter:
    """Writer that renders raw tabular data or workbook sheets as Markdown tables."""

    def __init__(self):
        """Create a writer; the writer keeps no state between calls."""
        pass

    def write(self, file_path: str, data: 'List[List[CellValue]]', **kwargs) -> None:
        """Render ``data`` as a Markdown table and write it to ``file_path``.

        Args:
            file_path: Destination path; the file is opened in text mode ``'w'``.
            data: Rows of cell values (or cell objects exposing ``value`` and
                ``hyperlink``).
            **kwargs: Optional settings read by this method:
                ``encoding`` (default ``'utf-8'``),
                ``include_headers`` (default ``True``),
                ``table_alignment`` (default ``'left'``),
                ``max_col_width`` (default ``50``).

        Raises:
            ValueError: If conversion or writing fails; the original exception
                is attached as ``__cause__``.
        """
        encoding = kwargs.get('encoding', 'utf-8')
        include_headers = kwargs.get('include_headers', True)
        table_alignment = kwargs.get('table_alignment', 'left')
        max_col_width = kwargs.get('max_col_width', 50)

        try:
            markdown_content = self._convert_data_to_markdown(
                data, include_headers, table_alignment, max_col_width
            )

            with open(file_path, 'w', encoding=encoding) as file:
                file.write(markdown_content)

        except Exception as e:
            # Callers catch ValueError; chain the cause so the traceback is kept.
            raise ValueError(f"Error writing Markdown file: {e}") from e

    def write_workbook(self, file_path: str, workbook: 'Workbook', **kwargs) -> None:
        """Render one or more worksheets of ``workbook`` into a Markdown file.

        Sheet selection, in priority order: the sheet named by ``sheet_name``
        when it exists in the workbook, every sheet when ``all_sheets`` is
        truthy, otherwise ``workbook.active``. A ``sheet_name`` that is not in
        the workbook is ignored and the next rule applies.

        Args:
            file_path: Destination path; the file is opened in text mode ``'w'``.
            workbook: Workbook whose ``_worksheets`` mapping / ``active`` sheet
                are read.
            **kwargs: ``sheet_name``, ``all_sheets`` (default ``False``),
                ``include_headers`` (default ``True``), ``table_alignment``
                (default ``'left'``), ``max_col_width`` (default ``50``),
                ``encoding`` (default ``'utf-8'``).

        Raises:
            ValueError: If the file cannot be written; the original exception
                is attached as ``__cause__``.
        """
        sheet_name = kwargs.get('sheet_name')
        include_headers = kwargs.get('include_headers', True)
        table_alignment = kwargs.get('table_alignment', 'left')
        max_col_width = kwargs.get('max_col_width', 50)
        all_sheets = kwargs.get('all_sheets', False)
        encoding = kwargs.get('encoding', 'utf-8')

        if sheet_name and sheet_name in workbook._worksheets:
            selected = [workbook._worksheets[sheet_name]]
        elif all_sheets:
            selected = list(workbook._worksheets.values())
        else:
            selected = [workbook.active]

        rendered = []
        for worksheet in selected:
            sheet_md = self._convert_single_sheet(
                worksheet, include_headers, table_alignment, max_col_width
            )
            if sheet_md:
                rendered.append(sheet_md)

        # One blank line between consecutive sheets; no leading/trailing blanks.
        markdown_content = "\n\n".join(rendered).strip()

        try:
            with open(file_path, 'w', encoding=encoding) as file:
                file.write(markdown_content)
        except Exception as e:
            raise ValueError(f"Error writing Markdown file: {e}") from e

    def _convert_single_sheet(self, worksheet: 'Worksheet', include_headers: bool,
                              table_alignment: str, max_col_width: int) -> str:
        """Render one worksheet as ``# <name>``, a blank line, then its table.

        Returns an empty string when the worksheet is falsy or has no cells.
        """
        if not worksheet or not worksheet._cells:
            return ""

        # Title line followed by an empty line.
        sheet_parts = [f"# {worksheet.name}", ""]

        data = self._worksheet_to_data(worksheet)
        if data:
            # Heuristically drop leading title / non-tabular rows so the
            # Markdown header row lines up with the real table header.
            best_idx = self._detect_table_start_index(data)
            if best_idx > 0:
                data = data[best_idx:]

        table_md = self._convert_data_to_markdown(
            data, include_headers, table_alignment, max_col_width
        )
        if table_md:
            sheet_parts.append(table_md)

        return "\n".join(sheet_parts)

    def _worksheet_to_data(self, worksheet: 'Worksheet') -> List[List]:
        """Return the sheet as a dense grid of cell objects (``None`` for gaps).

        Cell objects, not bare values, are returned so that hyperlinks remain
        available to the formatter. The grid spans rows ``1..max_row`` and
        columns ``1..max_column``; an empty list is returned when either is 0.
        """
        max_row = worksheet.max_row
        max_col = worksheet.max_column

        if max_row == 0 or max_col == 0:
            return []

        cells = worksheet._cells
        return [
            [cells.get((row, col)) or None for col in range(1, max_col + 1)]
            for row in range(1, max_row + 1)
        ]

    def _convert_data_to_markdown(self, data: List[List], include_headers: bool,
                                  alignment: str, max_width: int) -> str:
        """Convert rows of cells/values into Markdown table text.

        Every cell is formatted exactly once, short rows are padded with empty
        cells so each line has the same number of columns, and cells are
        left-justified to the widest entry of their column.

        Args:
            data: Rows of cell objects or plain values.
            include_headers: When true the first row becomes the header and a
                delimiter row follows it; otherwise all rows are emitted as
                plain pipe-delimited lines with no delimiter row.
            alignment: ``'left'``, ``'center'`` or ``'right'``.
            max_width: Maximum rendered width of a single cell.

        Returns:
            The table text, or ``""`` when ``data`` is empty.
        """
        if not data:
            return ""

        rows = [
            [self._format_cell_for_markdown(cell, max_width) for cell in row]
            for row in data
        ]

        column_count = max(len(row) for row in rows)
        # Pad ragged rows: a header with fewer cells than the delimiter row
        # would stop the block from being recognised as a table.
        rows = [row + [""] * (column_count - len(row)) for row in rows]
        col_widths = [
            max(len(row[index]) for row in rows) for index in range(column_count)
        ]

        def render(row: List[str]) -> str:
            padded = (cell.ljust(width) for cell, width in zip(row, col_widths))
            return "| " + " | ".join(padded) + " |"

        if not include_headers:
            return "\n".join(render(row) for row in rows)

        left, right = self._get_alignment_chars(alignment)
        separator_line = "| " + " | ".join(
            left + "-" * max(1, width - 2) + right for width in col_widths
        ) + " |"

        # Rows are already formatted above; formatting them again would
        # double-escape pipes and break the table.
        lines = [render(rows[0]), separator_line]
        lines.extend(render(row) for row in rows[1:])
        return "\n".join(lines)

    def _get_alignment_chars(self, alignment: str) -> tuple:
        """Return the (first, last) characters of a delimiter cell.

        ``'center'`` gives ``(":", ":")``, ``'right'`` gives ``("-", ":")`` and
        anything else is treated as left alignment, ``("-", "-")``.
        """
        if alignment == 'center':
            return (":", ":")
        if alignment == 'right':
            return ("-", ":")
        return ("-", "-")

    @staticmethod
    def _truncate(text: str, limit: int) -> str:
        """Shorten ``text`` to at most ``limit`` characters, ending in ``...``.

        When ``limit`` is too small to hold the ellipsis the text is simply
        cut, so a negative slice index can never be produced.
        """
        if len(text) <= limit:
            return text
        if limit < 3:
            return text[:max(limit, 0)]
        return text[:limit - 3] + "..."

    def _format_cell_for_markdown(self, cell_or_value, max_width: int) -> str:
        """Format a cell object or plain value as Markdown table cell text.

        Objects exposing both ``value`` and ``hyperlink`` are treated as cells;
        anything else is treated as the value itself. ``None`` becomes ``""``,
        booleans become ``TRUE``/``FALSE``, pipes are escaped, newlines become
        spaces and carriage returns are removed. A truthy hyperlink wraps the
        text as ``[text](url)``.

        Text longer than ``max_width`` is truncated with ``...``. For links
        only the label is shortened (the URL is always kept intact); if the
        URL alone uses up the budget, the label is limited to ``max_width``
        instead of being reduced to a bare ellipsis.
        """
        if hasattr(cell_or_value, 'value') and hasattr(cell_or_value, 'hyperlink'):
            value = cell_or_value.value
            hyperlink = cell_or_value.hyperlink
        else:
            value = cell_or_value
            hyperlink = None

        if value is None:
            return ""

        # bool must be checked explicitly: str(True) would give "True".
        if isinstance(value, bool):
            text = "TRUE" if value else "FALSE"
        else:
            text = str(value)

        # Keep the cell on one line and stop pipes from splitting it.
        text = text.replace("|", "\\|").replace("\n", " ").replace("\r", "")

        if not hyperlink:
            return self._truncate(text, max_width)

        escaped_url = hyperlink.replace(")", "\\)")
        link = f"[{text}]({escaped_url})"
        if len(link) <= max_width:
            return link

        # 4 accounts for the "[", "](" and ")" link syntax characters.
        label_budget = max_width - len(escaped_url) - 4
        if label_budget <= 3:
            # The URL alone exhausts the width; keep a readable label.
            label_budget = max_width
        return f"[{self._truncate(text, label_budget)}]({escaped_url})"

    def _detect_table_start_index(self, data: List[List]) -> int:
        """Return the index of the row that scores highest as a table start.

        Every row is scored with ``_score_row_as_table_start``; the first row
        with the highest score wins. Returns 0 for empty ``data``.

        NOTE(review): the maximum is taken over *all* rows, so a later data
        row that outscores the real header causes the caller to drop every
        row before it - confirm this is intended.
        """
        if not data:
            return 0

        best_idx = 0
        best_score = -1

        for idx, row in enumerate(data):
            score = self._score_row_as_table_start(row)
            if score > best_score:
                best_score = score
                best_idx = idx

        return best_idx

    def _score_row_as_table_start(self, row: List) -> float:
        """Score how likely ``row`` is to be the first row of the table.

        Rows with no non-blank cells score 0. Otherwise the score rewards the
        share of cells not starting with ``"Unnamed"``, a moderate average
        text length and having several filled cells, and penalises rows where
        more than half of the filled cells start with ``"Unnamed"``.
        """
        non_empty = 0
        unnamed_count = 0
        total_chars = 0

        for cell_or_value in row:
            # Accept both cell objects and plain values.
            value = cell_or_value.value if hasattr(cell_or_value, 'value') else cell_or_value
            if value is None:
                continue

            value_str = str(value).strip()
            if value_str == "":
                continue

            non_empty += 1
            total_chars += len(value_str)
            # pandas-style placeholder column names
            if value_str.startswith("Unnamed"):
                unnamed_count += 1

        if non_empty == 0:
            return 0

        unnamed_ratio = unnamed_count / non_empty
        meaningful_ratio = (non_empty - unnamed_count) / non_empty
        avg_content_length = total_chars / non_empty

        score = 0

        # Penalize rows dominated by "Unnamed" placeholders.
        if unnamed_ratio > 0.5:
            score -= 100 * unnamed_ratio

        # Reward meaningful content.
        score += 50 * meaningful_ratio

        # Reward reasonable content length.
        if 2 <= avg_content_length <= 20:
            score += 20
        elif avg_content_length > 20:
            score += 10

        # Reward multiple filled cells, capped at 25 points.
        if non_empty >= 2 and unnamed_ratio < 0.5:
            score += min(non_empty * 5, 25)

        return score

    def _format_cell_value(self, value: 'CellValue', max_width: int) -> str:
        """Legacy alias of ``_format_cell_for_markdown`` kept for compatibility."""
        return self._format_cell_for_markdown(value, max_width)

    def save_workbook(self, workbook: 'Workbook', file_path: str, **options) -> None:
        """Save ``workbook`` to ``file_path``; delegates to ``write_workbook``."""
        self.write_workbook(file_path, workbook, **options)
@@ -0,0 +1,64 @@
1
+ """
2
+ Unified data models for cross-format workbook operations.
3
+ """
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Dict, Any, Optional, TYPE_CHECKING
7
+
8
+ if TYPE_CHECKING:
9
+ from ..worksheet import Worksheet
10
+ from ..workbook import Workbook
11
+
12
+
13
@dataclass
class WorkbookData:
    """Format-neutral container holding a workbook's sheets and metadata."""

    # Worksheets keyed by sheet name.
    worksheets: Dict[str, 'Worksheet'] = field(default_factory=dict)
    # Name of the active sheet; None until a sheet is added or a name is given.
    active_sheet_name: Optional[str] = None
    # Free-form properties; copied into the workbook's ``_properties``.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def add_worksheet(self, name: str, worksheet: 'Worksheet') -> None:
        """Store ``worksheet`` under ``name``.

        The first sheet added while no active sheet is recorded becomes the
        active one.
        """
        self.worksheets[name] = worksheet
        if self.active_sheet_name is not None:
            return
        self.active_sheet_name = name

    def to_workbook(self) -> 'Workbook':
        """Build a ``Workbook`` from this model.

        The workbook is allocated without running ``Workbook.__init__`` and
        its private attributes are populated directly. Each worksheet is
        shared (not copied) and its ``_parent`` is re-pointed at the new
        workbook.
        """
        from ..workbook import Workbook

        workbook = Workbook.__new__(Workbook)  # skip __init__ on purpose
        workbook._worksheets = {}
        workbook._active_sheet = None
        workbook._shared_strings = []
        workbook._properties = self.metadata.copy()
        workbook._filename = None

        # Attach every sheet and update its parent reference.
        for sheet_name, sheet in self.worksheets.items():
            workbook._worksheets[sheet_name] = sheet
            sheet._parent = workbook

        # Prefer the recorded active sheet, else fall back to the first one.
        attached = workbook._worksheets
        preferred = self.active_sheet_name
        if preferred and preferred in attached:
            workbook._active_sheet = attached[preferred]
        elif attached:
            workbook._active_sheet = next(iter(attached.values()))

        return workbook

    @classmethod
    def from_workbook(cls, workbook: 'Workbook') -> 'WorkbookData':
        """Create a model from ``workbook``.

        The worksheet mapping and the ``_properties`` dict (empty when the
        attribute is missing) are shallow-copied; worksheet objects themselves
        are shared with the source workbook.
        """
        current = workbook._active_sheet
        active_name = current.name if current else None
        properties = getattr(workbook, '_properties', {}).copy()

        return cls(
            worksheets=workbook._worksheets.copy(),
            active_sheet_name=active_name,
            metadata=properties,
        )
@@ -0,0 +1,9 @@
1
+ """
2
+ Excel XLSX I/O operations.
3
+ """
4
+
5
+ from .reader import XlsxReader
6
+ from .writer import XlsxWriter
7
+ from .constants import XlsxConstants, XlsxTemplates
8
+
9
+ __all__ = ["XlsxReader", "XlsxWriter", "XlsxConstants", "XlsxTemplates"]