aspose-cells-foss 25.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aspose/__init__.py +14 -0
- aspose/cells/__init__.py +31 -0
- aspose/cells/cell.py +350 -0
- aspose/cells/constants.py +44 -0
- aspose/cells/converters/__init__.py +13 -0
- aspose/cells/converters/csv_converter.py +55 -0
- aspose/cells/converters/json_converter.py +46 -0
- aspose/cells/converters/markdown_converter.py +453 -0
- aspose/cells/drawing/__init__.py +17 -0
- aspose/cells/drawing/anchor.py +172 -0
- aspose/cells/drawing/collection.py +233 -0
- aspose/cells/drawing/image.py +338 -0
- aspose/cells/formats.py +80 -0
- aspose/cells/formula/__init__.py +10 -0
- aspose/cells/formula/evaluator.py +360 -0
- aspose/cells/formula/functions.py +433 -0
- aspose/cells/formula/tokenizer.py +340 -0
- aspose/cells/io/__init__.py +27 -0
- aspose/cells/io/csv/__init__.py +8 -0
- aspose/cells/io/csv/reader.py +88 -0
- aspose/cells/io/csv/writer.py +98 -0
- aspose/cells/io/factory.py +138 -0
- aspose/cells/io/interfaces.py +48 -0
- aspose/cells/io/json/__init__.py +8 -0
- aspose/cells/io/json/reader.py +126 -0
- aspose/cells/io/json/writer.py +119 -0
- aspose/cells/io/md/__init__.py +8 -0
- aspose/cells/io/md/reader.py +161 -0
- aspose/cells/io/md/writer.py +334 -0
- aspose/cells/io/models.py +64 -0
- aspose/cells/io/xlsx/__init__.py +9 -0
- aspose/cells/io/xlsx/constants.py +312 -0
- aspose/cells/io/xlsx/image_writer.py +311 -0
- aspose/cells/io/xlsx/reader.py +284 -0
- aspose/cells/io/xlsx/writer.py +931 -0
- aspose/cells/plugins/__init__.py +6 -0
- aspose/cells/plugins/docling_backend/__init__.py +7 -0
- aspose/cells/plugins/docling_backend/backend.py +535 -0
- aspose/cells/plugins/markitdown_plugin/__init__.py +15 -0
- aspose/cells/plugins/markitdown_plugin/plugin.py +128 -0
- aspose/cells/range.py +210 -0
- aspose/cells/style.py +287 -0
- aspose/cells/utils/__init__.py +54 -0
- aspose/cells/utils/coordinates.py +68 -0
- aspose/cells/utils/exceptions.py +43 -0
- aspose/cells/utils/validation.py +102 -0
- aspose/cells/workbook.py +352 -0
- aspose/cells/worksheet.py +670 -0
- aspose_cells_foss-25.12.1.dist-info/METADATA +189 -0
- aspose_cells_foss-25.12.1.dist-info/RECORD +53 -0
- aspose_cells_foss-25.12.1.dist-info/WHEEL +5 -0
- aspose_cells_foss-25.12.1.dist-info/entry_points.txt +2 -0
- aspose_cells_foss-25.12.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
"""Optimized Markdown converter for Excel data."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, TYPE_CHECKING
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from ..workbook import Workbook
|
|
9
|
+
from ..worksheet import Worksheet
|
|
10
|
+
from ..cell import Cell
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MarkdownConverter:
|
|
14
|
+
"""Optimized Excel to Markdown converter."""
|
|
15
|
+
|
|
16
|
+
def convert_workbook(self, workbook: 'Workbook', **kwargs) -> str:
    """Render a workbook as one Markdown document.

    Keyword options are turned into a settings dict by ``_create_config``.
    When ``include_metadata`` is set, the metadata section comes first.
    Every sheet after the first is preceded by a divider entry, whether or
    not that sheet produces any content.

    Returns:
        The joined Markdown text with surrounding whitespace stripped.
    """
    settings = self._create_config(**kwargs)
    with_metadata = settings['include_metadata']

    chunks = []
    if with_metadata:
        chunks += [self._create_metadata(workbook), ""]

    # A horizontal rule is used between sheets only in metadata mode;
    # otherwise the divider is an empty entry (a blank line once joined).
    divider = "\n---\n" if with_metadata else ""

    for position, worksheet in enumerate(self._get_sheets(workbook, settings)):
        if position:
            chunks.append(divider)
        rendered = self._process_sheet(worksheet, settings)
        if rendered:
            chunks.append(rendered)

    return "\n".join(chunks).strip()
|
|
33
|
+
|
|
34
|
+
def _create_config(self, **kwargs) -> dict:
|
|
35
|
+
"""Create config with simplified defaults."""
|
|
36
|
+
return {
|
|
37
|
+
'sheet_name': kwargs.get('sheet_name'),
|
|
38
|
+
'include_metadata': kwargs.get('include_metadata', False),
|
|
39
|
+
'value_mode': kwargs.get('value_mode', 'value'), # "value" shows calculated results, "formula" shows formulas
|
|
40
|
+
'include_hyperlinks': kwargs.get('include_hyperlinks', True),
|
|
41
|
+
'image_export_mode': kwargs.get('image_export_mode', 'none'), # 'none', 'base64', 'folder'
|
|
42
|
+
'image_folder': kwargs.get('image_folder', 'images'),
|
|
43
|
+
'output_dir': kwargs.get('output_dir', '.')
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
def _get_sheets(self, workbook: 'Workbook', config: dict) -> List['Worksheet']:
|
|
47
|
+
"""Get sheets to process - specific sheet by name or all sheets."""
|
|
48
|
+
sheet_name = config['sheet_name']
|
|
49
|
+
if sheet_name and sheet_name in workbook._worksheets:
|
|
50
|
+
# Convert specific sheet by name
|
|
51
|
+
return [workbook._worksheets[sheet_name]]
|
|
52
|
+
# Convert all sheets if no specific sheet is requested
|
|
53
|
+
return list(workbook._worksheets.values())
|
|
54
|
+
|
|
55
|
+
def _create_metadata(self, workbook: 'Workbook') -> str:
|
|
56
|
+
"""Create simplified metadata section without source file."""
|
|
57
|
+
lines = [
|
|
58
|
+
"# Document Metadata", "",
|
|
59
|
+
f"- **Source Type**: Excel Workbook",
|
|
60
|
+
f"- **Conversion Date**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
|
|
61
|
+
f"- **Total Sheets**: {len(workbook._worksheets)}",
|
|
62
|
+
f"- **Sheet Names**: {', '.join(workbook._worksheets.keys())}",
|
|
63
|
+
f"- **Active Sheet**: {workbook.active.name if workbook.active else 'None'}"
|
|
64
|
+
]
|
|
65
|
+
return "\n".join(lines)
|
|
66
|
+
|
|
67
|
+
def _process_sheet(self, worksheet: 'Worksheet', config: dict) -> str:
|
|
68
|
+
"""Process sheet to markdown with clean formatting."""
|
|
69
|
+
if not worksheet or not worksheet._cells:
|
|
70
|
+
return ""
|
|
71
|
+
|
|
72
|
+
parts = [f"## {worksheet.name}", ""]
|
|
73
|
+
|
|
74
|
+
# Add images if present and image export is enabled
|
|
75
|
+
if hasattr(worksheet, '_images') and len(worksheet._images) > 0:
|
|
76
|
+
image_content = self._process_images(worksheet, config)
|
|
77
|
+
if image_content:
|
|
78
|
+
parts.extend(["### Images", "", image_content, ""])
|
|
79
|
+
|
|
80
|
+
table = self._create_table(worksheet, config)
|
|
81
|
+
if table:
|
|
82
|
+
parts.append(table)
|
|
83
|
+
|
|
84
|
+
return "\n".join(parts)
|
|
85
|
+
|
|
86
|
+
def _create_table(self, worksheet: 'Worksheet', config: dict) -> str:
|
|
87
|
+
"""Create markdown table."""
|
|
88
|
+
if not worksheet._cells:
|
|
89
|
+
return ""
|
|
90
|
+
|
|
91
|
+
start_row = self._detect_start_row(worksheet)
|
|
92
|
+
|
|
93
|
+
# Check for merged cells before start_row that should be included
|
|
94
|
+
merged_title_rows = self._find_merged_title_rows(worksheet, start_row)
|
|
95
|
+
|
|
96
|
+
all_data = []
|
|
97
|
+
|
|
98
|
+
# Add merged title rows first
|
|
99
|
+
for title_row in merged_title_rows:
|
|
100
|
+
title_data = self._extract_data(worksheet, title_row, config, single_row=True)
|
|
101
|
+
if title_data:
|
|
102
|
+
all_data.extend(title_data)
|
|
103
|
+
|
|
104
|
+
# Add main table data
|
|
105
|
+
table_data = self._extract_data(worksheet, start_row, config)
|
|
106
|
+
if table_data:
|
|
107
|
+
all_data.extend(table_data)
|
|
108
|
+
|
|
109
|
+
if not all_data:
|
|
110
|
+
return ""
|
|
111
|
+
|
|
112
|
+
result = []
|
|
113
|
+
if all_data:
|
|
114
|
+
# Always include headers with simplified logic
|
|
115
|
+
header = all_data[0]
|
|
116
|
+
header_line = "| " + " | ".join(self._generate_column_header(cell, i) for i, cell in enumerate(header)) + " |"
|
|
117
|
+
separator = "| " + " | ".join("---" for _ in header) + " |"
|
|
118
|
+
result.extend([header_line, separator])
|
|
119
|
+
|
|
120
|
+
for row in all_data[1:]:
|
|
121
|
+
data_line = "| " + " | ".join(str(cell) if cell else "" for cell in row) + " |"
|
|
122
|
+
result.append(data_line)
|
|
123
|
+
|
|
124
|
+
return "\n".join(result)
|
|
125
|
+
|
|
126
|
+
def _find_merged_title_rows(self, worksheet: 'Worksheet', start_row: int) -> list:
|
|
127
|
+
"""Find merged cell rows before start_row that contain titles."""
|
|
128
|
+
title_rows = []
|
|
129
|
+
|
|
130
|
+
if hasattr(worksheet, '_merged_ranges') and worksheet._merged_ranges:
|
|
131
|
+
for merge_range in worksheet._merged_ranges:
|
|
132
|
+
if ':' in merge_range:
|
|
133
|
+
start_ref, end_ref = merge_range.split(':')
|
|
134
|
+
import re
|
|
135
|
+
merged_row = int(re.search(r'\d+', start_ref).group())
|
|
136
|
+
|
|
137
|
+
# If merged row is before start_row and contains meaningful content
|
|
138
|
+
if merged_row < start_row:
|
|
139
|
+
cell = worksheet._cells.get((merged_row, 1))
|
|
140
|
+
if cell and cell.value and str(cell.value).strip():
|
|
141
|
+
title_rows.append(merged_row)
|
|
142
|
+
|
|
143
|
+
return sorted(title_rows)
|
|
144
|
+
|
|
145
|
+
def _extract_data(self, worksheet: 'Worksheet', start_row: int, config: dict, single_row: bool = False) -> List[List[str]]:
|
|
146
|
+
"""Extract table data."""
|
|
147
|
+
data = []
|
|
148
|
+
end_row = start_row if single_row else worksheet.max_row
|
|
149
|
+
|
|
150
|
+
for row in range(start_row, end_row + 1):
|
|
151
|
+
row_data = []
|
|
152
|
+
for col in range(1, worksheet.max_column + 1):
|
|
153
|
+
cell = worksheet._cells.get((row, col))
|
|
154
|
+
value = self._format_cell(cell, config)
|
|
155
|
+
row_data.append(value)
|
|
156
|
+
data.append(row_data)
|
|
157
|
+
return data
|
|
158
|
+
|
|
159
|
+
def _format_cell(self, cell: Optional['Cell'], config: dict) -> str:
|
|
160
|
+
"""Format cell value with enhanced processing."""
|
|
161
|
+
if not cell or cell.is_empty():
|
|
162
|
+
return ""
|
|
163
|
+
|
|
164
|
+
# Enhanced hyperlink detection and formatting
|
|
165
|
+
if config['include_hyperlinks'] and cell.has_hyperlink():
|
|
166
|
+
return cell.get_markdown_link()
|
|
167
|
+
|
|
168
|
+
# Auto-detect URLs in text values and convert to hyperlinks
|
|
169
|
+
if config['include_hyperlinks'] and isinstance(cell.value, str):
|
|
170
|
+
url_detected = self._detect_and_format_urls(cell.value)
|
|
171
|
+
if url_detected != cell.value:
|
|
172
|
+
return url_detected
|
|
173
|
+
|
|
174
|
+
if config['value_mode'] == 'formula' and cell.is_formula():
|
|
175
|
+
value = cell.formula or cell.value
|
|
176
|
+
else:
|
|
177
|
+
value = cell.calculated_value
|
|
178
|
+
|
|
179
|
+
return self._format_value(value)
|
|
180
|
+
|
|
181
|
+
def _detect_and_format_urls(self, text: str) -> str:
|
|
182
|
+
"""Detect URLs in text and format them as markdown links."""
|
|
183
|
+
import re
|
|
184
|
+
|
|
185
|
+
# Don't process text that already contains markdown links
|
|
186
|
+
if '[' in text and '](' in text:
|
|
187
|
+
return text
|
|
188
|
+
|
|
189
|
+
# Patterns for different types of URLs (in order of specificity)
|
|
190
|
+
url_patterns = [
|
|
191
|
+
# Full HTTP/HTTPS URLs
|
|
192
|
+
(r'\bhttps?://[^\s<>"\'|\[\]()]+', lambda m: f"[{m.group()}]({m.group()})"),
|
|
193
|
+
# www domains
|
|
194
|
+
(r'\bwww\.[^\s<>"\'|\[\]()]+', lambda m: f"[{m.group()}](http://{m.group()})"),
|
|
195
|
+
# Email addresses
|
|
196
|
+
(r'\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b', lambda m: f"[{m.group()}](mailto:{m.group()})")
|
|
197
|
+
]
|
|
198
|
+
|
|
199
|
+
result = text
|
|
200
|
+
for pattern, formatter in url_patterns:
|
|
201
|
+
result = re.sub(pattern, formatter, result)
|
|
202
|
+
|
|
203
|
+
return result
|
|
204
|
+
|
|
205
|
+
def _format_value(self, value) -> str:
|
|
206
|
+
"""Format value for display with improved formatting."""
|
|
207
|
+
if value is None:
|
|
208
|
+
return ""
|
|
209
|
+
|
|
210
|
+
if isinstance(value, bool):
|
|
211
|
+
return "TRUE" if value else "FALSE"
|
|
212
|
+
|
|
213
|
+
if isinstance(value, (int, float)):
|
|
214
|
+
# Improve number formatting
|
|
215
|
+
if isinstance(value, float):
|
|
216
|
+
# Use scientific notation only for extremely large/small numbers
|
|
217
|
+
if abs(value) >= 1e9 or (abs(value) < 1e-4 and value != 0):
|
|
218
|
+
return f"{value:.2e}" # Scientific notation for very large/small numbers
|
|
219
|
+
elif value.is_integer():
|
|
220
|
+
return str(int(value)) # Remove .0 from whole numbers
|
|
221
|
+
else:
|
|
222
|
+
# Format large numbers with commas and reasonable decimal places
|
|
223
|
+
if abs(value) >= 1000:
|
|
224
|
+
return f"{value:,.2f}".rstrip('0').rstrip('.')
|
|
225
|
+
else:
|
|
226
|
+
return f"{value:.2f}".rstrip('0').rstrip('.')
|
|
227
|
+
else:
|
|
228
|
+
# Format large integers with commas
|
|
229
|
+
if abs(value) >= 1000:
|
|
230
|
+
return f"{value:,}"
|
|
231
|
+
else:
|
|
232
|
+
return str(value)
|
|
233
|
+
|
|
234
|
+
if isinstance(value, str):
|
|
235
|
+
# Enhanced string formatting
|
|
236
|
+
text = value.replace("|", "\\|") # Escape pipe characters
|
|
237
|
+
text = text.replace("\n", " ").replace("\r", " ") # Handle line breaks
|
|
238
|
+
text = " ".join(text.split()) # Normalize whitespace
|
|
239
|
+
return text.strip()
|
|
240
|
+
|
|
241
|
+
# Fallback for other types
|
|
242
|
+
return str(value).replace("|", "\\|").replace("\n", " ").strip()
|
|
243
|
+
|
|
244
|
+
def _generate_column_header(self, cell_value: str, column_index: int) -> str:
|
|
245
|
+
"""Generate intelligent column headers."""
|
|
246
|
+
if cell_value and str(cell_value).strip():
|
|
247
|
+
header = str(cell_value).strip()
|
|
248
|
+
# Don't use generic names if we have meaningful content
|
|
249
|
+
if not header.startswith(('Unnamed', 'Col', 'Column')):
|
|
250
|
+
return header
|
|
251
|
+
|
|
252
|
+
# Generate Excel-style column names (A, B, C, ..., AA, AB, etc.)
|
|
253
|
+
result = ""
|
|
254
|
+
col_num = column_index
|
|
255
|
+
while col_num >= 0:
|
|
256
|
+
result = chr(col_num % 26 + ord('A')) + result
|
|
257
|
+
col_num = col_num // 26 - 1
|
|
258
|
+
if col_num < 0:
|
|
259
|
+
break
|
|
260
|
+
|
|
261
|
+
return result if result else f"Col{column_index + 1}"
|
|
262
|
+
|
|
263
|
+
def _detect_start_row(self, worksheet: 'Worksheet') -> int:
|
|
264
|
+
"""Detect optimal table start row."""
|
|
265
|
+
if worksheet.max_row <= 3:
|
|
266
|
+
return 1
|
|
267
|
+
|
|
268
|
+
best_row, best_score = 1, -1
|
|
269
|
+
for row in range(1, min(worksheet.max_row + 1, 6)):
|
|
270
|
+
score = self._score_row(worksheet, row)
|
|
271
|
+
if score > best_score:
|
|
272
|
+
best_score, best_row = score, row
|
|
273
|
+
|
|
274
|
+
return best_row
|
|
275
|
+
|
|
276
|
+
def _score_row(self, worksheet: 'Worksheet', row: int) -> float:
|
|
277
|
+
"""Score row as potential table start."""
|
|
278
|
+
non_empty = meaningful = unnamed = 0
|
|
279
|
+
merged_bonus = 0
|
|
280
|
+
|
|
281
|
+
for col in range(1, worksheet.max_column + 1):
|
|
282
|
+
cell = worksheet._cells.get((row, col))
|
|
283
|
+
if not cell or cell.value is None:
|
|
284
|
+
continue
|
|
285
|
+
|
|
286
|
+
value_str = str(cell.value).strip()
|
|
287
|
+
if not value_str:
|
|
288
|
+
continue
|
|
289
|
+
|
|
290
|
+
non_empty += 1
|
|
291
|
+
if value_str.startswith("Unnamed"):
|
|
292
|
+
unnamed += 1
|
|
293
|
+
else:
|
|
294
|
+
meaningful += 1
|
|
295
|
+
|
|
296
|
+
# Check if this row contains merged cells
|
|
297
|
+
if hasattr(worksheet, '_merged_ranges') and worksheet._merged_ranges:
|
|
298
|
+
for merge_range in worksheet._merged_ranges:
|
|
299
|
+
# Parse merge range like "A1:F1"
|
|
300
|
+
if ':' in merge_range:
|
|
301
|
+
start_ref, end_ref = merge_range.split(':')
|
|
302
|
+
# Extract row number from references
|
|
303
|
+
import re
|
|
304
|
+
start_row = int(re.search(r'\d+', start_ref).group())
|
|
305
|
+
if start_row == row:
|
|
306
|
+
# This row has merged cells, give it a bonus
|
|
307
|
+
merged_bonus = 20
|
|
308
|
+
break
|
|
309
|
+
|
|
310
|
+
if non_empty == 0:
|
|
311
|
+
return 0
|
|
312
|
+
|
|
313
|
+
score = 50 * (meaningful / non_empty) - 100 * (unnamed / non_empty) + merged_bonus
|
|
314
|
+
if non_empty >= 2 and unnamed / non_empty < 0.5:
|
|
315
|
+
score += min(non_empty * 5, 25)
|
|
316
|
+
|
|
317
|
+
return score
|
|
318
|
+
|
|
319
|
+
def _process_images(self, worksheet: 'Worksheet', config: dict) -> str:
|
|
320
|
+
"""Process images in worksheet based on export mode."""
|
|
321
|
+
if config['image_export_mode'] == 'none':
|
|
322
|
+
return ""
|
|
323
|
+
|
|
324
|
+
image_lines = []
|
|
325
|
+
|
|
326
|
+
for i, image in enumerate(worksheet._images):
|
|
327
|
+
if config['image_export_mode'] == 'base64':
|
|
328
|
+
# Export as base64 data URL
|
|
329
|
+
image_md = self._image_to_base64_markdown(image, i)
|
|
330
|
+
elif config['image_export_mode'] == 'folder':
|
|
331
|
+
# Export to file and reference
|
|
332
|
+
image_md = self._image_to_file_markdown(image, i, config)
|
|
333
|
+
else:
|
|
334
|
+
continue
|
|
335
|
+
|
|
336
|
+
if image_md:
|
|
337
|
+
image_lines.append(image_md)
|
|
338
|
+
|
|
339
|
+
return "\n\n".join(image_lines)
|
|
340
|
+
|
|
341
|
+
def _image_to_base64_markdown(self, image, index: int) -> str:
|
|
342
|
+
"""Convert image to base64 markdown."""
|
|
343
|
+
import base64
|
|
344
|
+
|
|
345
|
+
if not image.data:
|
|
346
|
+
return f"*Image {index + 1}: {image.name or 'Unnamed'} (No data available)*"
|
|
347
|
+
|
|
348
|
+
# Create base64 data URL
|
|
349
|
+
format_str = image.format.value if hasattr(image.format, 'value') else str(image.format)
|
|
350
|
+
base64_data = base64.b64encode(image.data).decode('utf-8')
|
|
351
|
+
data_url = f"data:image/{format_str};base64,{base64_data}"
|
|
352
|
+
|
|
353
|
+
# Create markdown
|
|
354
|
+
alt_text = image.description or image.name or f"Image {index + 1}"
|
|
355
|
+
anchor_info = self._get_anchor_description(image.anchor)
|
|
356
|
+
|
|
357
|
+
md_lines = [
|
|
358
|
+
f"**{image.name or f'Image {index + 1}'}**",
|
|
359
|
+
f"- Position: {anchor_info}",
|
|
360
|
+
f"- Size: {image.width}x{image.height}px" if image.width and image.height else "- Size: Unknown",
|
|
361
|
+
f"- Format: {format_str.upper()}",
|
|
362
|
+
f"- Description: {image.description}" if image.description else "",
|
|
363
|
+
"",
|
|
364
|
+
f""
|
|
365
|
+
]
|
|
366
|
+
|
|
367
|
+
return "\n".join(line for line in md_lines if line)
|
|
368
|
+
|
|
369
|
+
def _image_to_file_markdown(self, image, index: int, config: dict) -> str:
|
|
370
|
+
"""Convert image to file reference markdown."""
|
|
371
|
+
import os
|
|
372
|
+
|
|
373
|
+
if not image.data:
|
|
374
|
+
return f"*Image {index + 1}: {image.name or 'Unnamed'} (No data available)*"
|
|
375
|
+
|
|
376
|
+
# Create images directory
|
|
377
|
+
output_dir = Path(config['output_dir'])
|
|
378
|
+
image_dir = output_dir / config['image_folder']
|
|
379
|
+
image_dir.mkdir(parents=True, exist_ok=True)
|
|
380
|
+
|
|
381
|
+
# Generate unique filename
|
|
382
|
+
base_name = image.name or f"image_{index + 1}"
|
|
383
|
+
format_ext = self._get_file_extension(image.format)
|
|
384
|
+
filename = self._generate_unique_filename(image_dir, base_name, format_ext)
|
|
385
|
+
|
|
386
|
+
# Save image file
|
|
387
|
+
image_path = image_dir / filename
|
|
388
|
+
with open(image_path, 'wb') as f:
|
|
389
|
+
f.write(image.data)
|
|
390
|
+
|
|
391
|
+
# Create markdown
|
|
392
|
+
alt_text = image.description or image.name or f"Image {index + 1}"
|
|
393
|
+
anchor_info = self._get_anchor_description(image.anchor)
|
|
394
|
+
relative_path = f"{config['image_folder']}/{filename}"
|
|
395
|
+
|
|
396
|
+
md_lines = [
|
|
397
|
+
f"**{image.name or f'Image {index + 1}'}**",
|
|
398
|
+
f"- Position: {anchor_info}",
|
|
399
|
+
f"- Size: {image.width}x{image.height}px" if image.width and image.height else "- Size: Unknown",
|
|
400
|
+
f"- Format: {image.format.value.upper() if hasattr(image.format, 'value') else str(image.format).upper()}",
|
|
401
|
+
f"- Description: {image.description}" if image.description else "",
|
|
402
|
+
f"- File: [{filename}]({relative_path})",
|
|
403
|
+
"",
|
|
404
|
+
f""
|
|
405
|
+
]
|
|
406
|
+
|
|
407
|
+
return "\n".join(line for line in md_lines if line)
|
|
408
|
+
|
|
409
|
+
def _get_anchor_description(self, anchor) -> str:
    """Describe an image anchor in human-readable form.

    * ONE_CELL: ``Cell <ref>``, with the offset appended when it is not
      ``(0, 0)``.
    * TWO_CELL: ``Range <start>:<end>``.
    * ABSOLUTE: ``Absolute (x, y)``.
    * Anything else: ``Unknown position``.

    Anchor positions are zero-based and are shifted by one before being
    passed to ``tuple_to_coordinate``.

    Fix: ``Anchor.to_position`` and ``Anchor.absolute_position`` return
    ``None`` when no end cell / coordinates have been set, and the result
    was subscripted without a check. A TWO_CELL anchor with no end cell is
    now described by its start cell, and an ABSOLUTE anchor with no
    coordinates gives ``Unknown position``.
    """
    from ..drawing.anchor import AnchorType
    from ..utils.coordinates import tuple_to_coordinate

    def cell_ref(position) -> str:
        return tuple_to_coordinate(position[0] + 1, position[1] + 1)

    if anchor.type == AnchorType.ONE_CELL:
        start_ref = cell_ref(anchor.from_position)
        if anchor.from_offset != (0, 0):
            return f"Cell {start_ref} + offset {anchor.from_offset}"
        return f"Cell {start_ref}"

    if anchor.type == AnchorType.TWO_CELL:
        start_ref = cell_ref(anchor.from_position)
        end_position = anchor.to_position
        if end_position is None:
            # No end cell recorded; only the start corner is known.
            return f"Cell {start_ref}"
        return f"Range {start_ref}:{cell_ref(end_position)}"

    if anchor.type == AnchorType.ABSOLUTE:
        coordinates = anchor.absolute_position
        if coordinates is None:
            return "Unknown position"
        return f"Absolute ({coordinates[0]}, {coordinates[1]})"

    return "Unknown position"
|
|
428
|
+
|
|
429
|
+
def _get_file_extension(self, image_format) -> str:
|
|
430
|
+
"""Get file extension for image format."""
|
|
431
|
+
format_map = {
|
|
432
|
+
'png': '.png',
|
|
433
|
+
'jpeg': '.jpg',
|
|
434
|
+
'jpg': '.jpg',
|
|
435
|
+
'gif': '.gif'
|
|
436
|
+
}
|
|
437
|
+
format_str = image_format.value if hasattr(image_format, 'value') else str(image_format)
|
|
438
|
+
return format_map.get(format_str.lower(), '.png')
|
|
439
|
+
|
|
440
|
+
def _generate_unique_filename(self, directory: Path, base_name: str, extension: str) -> str:
|
|
441
|
+
"""Generate unique filename to avoid conflicts."""
|
|
442
|
+
# Sanitize base name
|
|
443
|
+
import re
|
|
444
|
+
safe_name = re.sub(r'[^\w\-_.]', '_', base_name)
|
|
445
|
+
|
|
446
|
+
filename = f"{safe_name}{extension}"
|
|
447
|
+
counter = 1
|
|
448
|
+
|
|
449
|
+
while (directory / filename).exists():
|
|
450
|
+
filename = f"{safe_name}_{counter}{extension}"
|
|
451
|
+
counter += 1
|
|
452
|
+
|
|
453
|
+
return filename
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Drawing and image processing module for Excel worksheets.
|
|
3
|
+
|
|
4
|
+
Provides image insertion, positioning, and format management capabilities.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .image import Image, ImageFormat
|
|
8
|
+
from .anchor import Anchor, AnchorType
|
|
9
|
+
from .collection import ImageCollection
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"Image",
|
|
13
|
+
"ImageFormat",
|
|
14
|
+
"Anchor",
|
|
15
|
+
"AnchorType",
|
|
16
|
+
"ImageCollection"
|
|
17
|
+
]
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Anchor system for positioning images within Excel worksheets.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Tuple, Optional
|
|
6
|
+
from enum import Enum
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AnchorType(Enum):
    """The ways an image can be tied to a worksheet.

    The member values are the strings ``"oneCell"``, ``"twoCell"`` and
    ``"absolute"``.
    """

    # Image moves with its cell but keeps its own size.
    ONE_CELL = "oneCell"
    # Image moves and resizes with the cells it spans.
    TWO_CELL = "twoCell"
    # Image sits at a fixed position, independent of cells.
    ABSOLUTE = "absolute"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Anchor:
    """Where an image is placed on a worksheet.

    An anchor stores a start cell with an offset, an optional end cell with
    an offset, and an optional absolute ``(x, y)`` pair. ``type`` says which
    of these apply. The alternate constructors store cell positions
    zero-based.
    """

    def __init__(self, anchor_type: AnchorType = AnchorType.TWO_CELL):
        self._type: AnchorType = anchor_type
        # Start corner and its offset.
        self._from_row, self._from_col = 0, 0
        self._from_row_offset, self._from_col_offset = 0, 0
        # End corner stays unset until a to_position is assigned.
        self._to_row, self._to_col = None, None
        self._to_row_offset, self._to_col_offset = 0, 0
        # Absolute coordinates; unset by default.
        self._x, self._y = None, None

    @property
    def type(self) -> AnchorType:
        """The anchor's type."""
        return self._type

    @type.setter
    def type(self, value: AnchorType):
        """Replace the anchor's type."""
        self._type = value

    @property
    def from_position(self) -> Tuple[int, int]:
        """Start position as ``(row, col)``."""
        return (self._from_row, self._from_col)

    @from_position.setter
    def from_position(self, value: Tuple[int, int]):
        """Set the start position from a ``(row, col)`` pair."""
        self._from_row, self._from_col = value

    @property
    def from_offset(self) -> Tuple[int, int]:
        """Start offset as ``(row_offset, col_offset)`` in pixels."""
        return (self._from_row_offset, self._from_col_offset)

    @from_offset.setter
    def from_offset(self, value: Tuple[int, int]):
        """Set the start offset from a ``(row_offset, col_offset)`` pair."""
        self._from_row_offset, self._from_col_offset = value

    @property
    def to_position(self) -> Optional[Tuple[int, int]]:
        """End position as ``(row, col)``.

        ``None`` unless the anchor is TWO_CELL and an end row has been set.
        """
        if self._type != AnchorType.TWO_CELL or self._to_row is None:
            return None
        return (self._to_row, self._to_col)

    @to_position.setter
    def to_position(self, value: Optional[Tuple[int, int]]):
        """Set or clear the end position.

        ``None`` clears it. Assigning a pair to a ONE_CELL anchor also
        switches the anchor to TWO_CELL.
        """
        self._to_row, self._to_col = (None, None) if value is None else value
        if value is not None and self._type == AnchorType.ONE_CELL:
            self._type = AnchorType.TWO_CELL

    @property
    def to_offset(self) -> Tuple[int, int]:
        """End offset as ``(row_offset, col_offset)`` in pixels."""
        return (self._to_row_offset, self._to_col_offset)

    @to_offset.setter
    def to_offset(self, value: Tuple[int, int]):
        """Set the end offset from a ``(row_offset, col_offset)`` pair."""
        self._to_row_offset, self._to_col_offset = value

    @property
    def absolute_position(self) -> Optional[Tuple[int, int]]:
        """Absolute position as ``(x, y)`` in pixels.

        ``None`` unless the anchor is ABSOLUTE and ``x`` has been set.
        """
        if self._type != AnchorType.ABSOLUTE or self._x is None:
            return None
        return (self._x, self._y)

    @absolute_position.setter
    def absolute_position(self, value: Optional[Tuple[int, int]]):
        """Set or clear the absolute position.

        ``None`` clears it and leaves the type alone. Assigning a pair also
        switches the anchor to ABSOLUTE.
        """
        self._x, self._y = (None, None) if value is None else value
        if value is not None:
            self._type = AnchorType.ABSOLUTE

    @classmethod
    def from_cell(cls, cell_ref: str, offset: Tuple[int, int] = (0, 0)) -> 'Anchor':
        """Create a TWO_CELL anchor starting at a cell reference such as 'A1'.

        The end position is a default span set to ``(row + 4, col + 2)``,
        where ``row`` and ``col`` are the values returned by
        ``coordinate_to_tuple`` (presumably 1-based; confirm against that
        helper).
        """
        from ..utils.coordinates import coordinate_to_tuple

        row, col = coordinate_to_tuple(cell_ref)
        created = cls(AnchorType.TWO_CELL)
        created.from_position = (row - 1, col - 1)  # store zero-based
        created.from_offset = offset
        created.to_position = (row + 4, col + 2)  # default span
        created.to_offset = (0, 0)
        return created

    @classmethod
    def from_range(cls, start_cell: str, end_cell: str,
                   start_offset: Tuple[int, int] = (0, 0),
                   end_offset: Tuple[int, int] = (0, 0)) -> 'Anchor':
        """Create a TWO_CELL anchor spanning ``start_cell`` to ``end_cell``."""
        from ..utils.coordinates import coordinate_to_tuple

        first_row, first_col = coordinate_to_tuple(start_cell)
        last_row, last_col = coordinate_to_tuple(end_cell)

        created = cls(AnchorType.TWO_CELL)
        # Both corners are stored zero-based.
        created.from_position = (first_row - 1, first_col - 1)
        created.from_offset = start_offset
        created.to_position = (last_row - 1, last_col - 1)
        created.to_offset = end_offset
        return created

    @classmethod
    def absolute(cls, x: int, y: int) -> 'Anchor':
        """Create an ABSOLUTE anchor at pixel position ``(x, y)``."""
        created = cls(AnchorType.ABSOLUTE)
        created.absolute_position = (x, y)
        return created

    def copy(self) -> 'Anchor':
        """Return a new ``Anchor`` with the same type and positions."""
        duplicate = Anchor(self._type)
        for field in (
            '_from_row', '_from_col', '_from_row_offset', '_from_col_offset',
            '_to_row', '_to_col', '_to_row_offset', '_to_col_offset',
            '_x', '_y',
        ):
            setattr(duplicate, field, getattr(self, field))
        return duplicate

    def __str__(self) -> str:
        """Short description whose layout depends on the anchor type."""
        if self._type == AnchorType.ABSOLUTE:
            return f"Anchor(absolute: {self.absolute_position})"
        if self._type == AnchorType.TWO_CELL:
            return f"Anchor(range: {self.from_position} -> {self.to_position})"
        return f"Anchor(cell: {self.from_position}, offset: {self.from_offset})"

    def __repr__(self) -> str:
        """Debug form showing the type value and both positions."""
        return f"Anchor(type={self._type.value}, from={self.from_position}, to={self.to_position})"
|