aspose-cells-foss 25.12.1__py3-none-any.whl → 26.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aspose_cells/__init__.py +88 -0
- aspose_cells/auto_filter.py +527 -0
- aspose_cells/cell.py +483 -0
- aspose_cells/cell_value_handler.py +319 -0
- aspose_cells/cells.py +779 -0
- aspose_cells/cfb_handler.py +445 -0
- aspose_cells/cfb_writer.py +659 -0
- aspose_cells/cfb_writer_minimal.py +337 -0
- aspose_cells/comment_xml.py +475 -0
- aspose_cells/conditional_format.py +1185 -0
- aspose_cells/csv_handler.py +690 -0
- aspose_cells/data_validation.py +911 -0
- aspose_cells/document_properties.py +356 -0
- aspose_cells/encryption_crypto.py +247 -0
- aspose_cells/encryption_params.py +138 -0
- aspose_cells/hyperlink.py +372 -0
- aspose_cells/json_handler.py +185 -0
- aspose_cells/markdown_handler.py +583 -0
- aspose_cells/shared_strings.py +101 -0
- aspose_cells/style.py +841 -0
- aspose_cells/workbook.py +499 -0
- aspose_cells/workbook_hash_password.py +68 -0
- aspose_cells/workbook_properties.py +712 -0
- aspose_cells/worksheet.py +570 -0
- aspose_cells/worksheet_properties.py +1239 -0
- aspose_cells/xlsx_encryptor.py +403 -0
- aspose_cells/xml_autofilter_loader.py +195 -0
- aspose_cells/xml_autofilter_saver.py +173 -0
- aspose_cells/xml_conditional_format_loader.py +215 -0
- aspose_cells/xml_conditional_format_saver.py +351 -0
- aspose_cells/xml_datavalidation_loader.py +239 -0
- aspose_cells/xml_datavalidation_saver.py +245 -0
- aspose_cells/xml_hyperlink_handler.py +323 -0
- aspose_cells/xml_loader.py +986 -0
- aspose_cells/xml_properties_loader.py +512 -0
- aspose_cells/xml_properties_saver.py +607 -0
- aspose_cells/xml_saver.py +1306 -0
- aspose_cells_foss-26.2.2.dist-info/METADATA +190 -0
- aspose_cells_foss-26.2.2.dist-info/RECORD +41 -0
- {aspose_cells_foss-25.12.1.dist-info → aspose_cells_foss-26.2.2.dist-info}/WHEEL +1 -1
- aspose_cells_foss-26.2.2.dist-info/top_level.txt +1 -0
- aspose/__init__.py +0 -14
- aspose/cells/__init__.py +0 -31
- aspose/cells/cell.py +0 -350
- aspose/cells/constants.py +0 -44
- aspose/cells/converters/__init__.py +0 -13
- aspose/cells/converters/csv_converter.py +0 -55
- aspose/cells/converters/json_converter.py +0 -46
- aspose/cells/converters/markdown_converter.py +0 -453
- aspose/cells/drawing/__init__.py +0 -17
- aspose/cells/drawing/anchor.py +0 -172
- aspose/cells/drawing/collection.py +0 -233
- aspose/cells/drawing/image.py +0 -338
- aspose/cells/formats.py +0 -80
- aspose/cells/formula/__init__.py +0 -10
- aspose/cells/formula/evaluator.py +0 -360
- aspose/cells/formula/functions.py +0 -433
- aspose/cells/formula/tokenizer.py +0 -340
- aspose/cells/io/__init__.py +0 -27
- aspose/cells/io/csv/__init__.py +0 -8
- aspose/cells/io/csv/reader.py +0 -88
- aspose/cells/io/csv/writer.py +0 -98
- aspose/cells/io/factory.py +0 -138
- aspose/cells/io/interfaces.py +0 -48
- aspose/cells/io/json/__init__.py +0 -8
- aspose/cells/io/json/reader.py +0 -126
- aspose/cells/io/json/writer.py +0 -119
- aspose/cells/io/md/__init__.py +0 -8
- aspose/cells/io/md/reader.py +0 -161
- aspose/cells/io/md/writer.py +0 -334
- aspose/cells/io/models.py +0 -64
- aspose/cells/io/xlsx/__init__.py +0 -9
- aspose/cells/io/xlsx/constants.py +0 -312
- aspose/cells/io/xlsx/image_writer.py +0 -311
- aspose/cells/io/xlsx/reader.py +0 -284
- aspose/cells/io/xlsx/writer.py +0 -931
- aspose/cells/plugins/__init__.py +0 -6
- aspose/cells/plugins/docling_backend/__init__.py +0 -7
- aspose/cells/plugins/docling_backend/backend.py +0 -535
- aspose/cells/plugins/markitdown_plugin/__init__.py +0 -15
- aspose/cells/plugins/markitdown_plugin/plugin.py +0 -128
- aspose/cells/range.py +0 -210
- aspose/cells/style.py +0 -287
- aspose/cells/utils/__init__.py +0 -54
- aspose/cells/utils/coordinates.py +0 -68
- aspose/cells/utils/exceptions.py +0 -43
- aspose/cells/utils/validation.py +0 -102
- aspose/cells/workbook.py +0 -352
- aspose/cells/worksheet.py +0 -670
- aspose_cells_foss-25.12.1.dist-info/METADATA +0 -189
- aspose_cells_foss-25.12.1.dist-info/RECORD +0 -53
- aspose_cells_foss-25.12.1.dist-info/entry_points.txt +0 -2
- aspose_cells_foss-25.12.1.dist-info/top_level.txt +0 -1
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Data conversion modules for exporting Excel data to various formats.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from .json_converter import JsonConverter
|
|
6
|
-
from .csv_converter import CsvConverter
|
|
7
|
-
from .markdown_converter import MarkdownConverter
|
|
8
|
-
|
|
9
|
-
__all__ = [
|
|
10
|
-
"JsonConverter",
|
|
11
|
-
"CsvConverter",
|
|
12
|
-
"MarkdownConverter"
|
|
13
|
-
]
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
CSV converter for exporting Excel data to CSV format.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import csv
|
|
6
|
-
import io
|
|
7
|
-
from typing import Optional, TYPE_CHECKING
|
|
8
|
-
from ..io.csv import CsvWriter
|
|
9
|
-
|
|
10
|
-
if TYPE_CHECKING:
|
|
11
|
-
from ..workbook import Workbook
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class CsvConverter:
    """Convert Excel workbook data to CSV format.

    Delegates row extraction and per-cell formatting to the package's
    ``CsvWriter`` and assembles the final text with the stdlib ``csv``
    module.
    """

    def __init__(self):
        # Low-level writer that knows how to flatten a worksheet.
        self._writer = CsvWriter()

    def convert_workbook(self, workbook: 'Workbook', **kwargs) -> str:
        """Render one worksheet of *workbook* as a CSV string.

        Keyword options:
            sheet_name: name of the sheet to export (falls back to the
                active sheet when missing or unknown).
            delimiter: field separator (default ``,``).
            quotechar: quoting character (default ``"``).

        Returns an empty string when the target sheet has no cells.
        """
        target_name = kwargs.get('sheet_name')
        delim = kwargs.get('delimiter', ',')
        quote = kwargs.get('quotechar', '"')

        # Resolve the worksheet: explicit name first, active sheet otherwise.
        if target_name and target_name in workbook._worksheets:
            target = workbook._worksheets[target_name]
        else:
            target = workbook.active

        if not target or not target._cells:
            return ""

        rows = self._writer._worksheet_to_data(target)
        if not rows:
            return ""

        # Build the CSV in memory so we can return it as a plain string.
        buffer = io.StringIO()
        csv_writer = csv.writer(buffer, delimiter=delim, quotechar=quote,
                                quoting=csv.QUOTE_MINIMAL)
        for raw_row in rows:
            csv_writer.writerow(
                [self._writer._format_cell_value(entry) for entry in raw_row]
            )

        try:
            return buffer.getvalue()
        finally:
            buffer.close()
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
JSON converter for exporting Excel data to JSON format.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import json
|
|
6
|
-
from typing import Dict, List, Optional, Union, TYPE_CHECKING
|
|
7
|
-
from ..io.json import JsonWriter
|
|
8
|
-
|
|
9
|
-
if TYPE_CHECKING:
|
|
10
|
-
from ..workbook import Workbook
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class JsonConverter:
    """Convert Excel workbook data to JSON format.

    Worksheet flattening is delegated to the package's ``JsonWriter``;
    this class only selects the target sheet(s) and serialises the
    result with the stdlib ``json`` module.
    """

    def __init__(self):
        # Low-level writer that converts a worksheet into plain data.
        self._writer = JsonWriter()

    def convert_workbook(self, workbook: 'Workbook', **kwargs) -> str:
        """Serialise *workbook* to a JSON string.

        Keyword options:
            sheet_name: export only this sheet (empty list if unknown).
            all_sheets: export every sheet keyed by sheet name.
            include_empty_cells: forwarded to the underlying writer.
            pretty_print: indent the output with two spaces.
        """
        pretty = kwargs.get('pretty_print', False)
        with_empty = kwargs.get('include_empty_cells', False)
        every_sheet = kwargs.get('all_sheets', False)
        target_name = kwargs.get('sheet_name')

        if target_name:
            # A named sheet wins over all_sheets; unknown names yield [].
            if target_name in workbook._worksheets:
                payload = self._writer._convert_worksheet(
                    workbook._worksheets[target_name], with_empty)
            else:
                payload = []
        elif every_sheet:
            # One entry per sheet, keyed by sheet name.
            payload = {
                name: self._writer._convert_worksheet(ws, with_empty)
                for name, ws in workbook._worksheets.items()
            }
        else:
            # Default: just the active sheet as a flat list.
            payload = self._writer._convert_worksheet(workbook.active, with_empty)

        # indent=None is json.dumps' compact default, so one call covers both.
        return json.dumps(payload, indent=2 if pretty else None,
                          ensure_ascii=False)
|
@@ -1,453 +0,0 @@
|
|
|
1
|
-
"""Optimized Markdown converter for Excel data."""
|
|
2
|
-
|
|
3
|
-
from typing import List, Optional, TYPE_CHECKING
|
|
4
|
-
from datetime import datetime
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
if TYPE_CHECKING:
|
|
8
|
-
from ..workbook import Workbook
|
|
9
|
-
from ..worksheet import Worksheet
|
|
10
|
-
from ..cell import Cell
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class MarkdownConverter:
    """Optimized Excel to Markdown converter.

    Each worksheet becomes a ``##`` section containing an optional image
    listing and a pipe-delimited Markdown table. Rendering is controlled
    by the keyword options normalised in :meth:`_create_config`.
    """

    def convert_workbook(self, workbook: 'Workbook', **kwargs) -> str:
        """Convert a workbook (all sheets, or one named sheet) to Markdown."""
        config = self._create_config(**kwargs)
        result_parts = []

        if config['include_metadata']:
            result_parts.extend([self._create_metadata(workbook), ""])

        sheets = self._get_sheets(workbook, config)
        for i, sheet in enumerate(sheets):
            if i > 0:
                # Horizontal rule between sheets only in metadata mode.
                result_parts.append("\n---\n" if config['include_metadata'] else "")
            content = self._process_sheet(sheet, config)
            if content:
                result_parts.append(content)

        return "\n".join(result_parts).strip()

    def _create_config(self, **kwargs) -> dict:
        """Normalise keyword options into a config dict with defaults."""
        return {
            'sheet_name': kwargs.get('sheet_name'),
            'include_metadata': kwargs.get('include_metadata', False),
            'value_mode': kwargs.get('value_mode', 'value'),  # "value" shows calculated results, "formula" shows formulas
            'include_hyperlinks': kwargs.get('include_hyperlinks', True),
            'image_export_mode': kwargs.get('image_export_mode', 'none'),  # 'none', 'base64', 'folder'
            'image_folder': kwargs.get('image_folder', 'images'),
            'output_dir': kwargs.get('output_dir', '.')
        }

    def _get_sheets(self, workbook: 'Workbook', config: dict) -> List['Worksheet']:
        """Get sheets to process - specific sheet by name or all sheets."""
        sheet_name = config['sheet_name']
        if sheet_name and sheet_name in workbook._worksheets:
            # Convert specific sheet by name
            return [workbook._worksheets[sheet_name]]
        # Convert all sheets if no specific sheet is requested
        return list(workbook._worksheets.values())

    def _create_metadata(self, workbook: 'Workbook') -> str:
        """Create simplified metadata section (no source-file reference)."""
        lines = [
            "# Document Metadata", "",
            f"- **Source Type**: Excel Workbook",
            f"- **Conversion Date**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"- **Total Sheets**: {len(workbook._worksheets)}",
            f"- **Sheet Names**: {', '.join(workbook._worksheets.keys())}",
            f"- **Active Sheet**: {workbook.active.name if workbook.active else 'None'}"
        ]
        return "\n".join(lines)

    def _process_sheet(self, worksheet: 'Worksheet', config: dict) -> str:
        """Render one sheet: heading, optional image section, then the table."""
        if not worksheet or not worksheet._cells:
            return ""

        parts = [f"## {worksheet.name}", ""]

        # Add images if present and image export is enabled
        if hasattr(worksheet, '_images') and len(worksheet._images) > 0:
            image_content = self._process_images(worksheet, config)
            if image_content:
                parts.extend(["### Images", "", image_content, ""])

        table = self._create_table(worksheet, config)
        if table:
            parts.append(table)

        return "\n".join(parts)

    def _create_table(self, worksheet: 'Worksheet', config: dict) -> str:
        """Create a markdown table from the sheet's cell grid."""
        if not worksheet._cells:
            return ""

        start_row = self._detect_start_row(worksheet)

        # Check for merged cells before start_row that should be included
        merged_title_rows = self._find_merged_title_rows(worksheet, start_row)

        all_data = []

        # Add merged title rows first
        for title_row in merged_title_rows:
            title_data = self._extract_data(worksheet, title_row, config, single_row=True)
            if title_data:
                all_data.extend(title_data)

        # Add main table data
        table_data = self._extract_data(worksheet, start_row, config)
        if table_data:
            all_data.extend(table_data)

        if not all_data:
            return ""

        result = []
        # First extracted row always becomes the header row.
        header = all_data[0]
        header_line = "| " + " | ".join(self._generate_column_header(cell, i) for i, cell in enumerate(header)) + " |"
        separator = "| " + " | ".join("---" for _ in header) + " |"
        result.extend([header_line, separator])

        for row in all_data[1:]:
            data_line = "| " + " | ".join(str(cell) if cell else "" for cell in row) + " |"
            result.append(data_line)

        return "\n".join(result)

    def _find_merged_title_rows(self, worksheet: 'Worksheet', start_row: int) -> list:
        """Find merged cell rows before start_row that contain titles."""
        import re

        title_rows = []
        if hasattr(worksheet, '_merged_ranges') and worksheet._merged_ranges:
            for merge_range in worksheet._merged_ranges:
                if ':' in merge_range:
                    start_ref, end_ref = merge_range.split(':')
                    merged_row = int(re.search(r'\d+', start_ref).group())
                    # If merged row is before start_row and contains meaningful content
                    if merged_row < start_row:
                        cell = worksheet._cells.get((merged_row, 1))
                        if cell and cell.value and str(cell.value).strip():
                            title_rows.append(merged_row)

        return sorted(title_rows)

    def _extract_data(self, worksheet: 'Worksheet', start_row: int, config: dict, single_row: bool = False) -> List[List[str]]:
        """Extract the rectangular cell grid starting at *start_row*."""
        data = []
        end_row = start_row if single_row else worksheet.max_row

        for row in range(start_row, end_row + 1):
            row_data = []
            for col in range(1, worksheet.max_column + 1):
                cell = worksheet._cells.get((row, col))
                row_data.append(self._format_cell(cell, config))
            data.append(row_data)
        return data

    def _format_cell(self, cell: Optional['Cell'], config: dict) -> str:
        """Format one cell, applying hyperlink and value-mode options."""
        if not cell or cell.is_empty():
            return ""

        # Explicit hyperlinks take precedence over everything else.
        if config['include_hyperlinks'] and cell.has_hyperlink():
            return cell.get_markdown_link()

        # Auto-detect URLs in text values and convert to hyperlinks
        if config['include_hyperlinks'] and isinstance(cell.value, str):
            url_detected = self._detect_and_format_urls(cell.value)
            if url_detected != cell.value:
                return url_detected

        if config['value_mode'] == 'formula' and cell.is_formula():
            value = cell.formula or cell.value
        else:
            value = cell.calculated_value

        return self._format_value(value)

    def _detect_and_format_urls(self, text: str) -> str:
        """Detect URLs, www hosts and e-mail addresses and linkify them.

        A single combined regex pass is used so that text inserted for an
        earlier match can never be re-matched by a later pattern (the old
        sequential ``re.sub`` calls nested links for inputs such as
        ``http://www.example.com``).
        """
        import re

        # Don't process text that already contains markdown links
        if '[' in text and '](' in text:
            return text

        # Alternatives ordered by specificity; leftmost match wins, so an
        # http(s) URL is never re-matched by the www or e-mail branches.
        pattern = re.compile(
            r'(?P<url>\bhttps?://[^\s<>"\'|\[\]()]+)'
            r'|(?P<www>\bwww\.[^\s<>"\'|\[\]()]+)'
            r'|(?P<email>\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b)'
        )

        def _linkify(match):
            token = match.group()
            if match.lastgroup == 'url':
                return f"[{token}]({token})"
            if match.lastgroup == 'www':
                return f"[{token}](http://{token})"
            return f"[{token}](mailto:{token})"

        return pattern.sub(_linkify, text)

    def _format_value(self, value) -> str:
        """Format a raw cell value for display inside a markdown table."""
        if value is None:
            return ""

        if isinstance(value, bool):
            return "TRUE" if value else "FALSE"

        if isinstance(value, (int, float)):
            if isinstance(value, float):
                # Use scientific notation only for extremely large/small numbers
                if abs(value) >= 1e9 or (abs(value) < 1e-4 and value != 0):
                    return f"{value:.2e}"
                elif value.is_integer():
                    return str(int(value))  # Remove .0 from whole numbers
                else:
                    # Format large numbers with commas and reasonable decimal places
                    if abs(value) >= 1000:
                        return f"{value:,.2f}".rstrip('0').rstrip('.')
                    else:
                        return f"{value:.2f}".rstrip('0').rstrip('.')
            else:
                # Format large integers with commas
                if abs(value) >= 1000:
                    return f"{value:,}"
                else:
                    return str(value)

        if isinstance(value, str):
            # Escape pipes, flatten line breaks, normalise whitespace.
            text = value.replace("|", "\\|")
            text = text.replace("\n", " ").replace("\r", " ")
            text = " ".join(text.split())
            return text.strip()

        # Fallback for other types
        return str(value).replace("|", "\\|").replace("\n", " ").strip()

    def _generate_column_header(self, cell_value: str, column_index: int) -> str:
        """Generate intelligent column headers."""
        if cell_value and str(cell_value).strip():
            header = str(cell_value).strip()
            # Don't use generic names if we have meaningful content
            if not header.startswith(('Unnamed', 'Col', 'Column')):
                return header

        # Generate Excel-style column names (A, B, C, ..., AA, AB, etc.)
        result = ""
        col_num = column_index
        while col_num >= 0:
            result = chr(col_num % 26 + ord('A')) + result
            col_num = col_num // 26 - 1
            if col_num < 0:
                break

        return result if result else f"Col{column_index + 1}"

    def _detect_start_row(self, worksheet: 'Worksheet') -> int:
        """Detect optimal table start row by scoring the first few rows."""
        if worksheet.max_row <= 3:
            return 1

        best_row, best_score = 1, -1
        for row in range(1, min(worksheet.max_row + 1, 6)):
            score = self._score_row(worksheet, row)
            if score > best_score:
                best_score, best_row = score, row

        return best_row

    def _score_row(self, worksheet: 'Worksheet', row: int) -> float:
        """Score a row as a potential table start (header) row."""
        import re

        non_empty = meaningful = unnamed = 0
        merged_bonus = 0

        for col in range(1, worksheet.max_column + 1):
            cell = worksheet._cells.get((row, col))
            if not cell or cell.value is None:
                continue

            value_str = str(cell.value).strip()
            if not value_str:
                continue

            non_empty += 1
            if value_str.startswith("Unnamed"):
                unnamed += 1
            else:
                meaningful += 1

        # Check if this row contains merged cells
        if hasattr(worksheet, '_merged_ranges') and worksheet._merged_ranges:
            for merge_range in worksheet._merged_ranges:
                # Parse merge range like "A1:F1"
                if ':' in merge_range:
                    start_ref, end_ref = merge_range.split(':')
                    start_row = int(re.search(r'\d+', start_ref).group())
                    if start_row == row:
                        # This row has merged cells, give it a bonus
                        merged_bonus = 20
                        break

        if non_empty == 0:
            return 0

        score = 50 * (meaningful / non_empty) - 100 * (unnamed / non_empty) + merged_bonus
        if non_empty >= 2 and unnamed / non_empty < 0.5:
            score += min(non_empty * 5, 25)

        return score

    def _process_images(self, worksheet: 'Worksheet', config: dict) -> str:
        """Process images in worksheet based on export mode."""
        if config['image_export_mode'] == 'none':
            return ""

        image_lines = []

        for i, image in enumerate(worksheet._images):
            if config['image_export_mode'] == 'base64':
                # Export as base64 data URL
                image_md = self._image_to_base64_markdown(image, i)
            elif config['image_export_mode'] == 'folder':
                # Export to file and reference
                image_md = self._image_to_file_markdown(image, i, config)
            else:
                continue

            if image_md:
                image_lines.append(image_md)

        return "\n\n".join(image_lines)

    def _image_to_base64_markdown(self, image, index: int) -> str:
        """Convert image to an inline base64 markdown embed."""
        import base64

        if not image.data:
            return f"*Image {index + 1}: {image.name or 'Unnamed'} (No data available)*"

        # Create base64 data URL
        format_str = image.format.value if hasattr(image.format, 'value') else str(image.format)
        base64_data = base64.b64encode(image.data).decode('utf-8')
        data_url = f"data:image/{format_str};base64,{base64_data}"

        # Create markdown
        alt_text = image.description or image.name or f"Image {index + 1}"
        anchor_info = self._get_anchor_description(image.anchor)

        md_lines = [
            f"**{image.name or f'Image {index + 1}'}**",
            f"- Position: {anchor_info}",
            f"- Size: {image.width}x{image.height}px" if image.width and image.height else "- Size: Unknown",
            f"- Format: {format_str.upper()}",
            f"- Description: {image.description}" if image.description else "",
            "",
            # NOTE(review): restored the actual image embed — alt_text and
            # data_url were computed but the embed line was empty.
            f"![{alt_text}]({data_url})"
        ]

        return "\n".join(line for line in md_lines if line)

    def _image_to_file_markdown(self, image, index: int, config: dict) -> str:
        """Save the image under the configured folder and reference it."""
        if not image.data:
            return f"*Image {index + 1}: {image.name or 'Unnamed'} (No data available)*"

        # Create images directory
        output_dir = Path(config['output_dir'])
        image_dir = output_dir / config['image_folder']
        image_dir.mkdir(parents=True, exist_ok=True)

        # Generate unique filename
        base_name = image.name or f"image_{index + 1}"
        format_ext = self._get_file_extension(image.format)
        filename = self._generate_unique_filename(image_dir, base_name, format_ext)

        # Save image file
        image_path = image_dir / filename
        with open(image_path, 'wb') as f:
            f.write(image.data)

        # Create markdown
        alt_text = image.description or image.name or f"Image {index + 1}"
        anchor_info = self._get_anchor_description(image.anchor)
        # Reference the file actually written above, relative to the output dir.
        relative_path = f"{config['image_folder']}/{filename}"

        md_lines = [
            f"**{image.name or f'Image {index + 1}'}**",
            f"- Position: {anchor_info}",
            f"- Size: {image.width}x{image.height}px" if image.width and image.height else "- Size: Unknown",
            f"- Format: {image.format.value.upper() if hasattr(image.format, 'value') else str(image.format).upper()}",
            f"- Description: {image.description}" if image.description else "",
            f"- File: [{filename}]({relative_path})",
            "",
            # NOTE(review): restored the actual image embed — alt_text and
            # relative_path were computed but the embed line was empty.
            f"![{alt_text}]({relative_path})"
        ]

        return "\n".join(line for line in md_lines if line)

    def _get_anchor_description(self, anchor) -> str:
        """Get human-readable anchor description."""
        from ..drawing.anchor import AnchorType

        if anchor.type == AnchorType.ONE_CELL:
            from ..utils.coordinates import tuple_to_coordinate
            cell_ref = tuple_to_coordinate(anchor.from_position[0] + 1, anchor.from_position[1] + 1)
            if anchor.from_offset != (0, 0):
                return f"Cell {cell_ref} + offset {anchor.from_offset}"
            return f"Cell {cell_ref}"
        elif anchor.type == AnchorType.TWO_CELL:
            from ..utils.coordinates import tuple_to_coordinate
            start_ref = tuple_to_coordinate(anchor.from_position[0] + 1, anchor.from_position[1] + 1)
            end_ref = tuple_to_coordinate(anchor.to_position[0] + 1, anchor.to_position[1] + 1)
            return f"Range {start_ref}:{end_ref}"
        elif anchor.type == AnchorType.ABSOLUTE:
            return f"Absolute ({anchor.absolute_position[0]}, {anchor.absolute_position[1]})"
        else:
            return "Unknown position"

    def _get_file_extension(self, image_format) -> str:
        """Get file extension for image format (defaults to .png)."""
        format_map = {
            'png': '.png',
            'jpeg': '.jpg',
            'jpg': '.jpg',
            'gif': '.gif'
        }
        format_str = image_format.value if hasattr(image_format, 'value') else str(image_format)
        return format_map.get(format_str.lower(), '.png')

    def _generate_unique_filename(self, directory: Path, base_name: str, extension: str) -> str:
        """Generate unique filename to avoid conflicts."""
        import re

        # Sanitize base name
        safe_name = re.sub(r'[^\w\-_.]', '_', base_name)

        filename = f"{safe_name}{extension}"
        counter = 1

        while (directory / filename).exists():
            filename = f"{safe_name}_{counter}{extension}"
            counter += 1

        return filename
aspose/cells/drawing/__init__.py
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Drawing and image processing module for Excel worksheets.
|
|
3
|
-
|
|
4
|
-
Provides image insertion, positioning, and format management capabilities.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from .image import Image, ImageFormat
|
|
8
|
-
from .anchor import Anchor, AnchorType
|
|
9
|
-
from .collection import ImageCollection
|
|
10
|
-
|
|
11
|
-
__all__ = [
|
|
12
|
-
"Image",
|
|
13
|
-
"ImageFormat",
|
|
14
|
-
"Anchor",
|
|
15
|
-
"AnchorType",
|
|
16
|
-
"ImageCollection"
|
|
17
|
-
]
|