exstruct 0.2.80__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exstruct/__init__.py +23 -12
- exstruct/cli/main.py +20 -0
- exstruct/core/backends/__init__.py +7 -0
- exstruct/core/backends/base.py +42 -0
- exstruct/core/backends/com_backend.py +230 -0
- exstruct/core/backends/openpyxl_backend.py +191 -0
- exstruct/core/cells.py +999 -483
- exstruct/core/charts.py +243 -241
- exstruct/core/integrate.py +42 -375
- exstruct/core/logging_utils.py +16 -0
- exstruct/core/modeling.py +87 -0
- exstruct/core/pipeline.py +749 -0
- exstruct/core/ranges.py +48 -0
- exstruct/core/shapes.py +282 -36
- exstruct/core/workbook.py +114 -0
- exstruct/engine.py +51 -123
- exstruct/errors.py +12 -1
- exstruct/io/__init__.py +130 -138
- exstruct/io/serialize.py +112 -0
- exstruct/models/__init__.py +58 -8
- exstruct/render/__init__.py +3 -7
- {exstruct-0.2.80.dist-info → exstruct-0.3.2.dist-info}/METADATA +133 -18
- exstruct-0.3.2.dist-info/RECORD +30 -0
- exstruct-0.2.80.dist-info/RECORD +0 -20
- {exstruct-0.2.80.dist-info → exstruct-0.3.2.dist-info}/WHEEL +0 -0
- {exstruct-0.2.80.dist-info → exstruct-0.3.2.dist-info}/entry_points.txt +0 -0
exstruct/engine.py
CHANGED
|
@@ -2,11 +2,11 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from collections.abc import Iterator
|
|
4
4
|
from contextlib import contextmanager
|
|
5
|
-
from dataclasses import dataclass
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Literal, TextIO, TypedDict, cast
|
|
8
8
|
|
|
9
|
-
from pydantic import BaseModel, ConfigDict, Field
|
|
9
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
10
10
|
|
|
11
11
|
from .core import cells as _cells
|
|
12
12
|
from .core.cells import set_table_detection_params
|
|
@@ -30,6 +30,32 @@ class TableParams(TypedDict, total=False):
|
|
|
30
30
|
min_nonempty_cells: int
|
|
31
31
|
|
|
32
32
|
|
|
33
|
+
class ColorsOptions(BaseModel):
|
|
34
|
+
"""Color extraction options.
|
|
35
|
+
|
|
36
|
+
Examples:
|
|
37
|
+
>>> ColorsOptions(
|
|
38
|
+
... include_default_background=False,
|
|
39
|
+
... ignore_colors=["#FFFFFF", "AD3815", "theme:1:0.2", "indexed:64", "auto"],
|
|
40
|
+
... )
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
include_default_background: bool = Field(
|
|
44
|
+
default=False, description="Include default (white) backgrounds."
|
|
45
|
+
)
|
|
46
|
+
ignore_colors: list[str] = Field(
|
|
47
|
+
default_factory=list, description="List of color keys to ignore."
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
def ignore_colors_set(self) -> set[str]:
|
|
51
|
+
"""Return ignore_colors as a set of normalized strings.
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
Set of color keys to ignore.
|
|
55
|
+
"""
|
|
56
|
+
return set(self.ignore_colors)
|
|
57
|
+
|
|
58
|
+
|
|
33
59
|
@dataclass(frozen=True)
|
|
34
60
|
class StructOptions:
|
|
35
61
|
"""
|
|
@@ -43,6 +69,9 @@ class StructOptions:
|
|
|
43
69
|
table_params: Optional dict passed to `set_table_detection_params(**table_params)`
|
|
44
70
|
before extraction. Use this to tweak table detection heuristics
|
|
45
71
|
per engine instance without touching global state.
|
|
72
|
+
include_colors_map: Whether to extract background color maps.
|
|
73
|
+
include_merged_cells: Whether to extract merged cell ranges.
|
|
74
|
+
colors: Color extraction options.
|
|
46
75
|
"""
|
|
47
76
|
|
|
48
77
|
mode: ExtractionMode = "standard"
|
|
@@ -50,6 +79,9 @@ class StructOptions:
|
|
|
50
79
|
None # forwarded to set_table_detection_params if provided
|
|
51
80
|
)
|
|
52
81
|
include_cell_links: bool | None = None # None -> auto: verbose=True, others=False
|
|
82
|
+
include_colors_map: bool | None = None # None -> auto: verbose=True, others=False
|
|
83
|
+
include_merged_cells: bool | None = None # None -> auto: light=False, others=True
|
|
84
|
+
colors: ColorsOptions = field(default_factory=ColorsOptions)
|
|
53
85
|
|
|
54
86
|
|
|
55
87
|
class FormatOptions(BaseModel):
|
|
@@ -91,6 +123,9 @@ class FilterOptions(BaseModel):
|
|
|
91
123
|
include_auto_print_areas: bool = Field(
|
|
92
124
|
default=False, description="Include COM-computed auto page-break areas."
|
|
93
125
|
)
|
|
126
|
+
include_merged_cells: bool = Field(
|
|
127
|
+
default=True, description="Include merged cell ranges."
|
|
128
|
+
)
|
|
94
129
|
|
|
95
130
|
|
|
96
131
|
class DestinationOptions(BaseModel):
|
|
@@ -118,11 +153,10 @@ class OutputOptions(BaseModel):
|
|
|
118
153
|
- format: serialization format/indent.
|
|
119
154
|
- filters: include/exclude flags (rows/shapes/charts/tables/print_areas, size flags).
|
|
120
155
|
- destinations: side outputs (per-sheet, per-print-area, stream override).
|
|
121
|
-
|
|
122
|
-
Legacy flat fields (fmt, pretty, indent, include_*, sheets_dir, print_areas_dir, stream)
|
|
123
|
-
are still accepted and normalized into the nested structures.
|
|
124
156
|
"""
|
|
125
157
|
|
|
158
|
+
model_config = ConfigDict(extra="forbid")
|
|
159
|
+
|
|
126
160
|
format: FormatOptions = Field(
|
|
127
161
|
default_factory=FormatOptions, description="Formatting options."
|
|
128
162
|
)
|
|
@@ -133,112 +167,6 @@ class OutputOptions(BaseModel):
|
|
|
133
167
|
default_factory=DestinationOptions, description="Side output destinations."
|
|
134
168
|
)
|
|
135
169
|
|
|
136
|
-
@model_validator(mode="before")
|
|
137
|
-
@classmethod
|
|
138
|
-
def _coerce_legacy(cls, values: dict[str, object]) -> dict[str, object]:
|
|
139
|
-
if not isinstance(values, dict):
|
|
140
|
-
return values
|
|
141
|
-
# Normalize legacy flat fields into nested configs
|
|
142
|
-
fmt_cfg = {
|
|
143
|
-
"fmt": values.pop("fmt", None),
|
|
144
|
-
"pretty": values.pop("pretty", None),
|
|
145
|
-
"indent": values.pop("indent", None),
|
|
146
|
-
}
|
|
147
|
-
filt_cfg = {
|
|
148
|
-
"include_rows": values.pop("include_rows", None),
|
|
149
|
-
"include_shapes": values.pop("include_shapes", None),
|
|
150
|
-
"include_shape_size": values.pop("include_shape_size", None),
|
|
151
|
-
"include_charts": values.pop("include_charts", None),
|
|
152
|
-
"include_chart_size": values.pop("include_chart_size", None),
|
|
153
|
-
"include_tables": values.pop("include_tables", None),
|
|
154
|
-
"include_print_areas": values.pop("include_print_areas", None),
|
|
155
|
-
}
|
|
156
|
-
dest_cfg = {
|
|
157
|
-
"sheets_dir": values.pop("sheets_dir", None),
|
|
158
|
-
"print_areas_dir": values.pop("print_areas_dir", None),
|
|
159
|
-
"auto_page_breaks_dir": values.pop("auto_page_breaks_dir", None),
|
|
160
|
-
"stream": values.pop("stream", None),
|
|
161
|
-
}
|
|
162
|
-
# Drop None to let defaults apply
|
|
163
|
-
fmt_cfg = {k: v for k, v in fmt_cfg.items() if v is not None}
|
|
164
|
-
filt_cfg = {k: v for k, v in filt_cfg.items() if v is not None}
|
|
165
|
-
dest_cfg = {k: v for k, v in dest_cfg.items() if v is not None}
|
|
166
|
-
|
|
167
|
-
merged = dict(values)
|
|
168
|
-
if "format" not in merged and fmt_cfg:
|
|
169
|
-
merged["format"] = fmt_cfg
|
|
170
|
-
if "filters" not in merged and filt_cfg:
|
|
171
|
-
merged["filters"] = filt_cfg
|
|
172
|
-
if "destinations" not in merged and dest_cfg:
|
|
173
|
-
merged["destinations"] = dest_cfg
|
|
174
|
-
return merged
|
|
175
|
-
|
|
176
|
-
# Legacy compatibility properties
|
|
177
|
-
@property
|
|
178
|
-
def fmt(self) -> Literal["json", "yaml", "yml", "toon"]:
|
|
179
|
-
return self.format.fmt
|
|
180
|
-
|
|
181
|
-
@property
|
|
182
|
-
def pretty(self) -> bool:
|
|
183
|
-
return self.format.pretty
|
|
184
|
-
|
|
185
|
-
@property
|
|
186
|
-
def indent(self) -> int | None:
|
|
187
|
-
return self.format.indent
|
|
188
|
-
|
|
189
|
-
@property
|
|
190
|
-
def include_rows(self) -> bool:
|
|
191
|
-
return self.filters.include_rows
|
|
192
|
-
|
|
193
|
-
@property
|
|
194
|
-
def include_shapes(self) -> bool:
|
|
195
|
-
return self.filters.include_shapes
|
|
196
|
-
|
|
197
|
-
@property
|
|
198
|
-
def include_shape_size(self) -> bool | None:
|
|
199
|
-
return self.filters.include_shape_size
|
|
200
|
-
|
|
201
|
-
@property
|
|
202
|
-
def include_charts(self) -> bool:
|
|
203
|
-
return self.filters.include_charts
|
|
204
|
-
|
|
205
|
-
@property
|
|
206
|
-
def include_chart_size(self) -> bool | None:
|
|
207
|
-
return self.filters.include_chart_size
|
|
208
|
-
|
|
209
|
-
@property
|
|
210
|
-
def include_tables(self) -> bool:
|
|
211
|
-
return self.filters.include_tables
|
|
212
|
-
|
|
213
|
-
@property
|
|
214
|
-
def include_print_areas(self) -> bool | None:
|
|
215
|
-
return self.filters.include_print_areas
|
|
216
|
-
|
|
217
|
-
@property
|
|
218
|
-
def sheets_dir(self) -> Path | None:
|
|
219
|
-
resolved = self.destinations.sheets_dir
|
|
220
|
-
if resolved is None:
|
|
221
|
-
return None
|
|
222
|
-
return resolved if isinstance(resolved, Path) else Path(resolved)
|
|
223
|
-
|
|
224
|
-
@property
|
|
225
|
-
def print_areas_dir(self) -> Path | None:
|
|
226
|
-
resolved = self.destinations.print_areas_dir
|
|
227
|
-
if resolved is None:
|
|
228
|
-
return None
|
|
229
|
-
return resolved if isinstance(resolved, Path) else Path(resolved)
|
|
230
|
-
|
|
231
|
-
@property
|
|
232
|
-
def stream(self) -> TextIO | None:
|
|
233
|
-
return self.destinations.stream
|
|
234
|
-
|
|
235
|
-
@property
|
|
236
|
-
def auto_page_breaks_dir(self) -> Path | None:
|
|
237
|
-
resolved = self.destinations.auto_page_breaks_dir
|
|
238
|
-
if resolved is None:
|
|
239
|
-
return None
|
|
240
|
-
return resolved if isinstance(resolved, Path) else Path(resolved)
|
|
241
|
-
|
|
242
170
|
|
|
243
171
|
class ExStructEngine:
|
|
244
172
|
"""
|
|
@@ -353,8 +281,12 @@ class ExStructEngine:
|
|
|
353
281
|
table_candidates=sheet.table_candidates
|
|
354
282
|
if self.output.filters.include_tables
|
|
355
283
|
else [],
|
|
284
|
+
colors_map=sheet.colors_map,
|
|
356
285
|
print_areas=sheet.print_areas if include_print_areas else [],
|
|
357
286
|
auto_print_areas=sheet.auto_print_areas if include_auto_print_areas else [],
|
|
287
|
+
merged_cells=sheet.merged_cells
|
|
288
|
+
if self.output.filters.include_merged_cells
|
|
289
|
+
else [],
|
|
358
290
|
)
|
|
359
291
|
|
|
360
292
|
def _filter_workbook(
|
|
@@ -409,14 +341,6 @@ class ExStructEngine:
|
|
|
409
341
|
- verbose: All shapes (with size) and charts (with size).
|
|
410
342
|
"""
|
|
411
343
|
chosen_mode = mode or self.options.mode
|
|
412
|
-
if chosen_mode not in ("light", "standard", "verbose"):
|
|
413
|
-
raise ValueError(f"Unsupported mode: {chosen_mode}")
|
|
414
|
-
include_links = (
|
|
415
|
-
self.options.include_cell_links
|
|
416
|
-
if self.options.include_cell_links is not None
|
|
417
|
-
else chosen_mode == "verbose"
|
|
418
|
-
)
|
|
419
|
-
include_print_areas = True # Extract print areas even in light mode
|
|
420
344
|
include_auto_page_breaks = (
|
|
421
345
|
self.output.filters.include_auto_print_areas
|
|
422
346
|
or self.output.destinations.auto_page_breaks_dir is not None
|
|
@@ -426,9 +350,13 @@ class ExStructEngine:
|
|
|
426
350
|
return extract_workbook(
|
|
427
351
|
normalized_file_path,
|
|
428
352
|
mode=chosen_mode,
|
|
429
|
-
include_cell_links=include_links,
|
|
430
|
-
include_print_areas=include_print_areas,
|
|
353
|
+
include_cell_links=self.options.include_cell_links,
|
|
354
|
+
include_print_areas=None,
|
|
431
355
|
include_auto_page_breaks=include_auto_page_breaks,
|
|
356
|
+
include_colors_map=self.options.include_colors_map,
|
|
357
|
+
include_default_background=self.options.colors.include_default_background,
|
|
358
|
+
ignore_colors=self.options.colors.ignore_colors_set(),
|
|
359
|
+
include_merged_cells=self.options.include_merged_cells,
|
|
432
360
|
)
|
|
433
361
|
|
|
434
362
|
def serialize(
|
|
@@ -444,7 +372,7 @@ class ExStructEngine:
|
|
|
444
372
|
|
|
445
373
|
Args:
|
|
446
374
|
data: Workbook to serialize after filtering.
|
|
447
|
-
fmt: Serialization format; defaults to OutputOptions.fmt.
|
|
375
|
+
fmt: Serialization format; defaults to OutputOptions.format.fmt.
|
|
448
376
|
pretty: Whether to pretty-print JSON output.
|
|
449
377
|
indent: Indentation to use when pretty-printing JSON.
|
|
450
378
|
"""
|
|
@@ -478,7 +406,7 @@ class ExStructEngine:
|
|
|
478
406
|
Args:
|
|
479
407
|
data: Workbook to serialize and write.
|
|
480
408
|
output_path: Target file path (str or Path); writes to stdout when None.
|
|
481
|
-
fmt: Serialization format; defaults to OutputOptions.fmt.
|
|
409
|
+
fmt: Serialization format; defaults to OutputOptions.format.fmt.
|
|
482
410
|
pretty: Whether to pretty-print JSON output.
|
|
483
411
|
indent: Indentation to use when pretty-printing JSON.
|
|
484
412
|
sheets_dir: Directory for per-sheet outputs when provided (str or Path).
|
exstruct/errors.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
"""Project-specific exception hierarchy for ExStruct."""
|
|
2
|
+
|
|
1
3
|
from __future__ import annotations
|
|
2
4
|
|
|
3
|
-
|
|
5
|
+
from enum import Enum
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
class ExstructError(Exception):
|
|
@@ -33,3 +35,12 @@ class OutputError(ExstructError):
|
|
|
33
35
|
|
|
34
36
|
class PrintAreaError(ExstructError, ValueError):
|
|
35
37
|
"""Raised when print-area specific processing fails (also a ValueError for compatibility)."""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class FallbackReason(str, Enum):
|
|
41
|
+
"""Reason codes for extraction fallbacks."""
|
|
42
|
+
|
|
43
|
+
LIGHT_MODE = "light_mode"
|
|
44
|
+
SKIP_COM_TESTS = "skip_com_tests"
|
|
45
|
+
COM_UNAVAILABLE = "com_unavailable"
|
|
46
|
+
COM_PIPELINE_FAILED = "com_pipeline_failed"
|
exstruct/io/__init__.py
CHANGED
|
@@ -1,24 +1,46 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import importlib
|
|
4
|
-
import json
|
|
5
3
|
import logging
|
|
6
4
|
from pathlib import Path
|
|
7
5
|
import re
|
|
8
|
-
from types import ModuleType
|
|
9
6
|
from typing import Literal, cast
|
|
10
7
|
|
|
11
|
-
from
|
|
12
|
-
|
|
13
|
-
from ..
|
|
14
|
-
|
|
8
|
+
from ..core.ranges import RangeBounds, parse_range_zero_based
|
|
9
|
+
from ..errors import OutputError, SerializationError
|
|
10
|
+
from ..models import (
|
|
11
|
+
Arrow,
|
|
12
|
+
CellRow,
|
|
13
|
+
Chart,
|
|
14
|
+
PrintArea,
|
|
15
|
+
PrintAreaView,
|
|
16
|
+
Shape,
|
|
17
|
+
SmartArt,
|
|
18
|
+
WorkbookData,
|
|
19
|
+
)
|
|
15
20
|
from ..models.types import JsonStructure
|
|
21
|
+
from .serialize import (
|
|
22
|
+
_FORMAT_HINTS,
|
|
23
|
+
_ensure_format_hint,
|
|
24
|
+
_require_toon,
|
|
25
|
+
_require_yaml,
|
|
26
|
+
_serialize_payload_from_hint,
|
|
27
|
+
)
|
|
16
28
|
|
|
17
29
|
logger = logging.getLogger(__name__)
|
|
18
30
|
|
|
19
31
|
|
|
20
32
|
def dict_without_empty_values(obj: object) -> JsonStructure:
|
|
21
|
-
"""
|
|
33
|
+
"""
|
|
34
|
+
Remove None, empty string, empty list, and empty dict values from a nested structure or supported model object.
|
|
35
|
+
|
|
36
|
+
Recursively processes dicts, lists, and supported model types (WorkbookData, CellRow, Chart, PrintArea, PrintAreaView, Shape, Arrow, SmartArt). Model instances are converted to dictionaries with None fields excluded before recursive cleaning. Values considered empty and removed are: `None`, `""` (empty string), `[]` (empty list), and `{}` (empty dict).
|
|
37
|
+
|
|
38
|
+
Parameters:
|
|
39
|
+
obj (object): A value to clean; may be a dict, list, scalar, or one of the supported model instances.
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
JsonStructure: The input structure with empty values removed, preserving other values and nesting.
|
|
43
|
+
"""
|
|
22
44
|
if isinstance(obj, dict):
|
|
23
45
|
return {
|
|
24
46
|
k: dict_without_empty_values(v)
|
|
@@ -31,7 +53,14 @@ def dict_without_empty_values(obj: object) -> JsonStructure:
|
|
|
31
53
|
]
|
|
32
54
|
if isinstance(
|
|
33
55
|
obj,
|
|
34
|
-
WorkbookData
|
|
56
|
+
WorkbookData
|
|
57
|
+
| CellRow
|
|
58
|
+
| Chart
|
|
59
|
+
| PrintArea
|
|
60
|
+
| PrintAreaView
|
|
61
|
+
| Shape
|
|
62
|
+
| Arrow
|
|
63
|
+
| SmartArt,
|
|
35
64
|
):
|
|
36
65
|
return dict_without_empty_values(obj.model_dump(exclude_none=True))
|
|
37
66
|
return cast(JsonStructure, obj)
|
|
@@ -68,21 +97,16 @@ def _sanitize_sheet_filename(name: str) -> str:
|
|
|
68
97
|
return safe or "sheet"
|
|
69
98
|
|
|
70
99
|
|
|
71
|
-
def _parse_range_zero_based(range_str: str) -> tuple[int, int, int, int] | None:
|
|
72
|
-
"""
|
|
73
|
-
|
|
74
|
-
|
|
100
|
+
def _parse_range_zero_based(range_str: str) -> RangeBounds | None:
|
|
101
|
+
"""Parse an Excel range string into zero-based bounds.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
range_str: Excel range string (e.g., "Sheet1!A1:B2").
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
RangeBounds in zero-based coordinates, or None on failure.
|
|
75
108
|
"""
|
|
76
|
-
|
|
77
|
-
if not cleaned:
|
|
78
|
-
return None
|
|
79
|
-
if "!" in cleaned:
|
|
80
|
-
cleaned = cleaned.split("!", 1)[1]
|
|
81
|
-
try:
|
|
82
|
-
min_col, min_row, max_col, max_row = range_boundaries(cleaned)
|
|
83
|
-
except Exception:
|
|
84
|
-
return None
|
|
85
|
-
return (min_row - 1, min_col - 1, max_row - 1, max_col - 1)
|
|
109
|
+
return parse_range_zero_based(range_str)
|
|
86
110
|
|
|
87
111
|
|
|
88
112
|
def _row_in_area(row: CellRow, area: PrintArea) -> bool:
|
|
@@ -132,8 +156,14 @@ def _filter_table_candidates_to_area(
|
|
|
132
156
|
bounds = _parse_range_zero_based(candidate)
|
|
133
157
|
if not bounds:
|
|
134
158
|
continue
|
|
135
|
-
r1
|
|
136
|
-
|
|
159
|
+
r1 = bounds.r1 + 1
|
|
160
|
+
r2 = bounds.r2 + 1
|
|
161
|
+
if (
|
|
162
|
+
r1 >= area.r1
|
|
163
|
+
and r2 <= area.r2
|
|
164
|
+
and bounds.c1 >= area.c1
|
|
165
|
+
and bounds.c2 <= area.c2
|
|
166
|
+
):
|
|
137
167
|
filtered.append(candidate)
|
|
138
168
|
return filtered
|
|
139
169
|
|
|
@@ -146,20 +176,46 @@ def _area_to_px_rect(
|
|
|
146
176
|
Uses default Excel-like cell sizes; accuracy is highest when shapes/charts are COM-extracted.
|
|
147
177
|
"""
|
|
148
178
|
left = area.c1 * col_px
|
|
149
|
-
top = area.r1 * row_px
|
|
179
|
+
top = (area.r1 - 1) * row_px
|
|
150
180
|
right = (area.c2 + 1) * col_px
|
|
151
|
-
bottom = (area.r2 + 1) * row_px
|
|
181
|
+
bottom = area.r2 * row_px
|
|
152
182
|
return left, top, right, bottom
|
|
153
183
|
|
|
154
184
|
|
|
155
185
|
def _rects_overlap(a: tuple[int, int, int, int], b: tuple[int, int, int, int]) -> bool:
|
|
156
|
-
"""
|
|
186
|
+
"""
|
|
187
|
+
Determine whether two axis-aligned rectangles intersect (overlap in area).
|
|
188
|
+
|
|
189
|
+
Parameters:
|
|
190
|
+
a (tuple[int, int, int, int]): Rectangle A as (left, top, right, bottom).
|
|
191
|
+
b (tuple[int, int, int, int]): Rectangle B as (left, top, right, bottom).
|
|
192
|
+
|
|
193
|
+
Notes:
|
|
194
|
+
Rectangles are treated as half-open in this context: if they only touch at edges or corners, they do not count as overlapping.
|
|
195
|
+
|
|
196
|
+
Returns:
|
|
197
|
+
bool: `True` if the rectangles have a non-zero-area intersection, `False` otherwise.
|
|
198
|
+
"""
|
|
157
199
|
return not (a[2] <= b[0] or a[0] >= b[2] or a[3] <= b[1] or a[1] >= b[3])
|
|
158
200
|
|
|
159
201
|
|
|
160
|
-
def _filter_shapes_to_area(
|
|
202
|
+
def _filter_shapes_to_area(
|
|
203
|
+
shapes: list[Shape | Arrow | SmartArt], area: PrintArea
|
|
204
|
+
) -> list[Shape | Arrow | SmartArt]:
|
|
205
|
+
"""
|
|
206
|
+
Filter drawable shapes to those that intersect the given print area.
|
|
207
|
+
|
|
208
|
+
Shapes and the print area are compared in approximate pixel coordinates. Shapes that have both width and height are included when their bounding rectangle overlaps the area. Shapes with unknown size (width or height is None) are treated as a point at their left/top coordinates and included only if that point lies inside the area.
|
|
209
|
+
|
|
210
|
+
Parameters:
|
|
211
|
+
shapes (list[Shape | Arrow | SmartArt]): Drawable objects with `l`, `t`, `w`, `h` coordinates.
|
|
212
|
+
area (PrintArea): Cell-based print area that will be converted to an approximate pixel rectangle.
|
|
213
|
+
|
|
214
|
+
Returns:
|
|
215
|
+
list[Shape | Arrow | SmartArt]: Subset of `shapes` whose geometry intersects the print area.
|
|
216
|
+
"""
|
|
161
217
|
area_rect = _area_to_px_rect(area)
|
|
162
|
-
filtered: list[Shape] = []
|
|
218
|
+
filtered: list[Shape | Arrow | SmartArt] = []
|
|
163
219
|
for shp in shapes:
|
|
164
220
|
if shp.w is None or shp.h is None:
|
|
165
221
|
# Fallback: treat shape as a point if size is unknown (standard mode).
|
|
@@ -281,13 +337,12 @@ def save_print_area_views(
|
|
|
281
337
|
Save each print area as an individual file in the specified format.
|
|
282
338
|
Returns a map of area key (e.g., 'Sheet1#1') to written path.
|
|
283
339
|
"""
|
|
284
|
-
format_hint =
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
)
|
|
340
|
+
format_hint = _ensure_format_hint(
|
|
341
|
+
fmt,
|
|
342
|
+
allowed=_FORMAT_HINTS,
|
|
343
|
+
error_type=SerializationError,
|
|
344
|
+
error_message="Unsupported print-area export format '{fmt}'. Allowed: json, yaml, yml, toon.",
|
|
345
|
+
)
|
|
291
346
|
|
|
292
347
|
views = build_print_area_views(
|
|
293
348
|
workbook,
|
|
@@ -314,18 +369,10 @@ def save_print_area_views(
|
|
|
314
369
|
f"_area{idx + 1}_r{area.r1}-{area.r2}_c{area.c1}-{area.c2}{suffix}"
|
|
315
370
|
)
|
|
316
371
|
path = output_dir / file_name
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
case "yaml":
|
|
322
|
-
text = view.to_yaml()
|
|
323
|
-
case "toon":
|
|
324
|
-
text = view.to_toon()
|
|
325
|
-
case _:
|
|
326
|
-
raise SerializationError(
|
|
327
|
-
f"Unsupported print-area export format '{fmt}'. Allowed: json, yaml, yml, toon."
|
|
328
|
-
)
|
|
372
|
+
payload = dict_without_empty_values(view.model_dump(exclude_none=True))
|
|
373
|
+
text = _serialize_payload_from_hint(
|
|
374
|
+
payload, format_hint, pretty=pretty, indent=indent
|
|
375
|
+
)
|
|
329
376
|
_write_text(path, text)
|
|
330
377
|
written[key] = path
|
|
331
378
|
return written
|
|
@@ -348,13 +395,12 @@ def save_auto_page_break_views(
|
|
|
348
395
|
Save auto page-break areas (computed via Excel COM) per sheet in the specified format.
|
|
349
396
|
Returns a map of area key (e.g., 'Sheet1#auto#1') to written path.
|
|
350
397
|
"""
|
|
351
|
-
format_hint =
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
)
|
|
398
|
+
format_hint = _ensure_format_hint(
|
|
399
|
+
fmt,
|
|
400
|
+
allowed=_FORMAT_HINTS,
|
|
401
|
+
error_type=SerializationError,
|
|
402
|
+
error_message="Unsupported auto page-break export format '{fmt}'. Allowed: json, yaml, yml, toon.",
|
|
403
|
+
)
|
|
358
404
|
|
|
359
405
|
views = _iter_area_views(
|
|
360
406
|
workbook,
|
|
@@ -382,18 +428,10 @@ def save_auto_page_break_views(
|
|
|
382
428
|
f"_auto_page{idx + 1}_r{area.r1}-{area.r2}_c{area.c1}-{area.c2}{suffix}"
|
|
383
429
|
)
|
|
384
430
|
path = output_dir / file_name
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
case "yaml":
|
|
390
|
-
text = view.to_yaml()
|
|
391
|
-
case "toon":
|
|
392
|
-
text = view.to_toon()
|
|
393
|
-
case _:
|
|
394
|
-
raise SerializationError(
|
|
395
|
-
f"Unsupported auto page-break export format '{fmt}'. Allowed: json, yaml, yml, toon."
|
|
396
|
-
)
|
|
431
|
+
payload = dict_without_empty_values(view.model_dump(exclude_none=True))
|
|
432
|
+
text = _serialize_payload_from_hint(
|
|
433
|
+
payload, format_hint, pretty=pretty, indent=indent
|
|
434
|
+
)
|
|
397
435
|
_write_text(path, text)
|
|
398
436
|
written[key] = path
|
|
399
437
|
return written
|
|
@@ -409,32 +447,16 @@ def serialize_workbook(
|
|
|
409
447
|
"""
|
|
410
448
|
Convert WorkbookData to string in the requested format without writing to disk.
|
|
411
449
|
"""
|
|
412
|
-
format_hint =
|
|
413
|
-
|
|
414
|
-
|
|
450
|
+
format_hint = _ensure_format_hint(
|
|
451
|
+
fmt,
|
|
452
|
+
allowed=_FORMAT_HINTS,
|
|
453
|
+
error_type=SerializationError,
|
|
454
|
+
error_message="Unsupported export format '{fmt}'. Allowed: json, yaml, yml, toon.",
|
|
455
|
+
)
|
|
415
456
|
filtered_dict = dict_without_empty_values(model.model_dump(exclude_none=True))
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
indent_val = 2 if pretty and indent is None else indent
|
|
420
|
-
return json.dumps(filtered_dict, ensure_ascii=False, indent=indent_val)
|
|
421
|
-
case "yaml":
|
|
422
|
-
yaml = _require_yaml()
|
|
423
|
-
return str(
|
|
424
|
-
yaml.safe_dump(
|
|
425
|
-
filtered_dict,
|
|
426
|
-
allow_unicode=True,
|
|
427
|
-
sort_keys=False,
|
|
428
|
-
indent=2,
|
|
429
|
-
)
|
|
430
|
-
)
|
|
431
|
-
case "toon":
|
|
432
|
-
toon = _require_toon()
|
|
433
|
-
return str(toon.encode(filtered_dict))
|
|
434
|
-
case _:
|
|
435
|
-
raise SerializationError(
|
|
436
|
-
f"Unsupported export format '{fmt}'. Allowed: json, yaml, yml, toon."
|
|
437
|
-
)
|
|
457
|
+
return _serialize_payload_from_hint(
|
|
458
|
+
filtered_dict, format_hint, pretty=pretty, indent=indent
|
|
459
|
+
)
|
|
438
460
|
|
|
439
461
|
|
|
440
462
|
def save_sheets_as_json(
|
|
@@ -461,8 +483,10 @@ def save_sheets_as_json(
|
|
|
461
483
|
)
|
|
462
484
|
file_name = f"{_sanitize_sheet_filename(sheet_name)}.json"
|
|
463
485
|
path = output_dir / file_name
|
|
464
|
-
|
|
465
|
-
|
|
486
|
+
text = _serialize_payload_from_hint(
|
|
487
|
+
payload, "json", pretty=pretty, indent=indent
|
|
488
|
+
)
|
|
489
|
+
_write_text(path, text)
|
|
466
490
|
written[sheet_name] = path
|
|
467
491
|
return written
|
|
468
492
|
|
|
@@ -479,11 +503,12 @@ def save_sheets(
|
|
|
479
503
|
Save each sheet as an individual file in the specified format (json/yaml/toon).
|
|
480
504
|
Payload includes book_name and the sheet's SheetData.
|
|
481
505
|
"""
|
|
482
|
-
format_hint =
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
506
|
+
format_hint = _ensure_format_hint(
|
|
507
|
+
fmt,
|
|
508
|
+
allowed=_FORMAT_HINTS,
|
|
509
|
+
error_type=SerializationError,
|
|
510
|
+
error_message="Unsupported sheet export format: {fmt}",
|
|
511
|
+
)
|
|
487
512
|
|
|
488
513
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
489
514
|
written: dict[str, Path] = {}
|
|
@@ -498,49 +523,14 @@ def save_sheets(
|
|
|
498
523
|
suffix = {"json": ".json", "yaml": ".yaml", "toon": ".toon"}[format_hint]
|
|
499
524
|
file_name = f"{_sanitize_sheet_filename(sheet_name)}{suffix}"
|
|
500
525
|
path = output_dir / file_name
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
text = json.dumps(payload, ensure_ascii=False, indent=indent_val)
|
|
505
|
-
case "yaml":
|
|
506
|
-
yaml = _require_yaml()
|
|
507
|
-
text = str(
|
|
508
|
-
yaml.safe_dump(
|
|
509
|
-
payload, allow_unicode=True, sort_keys=False, indent=2
|
|
510
|
-
)
|
|
511
|
-
)
|
|
512
|
-
case "toon":
|
|
513
|
-
toon = _require_toon()
|
|
514
|
-
text = str(toon.encode(payload))
|
|
515
|
-
case _:
|
|
516
|
-
raise SerializationError(
|
|
517
|
-
f"Unsupported sheet export format '{format_hint}'. Allowed: json, yaml, yml, toon."
|
|
518
|
-
)
|
|
526
|
+
text = _serialize_payload_from_hint(
|
|
527
|
+
payload, format_hint, pretty=pretty, indent=indent
|
|
528
|
+
)
|
|
519
529
|
_write_text(path, text)
|
|
520
530
|
written[sheet_name] = path
|
|
521
531
|
return written
|
|
522
532
|
|
|
523
533
|
|
|
524
|
-
def _require_yaml() -> ModuleType:
|
|
525
|
-
try:
|
|
526
|
-
module = importlib.import_module("yaml")
|
|
527
|
-
except ImportError as e:
|
|
528
|
-
raise MissingDependencyError(
|
|
529
|
-
"YAML export requires pyyaml. Install it via `pip install pyyaml` or add the 'yaml' extra."
|
|
530
|
-
) from e
|
|
531
|
-
return module
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
def _require_toon() -> ModuleType:
|
|
535
|
-
try:
|
|
536
|
-
module = importlib.import_module("toon")
|
|
537
|
-
except ImportError as e:
|
|
538
|
-
raise MissingDependencyError(
|
|
539
|
-
"TOON export requires python-toon. Install it via `pip install python-toon` or add the 'toon' extra."
|
|
540
|
-
) from e
|
|
541
|
-
return module
|
|
542
|
-
|
|
543
|
-
|
|
544
534
|
__all__ = [
|
|
545
535
|
"dict_without_empty_values",
|
|
546
536
|
"save_as_json",
|
|
@@ -552,4 +542,6 @@ __all__ = [
|
|
|
552
542
|
"save_print_area_views",
|
|
553
543
|
"save_auto_page_break_views",
|
|
554
544
|
"serialize_workbook",
|
|
545
|
+
"_require_yaml",
|
|
546
|
+
"_require_toon",
|
|
555
547
|
]
|