exstruct 0.2.80__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exstruct/__init__.py +387 -0
- exstruct/cli/availability.py +49 -0
- exstruct/cli/main.py +134 -0
- exstruct/core/__init__.py +0 -0
- exstruct/core/cells.py +1039 -0
- exstruct/core/charts.py +241 -0
- exstruct/core/integrate.py +388 -0
- exstruct/core/shapes.py +275 -0
- exstruct/engine.py +643 -0
- exstruct/errors.py +35 -0
- exstruct/io/__init__.py +555 -0
- exstruct/models/__init__.py +335 -0
- exstruct/models/maps.py +335 -0
- exstruct/models/types.py +8 -0
- exstruct/py.typed +0 -0
- exstruct/render/__init__.py +118 -0
- exstruct-0.2.80.dist-info/METADATA +435 -0
- exstruct-0.2.80.dist-info/RECORD +20 -0
- exstruct-0.2.80.dist-info/WHEEL +4 -0
- exstruct-0.2.80.dist-info/entry_points.txt +3 -0
exstruct/io/__init__.py
ADDED
|
@@ -0,0 +1,555 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import importlib
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import re
|
|
8
|
+
from types import ModuleType
|
|
9
|
+
from typing import Literal, cast
|
|
10
|
+
|
|
11
|
+
from openpyxl.utils import range_boundaries
|
|
12
|
+
|
|
13
|
+
from ..errors import MissingDependencyError, OutputError, SerializationError
|
|
14
|
+
from ..models import CellRow, Chart, PrintArea, PrintAreaView, Shape, WorkbookData
|
|
15
|
+
from ..models.types import JsonStructure
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def dict_without_empty_values(obj: object) -> JsonStructure:
    """Return ``obj`` with empty entries stripped from dicts and lists.

    Dict values and list items equal to ``None``, ``""``, ``[]`` or ``{}`` are
    dropped; whatever survives is cleaned recursively. Instances of the
    supported model classes are dumped via ``model_dump(exclude_none=True)``
    and the dumped structure is cleaned the same way. Any other object is
    returned unchanged.

    Note: emptiness is checked *before* recursing, so a container that only
    becomes empty after its own cleaning is kept as an empty container.
    """
    blanks: list[object] = [None, "", [], {}]

    if isinstance(obj, dict):
        kept: dict = {}
        for key, value in obj.items():
            if value in blanks:
                continue
            kept[key] = dict_without_empty_values(value)
        return kept

    if isinstance(obj, list):
        return [dict_without_empty_values(item) for item in obj if item not in blanks]

    model_types = (WorkbookData, CellRow, Chart, PrintArea, PrintAreaView, Shape)
    if isinstance(obj, model_types):
        dumped = obj.model_dump(exclude_none=True)
        return dict_without_empty_values(dumped)

    return cast(JsonStructure, obj)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _write_text(path: Path, text: str) -> None:
    """Store ``text`` at ``path`` using UTF-8 encoding.

    Raises:
        OutputError: if writing fails for any reason; the original exception
            is chained as the cause.
    """
    try:
        path.write_text(text, encoding="utf-8")
    except Exception as err:
        # Surface every failure as the package's own output error type.
        message = f"Failed to write output to '{path}'."
        raise OutputError(message) from err
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def save_as_json(
    model: WorkbookData, path: Path, *, pretty: bool = False, indent: int | None = None
) -> None:
    """Serialize ``model`` as JSON and write the result to ``path``.

    ``pretty`` and ``indent`` are passed through to ``serialize_workbook``.
    """
    _write_text(
        path,
        serialize_workbook(model, fmt="json", pretty=pretty, indent=indent),
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def save_as_yaml(model: WorkbookData, path: Path) -> None:
    """Serialize ``model`` as YAML and write the result to ``path``."""
    _write_text(path, serialize_workbook(model, fmt="yaml"))
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def save_as_toon(model: WorkbookData, path: Path) -> None:
    """Serialize ``model`` as TOON and write the result to ``path``."""
    _write_text(path, serialize_workbook(model, fmt="toon"))
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _sanitize_sheet_filename(name: str) -> str:
    """Turn a sheet name into something usable as a file name.

    Each backslash, slash, colon, asterisk, question mark, double quote,
    angle bracket and pipe is replaced with an underscore. If nothing is left
    (the input was empty), the placeholder ``"sheet"`` is returned.
    """
    cleaned = re.sub(r"[\\/:*?\"<>|]", "_", name)
    if cleaned:
        return cleaned
    return "sheet"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _parse_range_zero_based(range_str: str) -> tuple[int, int, int, int] | None:
    """
    Parse an Excel range string into zero-based (r1, c1, r2, c2) bounds.

    A leading sheet qualifier (everything up to the last ``!``) is ignored.

    Returns None when the string is blank, when the range parser rejects it,
    or when the parser reports a missing bound (so no fully bounded rectangle
    can be produced).
    """
    cleaned = range_str.strip()
    if not cleaned:
        return None
    # A cell reference never contains "!", so keep only what follows the last
    # one; this also copes with sheet qualifiers that contain "!" themselves.
    cleaned = cleaned.rsplit("!", 1)[-1]
    try:
        min_col, min_row, max_col, max_row = range_boundaries(cleaned)
    except Exception:
        # Best-effort contract: any parser failure means "no usable bounds".
        return None
    if None in (min_col, min_row, max_col, max_row):
        # An unbounded side cannot be converted to a zero-based index.
        return None
    return (min_row - 1, min_col - 1, max_row - 1, max_col - 1)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _row_in_area(row: CellRow, area: PrintArea) -> bool:
    """Tell whether ``row.r`` lies inside the area's inclusive row span ``r1..r2``."""
    first_row, last_row = area.r1, area.r2
    return first_row <= row.r and row.r <= last_row
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _filter_row_to_area(
    row: CellRow, area: PrintArea, *, normalize: bool = False
) -> CellRow | None:
    """
    Clip a row's cells and links to the columns covered by ``area``.

    Args:
        row: Row whose ``c`` (cells) and optional ``links`` mappings are keyed
            by column index given as a string.
        area: Area supplying the inclusive row span (``r1..r2``) and column
            span (``c1..c2``).
        normalize: When True, the row index and the column keys are rebased so
            that the area's top-left corner becomes (0, 0).

    Returns:
        A new CellRow holding only the entries inside the area, or None when
        the row is outside the area's rows or nothing falls inside its columns.
    """
    if not _row_in_area(row, area):
        return None

    def _clip_columns(by_column: dict) -> dict:
        """Keep the entries whose column index falls inside the area."""
        kept: dict = {}
        for col_key, item in by_column.items():
            try:
                col_idx = int(col_key)
            except (TypeError, ValueError):
                # A key that is not an integer has no column position; skip it.
                continue
            if area.c1 <= col_idx <= area.c2:
                kept[str(col_idx - area.c1) if normalize else col_key] = item
        return kept

    filtered_cells: dict[str, int | float | str] = _clip_columns(row.c)
    filtered_links: dict[str, str] = _clip_columns(row.links) if row.links else {}

    if not filtered_cells and not filtered_links:
        return None

    new_row_idx = row.r - area.r1 if normalize else row.r
    return CellRow(r=new_row_idx, c=filtered_cells, links=filtered_links or None)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _filter_table_candidates_to_area(
    table_candidates: list[str], area: PrintArea
) -> list[str]:
    """
    Keep the table-candidate ranges that lie entirely inside ``area``.

    Candidates that ``_parse_range_zero_based`` cannot parse are dropped.
    The input order is preserved.
    """

    def _fits(box: tuple[int, int, int, int]) -> bool:
        top, left, bottom, right = box
        return (
            area.r1 <= top
            and bottom <= area.r2
            and area.c1 <= left
            and right <= area.c2
        )

    return [
        ref
        for ref in table_candidates
        if (box := _parse_range_zero_based(ref)) and _fits(box)
    ]
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _area_to_px_rect(
    area: PrintArea, *, col_px: int = 64, row_px: int = 20
) -> tuple[int, int, int, int]:
    """
    Map a cell-based area onto a pixel rectangle ``(left, top, right, bottom)``.

    Every column is assumed to be ``col_px`` wide and every row ``row_px``
    tall, so the result is only an approximation of the real layout. The
    right and bottom edges include the area's last column and row.
    """
    return (
        area.c1 * col_px,
        area.r1 * row_px,
        (area.c2 + 1) * col_px,
        (area.r2 + 1) * row_px,
    )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _rects_overlap(a: tuple[int, int, int, int], b: tuple[int, int, int, int]) -> bool:
    """Return True if rectangles (l, t, r, b) share interior area.

    Rectangles that merely touch along an edge do not count as overlapping.
    """
    a_left, a_top, a_right, a_bottom = a[0], a[1], a[2], a[3]
    b_left, b_top, b_right, b_bottom = b[0], b[1], b[2], b[3]
    return (
        a_right > b_left
        and a_left < b_right
        and a_bottom > b_top
        and a_top < b_bottom
    )
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _filter_shapes_to_area(shapes: list[Shape], area: PrintArea) -> list[Shape]:
    """
    Select the shapes that fall within ``area`` (in approximate pixels).

    A shape with a known width and height is kept when its rectangle overlaps
    the area's pixel rectangle. A shape missing either dimension is treated as
    the single point ``(l, t)`` and kept when that point lies on or inside the
    rectangle's edges. Input order is preserved.
    """
    bounds = _area_to_px_rect(area)
    left, top, right, bottom = bounds

    def _visible(shape: Shape) -> bool:
        if shape.w is None or shape.h is None:
            # Size unknown: fall back to testing the anchor point only.
            return left <= shape.l <= right and top <= shape.t <= bottom
        shape_rect = (shape.l, shape.t, shape.l + shape.w, shape.t + shape.h)
        return _rects_overlap(bounds, shape_rect)

    return [shape for shape in shapes if _visible(shape)]
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _filter_charts_to_area(charts: list[Chart], area: PrintArea) -> list[Chart]:
    """
    Select the charts whose rectangle overlaps ``area`` (in approximate pixels).

    Charts missing a width or a height are skipped. Input order is preserved.
    """
    bounds = _area_to_px_rect(area)
    return [
        chart
        for chart in charts
        if chart.w is not None
        and chart.h is not None
        and _rects_overlap(
            bounds, (chart.l, chart.t, chart.l + chart.w, chart.t + chart.h)
        )
    ]
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _iter_area_views(
    workbook: WorkbookData,
    *,
    area_attr: Literal["print_areas", "auto_print_areas"],
    normalize: bool,
    include_shapes: bool,
    include_charts: bool,
    include_shape_size: bool,
    include_chart_size: bool,
) -> dict[str, list[PrintAreaView]]:
    """
    Build one PrintAreaView per area for every sheet of ``workbook``.

    Args:
        workbook: Source workbook; its ``sheets`` mapping is iterated in order.
        area_attr: Name of the sheet attribute that holds the list of areas.
        normalize: Forwarded to ``_filter_row_to_area`` for each row.
        include_shapes: When False, views carry no shapes.
        include_charts: When False, views carry no charts.
        include_shape_size: When False, ``w``/``h`` are set to None on copies
            of the selected shapes.
        include_chart_size: When False, ``w``/``h`` are set to None on copies
            of the selected charts.

    Returns:
        Mapping of sheet name to its views, in area order. Sheets whose area
        list is empty or missing are left out.
    """
    result: dict[str, list[PrintAreaView]] = {}
    for sheet_name, sheet in workbook.sheets.items():
        areas: list[PrintArea] = getattr(sheet, area_attr)
        if not areas:
            continue

        per_sheet: list[PrintAreaView] = []
        for area in areas:
            clipped = (
                _filter_row_to_area(row, area, normalize=normalize)
                for row in sheet.rows
            )
            rows: list[CellRow] = [kept for kept in clipped if kept]
            tables = _filter_table_candidates_to_area(sheet.table_candidates, area)

            shapes: list[Shape] = []
            if include_shapes:
                shapes = _filter_shapes_to_area(sheet.shapes, area)
            if not include_shape_size:
                # Sizes are blanked only after the geometric filtering above.
                shapes = [
                    shape.model_copy(update={"w": None, "h": None})
                    for shape in shapes
                ]

            charts: list[Chart] = []
            if include_charts:
                charts = _filter_charts_to_area(sheet.charts, area)
            if not include_chart_size:
                charts = [
                    chart.model_copy(update={"w": None, "h": None})
                    for chart in charts
                ]

            per_sheet.append(
                PrintAreaView(
                    book_name=workbook.book_name,
                    sheet_name=sheet_name,
                    area=area,
                    shapes=shapes,
                    charts=charts,
                    rows=rows,
                    table_candidates=tables,
                )
            )

        if per_sheet:
            result[sheet_name] = per_sheet
    return result
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def build_print_area_views(
    workbook: WorkbookData,
    *,
    normalize: bool = False,
    include_shapes: bool = True,
    include_charts: bool = True,
    include_shape_size: bool = True,
    include_chart_size: bool = True,
) -> dict[str, list[PrintAreaView]]:
    """
    Build PrintAreaView objects from each sheet's ``print_areas``.

    All keyword options are forwarded unchanged to ``_iter_area_views``.
    Returns a mapping of sheet name to its ordered list of PrintAreaView.
    """
    options = {
        "normalize": normalize,
        "include_shapes": include_shapes,
        "include_charts": include_charts,
        "include_shape_size": include_shape_size,
        "include_chart_size": include_chart_size,
    }
    return _iter_area_views(workbook, area_attr="print_areas", **options)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def save_print_area_views(
    workbook: WorkbookData,
    output_dir: Path,
    fmt: Literal["json", "yaml", "yml", "toon"] = "json",
    *,
    pretty: bool = False,
    indent: int | None = None,
    normalize: bool = False,
    include_shapes: bool = True,
    include_charts: bool = True,
    include_shape_size: bool = True,
    include_chart_size: bool = True,
) -> dict[str, Path]:
    """
    Write every print area of ``workbook`` to its own file in ``output_dir``.

    Args:
        workbook: Workbook whose print areas are exported.
        output_dir: Target directory; created (with parents) if views exist.
        fmt: Output format, matched case-insensitively; ``yml`` means ``yaml``.
        pretty: For JSON, use an indent of 2 unless ``indent`` is given.
        indent: Explicit JSON indent; passed to the view's ``to_json``.
        normalize, include_shapes, include_charts, include_shape_size,
        include_chart_size: Forwarded to ``build_print_area_views``.

    Returns:
        Map of area key (e.g., 'Sheet1#1') to written path. Empty when the
        workbook has no print areas, in which case nothing is created.

    Raises:
        SerializationError: if ``fmt`` is not one of the supported formats.
    """
    format_hint = fmt.lower()
    format_hint = "yaml" if format_hint == "yml" else format_hint
    if format_hint not in ("json", "yaml", "toon"):
        raise SerializationError(
            f"Unsupported print-area export format '{fmt}'. Allowed: json, yaml, yml, toon."
        )

    views = build_print_area_views(
        workbook,
        normalize=normalize,
        include_shapes=include_shapes,
        include_charts=include_charts,
        include_shape_size=include_shape_size,
        include_chart_size=include_chart_size,
    )
    if not views:
        logger.info("No print areas found; skipping export to %s", output_dir)
        return {}

    output_dir.mkdir(parents=True, exist_ok=True)
    suffix = {"json": ".json", "yaml": ".yaml", "toon": ".toon"}[format_hint]
    json_indent = 2 if pretty and indent is None else indent

    written: dict[str, Path] = {}
    for sheet_name, sheet_views in views.items():
        stem = _sanitize_sheet_filename(sheet_name)
        for number, view in enumerate(sheet_views, start=1):
            bounds = view.area
            target = output_dir / (
                f"{stem}"
                f"_area{number}_r{bounds.r1}-{bounds.r2}_c{bounds.c1}-{bounds.c2}{suffix}"
            )
            if format_hint == "json":
                text = view.to_json(pretty=pretty, indent=json_indent)
            elif format_hint == "yaml":
                text = view.to_yaml()
            elif format_hint == "toon":
                text = view.to_toon()
            else:
                # Defensive: format_hint was validated above.
                raise SerializationError(
                    f"Unsupported print-area export format '{fmt}'. Allowed: json, yaml, yml, toon."
                )
            _write_text(target, text)
            written[f"{sheet_name}#{number}"] = target
    return written
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def save_auto_page_break_views(
    workbook: WorkbookData,
    output_dir: Path,
    fmt: Literal["json", "yaml", "yml", "toon"] = "json",
    *,
    pretty: bool = False,
    indent: int | None = None,
    normalize: bool = False,
    include_shapes: bool = True,
    include_charts: bool = True,
    include_shape_size: bool = True,
    include_chart_size: bool = True,
) -> dict[str, Path]:
    """
    Write every auto page-break area of ``workbook`` to its own file.

    The areas are read from each sheet's ``auto_print_areas`` attribute.

    Args:
        workbook: Workbook whose auto page-break areas are exported.
        output_dir: Target directory; created (with parents) if views exist.
        fmt: Output format, matched case-insensitively; ``yml`` means ``yaml``.
        pretty: For JSON, use an indent of 2 unless ``indent`` is given.
        indent: Explicit JSON indent; passed to the view's ``to_json``.
        normalize, include_shapes, include_charts, include_shape_size,
        include_chart_size: Forwarded to ``_iter_area_views``.

    Returns:
        Map of area key (e.g., 'Sheet1#auto#1') to written path. Empty when no
        sheet has auto page-break areas, in which case nothing is created.

    Raises:
        SerializationError: if ``fmt`` is not one of the supported formats.
    """
    format_hint = fmt.lower()
    format_hint = "yaml" if format_hint == "yml" else format_hint
    if format_hint not in ("json", "yaml", "toon"):
        raise SerializationError(
            f"Unsupported auto page-break export format '{fmt}'. Allowed: json, yaml, yml, toon."
        )

    views = _iter_area_views(
        workbook,
        area_attr="auto_print_areas",
        normalize=normalize,
        include_shapes=include_shapes,
        include_charts=include_charts,
        include_shape_size=include_shape_size,
        include_chart_size=include_chart_size,
    )
    if not views:
        logger.info("No auto page-break areas found; skipping export to %s", output_dir)
        return {}

    output_dir.mkdir(parents=True, exist_ok=True)
    suffix = {"json": ".json", "yaml": ".yaml", "toon": ".toon"}[format_hint]
    json_indent = 2 if pretty and indent is None else indent

    written: dict[str, Path] = {}
    for sheet_name, sheet_views in views.items():
        stem = _sanitize_sheet_filename(sheet_name)
        for number, view in enumerate(sheet_views, start=1):
            bounds = view.area
            target = output_dir / (
                f"{stem}"
                f"_auto_page{number}_r{bounds.r1}-{bounds.r2}_c{bounds.c1}-{bounds.c2}{suffix}"
            )
            if format_hint == "json":
                text = view.to_json(pretty=pretty, indent=json_indent)
            elif format_hint == "yaml":
                text = view.to_yaml()
            elif format_hint == "toon":
                text = view.to_toon()
            else:
                # Defensive: format_hint was validated above.
                raise SerializationError(
                    f"Unsupported auto page-break export format '{fmt}'. Allowed: json, yaml, yml, toon."
                )
            _write_text(target, text)
            written[f"{sheet_name}#auto#{number}"] = target
    return written
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def serialize_workbook(
    model: WorkbookData,
    fmt: Literal["json", "yaml", "yml", "toon"] = "json",
    *,
    pretty: bool = False,
    indent: int | None = None,
) -> str:
    """
    Render ``model`` as a string in the requested format; nothing is written.

    The model is dumped with ``exclude_none=True`` and passed through
    ``dict_without_empty_values`` before encoding.

    Args:
        model: Workbook to serialize.
        fmt: Output format, matched case-insensitively; ``yml`` means ``yaml``.
        pretty: For JSON, use an indent of 2 unless ``indent`` is given.
        indent: Explicit JSON indent. Ignored for YAML and TOON.

    Raises:
        SerializationError: if ``fmt`` is not a supported format.
    """
    format_hint = fmt.lower()
    if format_hint == "yml":
        format_hint = "yaml"
    payload = dict_without_empty_values(model.model_dump(exclude_none=True))

    if format_hint == "json":
        json_indent = 2 if pretty and indent is None else indent
        return json.dumps(payload, ensure_ascii=False, indent=json_indent)

    if format_hint == "yaml":
        yaml = _require_yaml()
        dumped = yaml.safe_dump(
            payload,
            allow_unicode=True,
            sort_keys=False,
            indent=2,
        )
        return str(dumped)

    if format_hint == "toon":
        toon = _require_toon()
        return str(toon.encode(payload))

    raise SerializationError(
        f"Unsupported export format '{fmt}'. Allowed: json, yaml, yml, toon."
    )
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def save_sheets_as_json(
    workbook: WorkbookData,
    output_dir: Path,
    *,
    pretty: bool = False,
    indent: int | None = None,
) -> dict[str, Path]:
    """
    Write one JSON file per sheet into ``output_dir``.

    Each file holds ``book_name``, ``sheet_name`` and the dumped sheet under
    ``sheet``, with empty values removed. The directory is created (with
    parents) if needed. With ``pretty`` set and no explicit ``indent``, an
    indent of 2 is used.

    Returns a map of sheet name -> written path.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    json_indent = 2 if pretty and indent is None else indent

    written: dict[str, Path] = {}
    for sheet_name, sheet_data in workbook.sheets.items():
        content = dict_without_empty_values(
            {
                "book_name": workbook.book_name,
                "sheet_name": sheet_name,
                "sheet": sheet_data.model_dump(exclude_none=True),
            }
        )
        target = output_dir / f"{_sanitize_sheet_filename(sheet_name)}.json"
        encoded = json.dumps(content, ensure_ascii=False, indent=json_indent)
        _write_text(target, encoded)
        written[sheet_name] = target
    return written
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def save_sheets(
    workbook: WorkbookData,
    output_dir: Path,
    fmt: Literal["json", "yaml", "yml", "toon"] = "json",
    *,
    pretty: bool = False,
    indent: int | None = None,
) -> dict[str, Path]:
    """
    Write one file per sheet into ``output_dir`` in the chosen format.

    Each file holds ``book_name``, ``sheet_name`` and the dumped sheet under
    ``sheet``, with empty values removed. The directory is created (with
    parents) if needed.

    Args:
        workbook: Workbook whose sheets are exported.
        output_dir: Target directory.
        fmt: Output format, matched case-insensitively; ``yml`` means ``yaml``.
        pretty: For JSON, use an indent of 2 unless ``indent`` is given.
        indent: Explicit JSON indent. Ignored for YAML and TOON.

    Returns:
        Map of sheet name to written path.

    Raises:
        ValueError: if ``fmt`` is not a supported format. (The other
            exporters in this module raise SerializationError here.)
    """
    format_hint = fmt.lower()
    format_hint = "yaml" if format_hint == "yml" else format_hint
    if format_hint not in ("json", "yaml", "toon"):
        raise ValueError(f"Unsupported sheet export format: {fmt}")

    output_dir.mkdir(parents=True, exist_ok=True)
    suffix = {"json": ".json", "yaml": ".yaml", "toon": ".toon"}[format_hint]
    json_indent = 2 if pretty and indent is None else indent

    written: dict[str, Path] = {}
    for sheet_name, sheet_data in workbook.sheets.items():
        content = dict_without_empty_values(
            {
                "book_name": workbook.book_name,
                "sheet_name": sheet_name,
                "sheet": sheet_data.model_dump(exclude_none=True),
            }
        )
        target = output_dir / f"{_sanitize_sheet_filename(sheet_name)}{suffix}"
        if format_hint == "json":
            text = json.dumps(content, ensure_ascii=False, indent=json_indent)
        elif format_hint == "yaml":
            yaml = _require_yaml()
            text = str(
                yaml.safe_dump(content, allow_unicode=True, sort_keys=False, indent=2)
            )
        elif format_hint == "toon":
            toon = _require_toon()
            text = str(toon.encode(content))
        else:
            # Defensive: format_hint was validated above.
            raise SerializationError(
                f"Unsupported sheet export format '{format_hint}'. Allowed: json, yaml, yml, toon."
            )
        _write_text(target, text)
        written[sheet_name] = target
    return written
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _require_yaml() -> ModuleType:
    """Import the ``yaml`` module on demand and return it.

    Raises:
        MissingDependencyError: if the module cannot be imported; the
            ImportError is chained as the cause.
    """
    try:
        return importlib.import_module("yaml")
    except ImportError as exc:
        raise MissingDependencyError(
            "YAML export requires pyyaml. Install it via `pip install pyyaml` or add the 'yaml' extra."
        ) from exc
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def _require_toon() -> ModuleType:
    """Import the ``toon`` module on demand and return it.

    Raises:
        MissingDependencyError: if the module cannot be imported; the
            ImportError is chained as the cause.
    """
    try:
        return importlib.import_module("toon")
    except ImportError as exc:
        raise MissingDependencyError(
            "TOON export requires python-toon. Install it via `pip install python-toon` or add the 'toon' extra."
        ) from exc
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
# Public API of this module, grouped by purpose.
__all__ = [
    # In-memory serialization helpers
    "dict_without_empty_values",
    "serialize_workbook",
    # Whole-workbook writers
    "save_as_json",
    "save_as_yaml",
    "save_as_toon",
    # Per-sheet writers
    "save_sheets",
    "save_sheets_as_json",
    # Print-area / page-break views
    "build_print_area_views",
    "save_print_area_views",
    "save_auto_page_break_views",
]
|