exstruct 0.2.80__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
exstruct/engine.py CHANGED
@@ -2,11 +2,11 @@ from __future__ import annotations
2
2
 
3
3
  from collections.abc import Iterator
4
4
  from contextlib import contextmanager
5
- from dataclasses import dataclass
5
+ from dataclasses import dataclass, field
6
6
  from pathlib import Path
7
7
  from typing import Literal, TextIO, TypedDict, cast
8
8
 
9
- from pydantic import BaseModel, ConfigDict, Field, model_validator
9
+ from pydantic import BaseModel, ConfigDict, Field
10
10
 
11
11
  from .core import cells as _cells
12
12
  from .core.cells import set_table_detection_params
@@ -30,6 +30,32 @@ class TableParams(TypedDict, total=False):
30
30
  min_nonempty_cells: int
31
31
 
32
32
 
33
+ class ColorsOptions(BaseModel):
34
+ """Color extraction options.
35
+
36
+ Examples:
37
+ >>> ColorsOptions(
38
+ ... include_default_background=False,
39
+ ... ignore_colors=["#FFFFFF", "AD3815", "theme:1:0.2", "indexed:64", "auto"],
40
+ ... )
41
+ """
42
+
43
+ include_default_background: bool = Field(
44
+ default=False, description="Include default (white) backgrounds."
45
+ )
46
+ ignore_colors: list[str] = Field(
47
+ default_factory=list, description="List of color keys to ignore."
48
+ )
49
+
50
+ def ignore_colors_set(self) -> set[str]:
51
+ """Return ignore_colors as a set of normalized strings.
52
+
53
+ Returns:
54
+ Set of color keys to ignore.
55
+ """
56
+ return set(self.ignore_colors)
57
+
58
+
33
59
  @dataclass(frozen=True)
34
60
  class StructOptions:
35
61
  """
@@ -43,6 +69,9 @@ class StructOptions:
43
69
  table_params: Optional dict passed to `set_table_detection_params(**table_params)`
44
70
  before extraction. Use this to tweak table detection heuristics
45
71
  per engine instance without touching global state.
72
+ include_colors_map: Whether to extract background color maps.
73
+ include_merged_cells: Whether to extract merged cell ranges.
74
+ colors: Color extraction options.
46
75
  """
47
76
 
48
77
  mode: ExtractionMode = "standard"
@@ -50,6 +79,9 @@ class StructOptions:
50
79
  None # forwarded to set_table_detection_params if provided
51
80
  )
52
81
  include_cell_links: bool | None = None # None -> auto: verbose=True, others=False
82
+ include_colors_map: bool | None = None # None -> auto: verbose=True, others=False
83
+ include_merged_cells: bool | None = None # None -> auto: light=False, others=True
84
+ colors: ColorsOptions = field(default_factory=ColorsOptions)
53
85
 
54
86
 
55
87
  class FormatOptions(BaseModel):
@@ -91,6 +123,9 @@ class FilterOptions(BaseModel):
91
123
  include_auto_print_areas: bool = Field(
92
124
  default=False, description="Include COM-computed auto page-break areas."
93
125
  )
126
+ include_merged_cells: bool = Field(
127
+ default=True, description="Include merged cell ranges."
128
+ )
94
129
 
95
130
 
96
131
  class DestinationOptions(BaseModel):
@@ -118,11 +153,10 @@ class OutputOptions(BaseModel):
118
153
  - format: serialization format/indent.
119
154
  - filters: include/exclude flags (rows/shapes/charts/tables/print_areas, size flags).
120
155
  - destinations: side outputs (per-sheet, per-print-area, stream override).
121
-
122
- Legacy flat fields (fmt, pretty, indent, include_*, sheets_dir, print_areas_dir, stream)
123
- are still accepted and normalized into the nested structures.
124
156
  """
125
157
 
158
+ model_config = ConfigDict(extra="forbid")
159
+
126
160
  format: FormatOptions = Field(
127
161
  default_factory=FormatOptions, description="Formatting options."
128
162
  )
@@ -133,112 +167,6 @@ class OutputOptions(BaseModel):
133
167
  default_factory=DestinationOptions, description="Side output destinations."
134
168
  )
135
169
 
136
- @model_validator(mode="before")
137
- @classmethod
138
- def _coerce_legacy(cls, values: dict[str, object]) -> dict[str, object]:
139
- if not isinstance(values, dict):
140
- return values
141
- # Normalize legacy flat fields into nested configs
142
- fmt_cfg = {
143
- "fmt": values.pop("fmt", None),
144
- "pretty": values.pop("pretty", None),
145
- "indent": values.pop("indent", None),
146
- }
147
- filt_cfg = {
148
- "include_rows": values.pop("include_rows", None),
149
- "include_shapes": values.pop("include_shapes", None),
150
- "include_shape_size": values.pop("include_shape_size", None),
151
- "include_charts": values.pop("include_charts", None),
152
- "include_chart_size": values.pop("include_chart_size", None),
153
- "include_tables": values.pop("include_tables", None),
154
- "include_print_areas": values.pop("include_print_areas", None),
155
- }
156
- dest_cfg = {
157
- "sheets_dir": values.pop("sheets_dir", None),
158
- "print_areas_dir": values.pop("print_areas_dir", None),
159
- "auto_page_breaks_dir": values.pop("auto_page_breaks_dir", None),
160
- "stream": values.pop("stream", None),
161
- }
162
- # Drop None to let defaults apply
163
- fmt_cfg = {k: v for k, v in fmt_cfg.items() if v is not None}
164
- filt_cfg = {k: v for k, v in filt_cfg.items() if v is not None}
165
- dest_cfg = {k: v for k, v in dest_cfg.items() if v is not None}
166
-
167
- merged = dict(values)
168
- if "format" not in merged and fmt_cfg:
169
- merged["format"] = fmt_cfg
170
- if "filters" not in merged and filt_cfg:
171
- merged["filters"] = filt_cfg
172
- if "destinations" not in merged and dest_cfg:
173
- merged["destinations"] = dest_cfg
174
- return merged
175
-
176
- # Legacy compatibility properties
177
- @property
178
- def fmt(self) -> Literal["json", "yaml", "yml", "toon"]:
179
- return self.format.fmt
180
-
181
- @property
182
- def pretty(self) -> bool:
183
- return self.format.pretty
184
-
185
- @property
186
- def indent(self) -> int | None:
187
- return self.format.indent
188
-
189
- @property
190
- def include_rows(self) -> bool:
191
- return self.filters.include_rows
192
-
193
- @property
194
- def include_shapes(self) -> bool:
195
- return self.filters.include_shapes
196
-
197
- @property
198
- def include_shape_size(self) -> bool | None:
199
- return self.filters.include_shape_size
200
-
201
- @property
202
- def include_charts(self) -> bool:
203
- return self.filters.include_charts
204
-
205
- @property
206
- def include_chart_size(self) -> bool | None:
207
- return self.filters.include_chart_size
208
-
209
- @property
210
- def include_tables(self) -> bool:
211
- return self.filters.include_tables
212
-
213
- @property
214
- def include_print_areas(self) -> bool | None:
215
- return self.filters.include_print_areas
216
-
217
- @property
218
- def sheets_dir(self) -> Path | None:
219
- resolved = self.destinations.sheets_dir
220
- if resolved is None:
221
- return None
222
- return resolved if isinstance(resolved, Path) else Path(resolved)
223
-
224
- @property
225
- def print_areas_dir(self) -> Path | None:
226
- resolved = self.destinations.print_areas_dir
227
- if resolved is None:
228
- return None
229
- return resolved if isinstance(resolved, Path) else Path(resolved)
230
-
231
- @property
232
- def stream(self) -> TextIO | None:
233
- return self.destinations.stream
234
-
235
- @property
236
- def auto_page_breaks_dir(self) -> Path | None:
237
- resolved = self.destinations.auto_page_breaks_dir
238
- if resolved is None:
239
- return None
240
- return resolved if isinstance(resolved, Path) else Path(resolved)
241
-
242
170
 
243
171
  class ExStructEngine:
244
172
  """
@@ -353,8 +281,12 @@ class ExStructEngine:
353
281
  table_candidates=sheet.table_candidates
354
282
  if self.output.filters.include_tables
355
283
  else [],
284
+ colors_map=sheet.colors_map,
356
285
  print_areas=sheet.print_areas if include_print_areas else [],
357
286
  auto_print_areas=sheet.auto_print_areas if include_auto_print_areas else [],
287
+ merged_cells=sheet.merged_cells
288
+ if self.output.filters.include_merged_cells
289
+ else [],
358
290
  )
359
291
 
360
292
  def _filter_workbook(
@@ -409,14 +341,6 @@ class ExStructEngine:
409
341
  - verbose: All shapes (with size) and charts (with size).
410
342
  """
411
343
  chosen_mode = mode or self.options.mode
412
- if chosen_mode not in ("light", "standard", "verbose"):
413
- raise ValueError(f"Unsupported mode: {chosen_mode}")
414
- include_links = (
415
- self.options.include_cell_links
416
- if self.options.include_cell_links is not None
417
- else chosen_mode == "verbose"
418
- )
419
- include_print_areas = True # Extract print areas even in light mode
420
344
  include_auto_page_breaks = (
421
345
  self.output.filters.include_auto_print_areas
422
346
  or self.output.destinations.auto_page_breaks_dir is not None
@@ -426,9 +350,13 @@ class ExStructEngine:
426
350
  return extract_workbook(
427
351
  normalized_file_path,
428
352
  mode=chosen_mode,
429
- include_cell_links=include_links,
430
- include_print_areas=include_print_areas,
353
+ include_cell_links=self.options.include_cell_links,
354
+ include_print_areas=None,
431
355
  include_auto_page_breaks=include_auto_page_breaks,
356
+ include_colors_map=self.options.include_colors_map,
357
+ include_default_background=self.options.colors.include_default_background,
358
+ ignore_colors=self.options.colors.ignore_colors_set(),
359
+ include_merged_cells=self.options.include_merged_cells,
432
360
  )
433
361
 
434
362
  def serialize(
@@ -444,7 +372,7 @@ class ExStructEngine:
444
372
 
445
373
  Args:
446
374
  data: Workbook to serialize after filtering.
447
- fmt: Serialization format; defaults to OutputOptions.fmt.
375
+ fmt: Serialization format; defaults to OutputOptions.format.fmt.
448
376
  pretty: Whether to pretty-print JSON output.
449
377
  indent: Indentation to use when pretty-printing JSON.
450
378
  """
@@ -478,7 +406,7 @@ class ExStructEngine:
478
406
  Args:
479
407
  data: Workbook to serialize and write.
480
408
  output_path: Target file path (str or Path); writes to stdout when None.
481
- fmt: Serialization format; defaults to OutputOptions.fmt.
409
+ fmt: Serialization format; defaults to OutputOptions.format.fmt.
482
410
  pretty: Whether to pretty-print JSON output.
483
411
  indent: Indentation to use when pretty-printing JSON.
484
412
  sheets_dir: Directory for per-sheet outputs when provided (str or Path).
exstruct/errors.py CHANGED
@@ -1,6 +1,8 @@
1
+ """Project-specific exception hierarchy for ExStruct."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
- """Project-specific exception hierarchy for ExStruct."""
5
+ from enum import Enum
4
6
 
5
7
 
6
8
  class ExstructError(Exception):
@@ -33,3 +35,12 @@ class OutputError(ExstructError):
33
35
 
34
36
  class PrintAreaError(ExstructError, ValueError):
35
37
  """Raised when print-area specific processing fails (also a ValueError for compatibility)."""
38
+
39
+
40
+ class FallbackReason(str, Enum):
41
+ """Reason codes for extraction fallbacks."""
42
+
43
+ LIGHT_MODE = "light_mode"
44
+ SKIP_COM_TESTS = "skip_com_tests"
45
+ COM_UNAVAILABLE = "com_unavailable"
46
+ COM_PIPELINE_FAILED = "com_pipeline_failed"
exstruct/io/__init__.py CHANGED
@@ -1,24 +1,46 @@
1
1
  from __future__ import annotations
2
2
 
3
- import importlib
4
- import json
5
3
  import logging
6
4
  from pathlib import Path
7
5
  import re
8
- from types import ModuleType
9
6
  from typing import Literal, cast
10
7
 
11
- from openpyxl.utils import range_boundaries
12
-
13
- from ..errors import MissingDependencyError, OutputError, SerializationError
14
- from ..models import CellRow, Chart, PrintArea, PrintAreaView, Shape, WorkbookData
8
+ from ..core.ranges import RangeBounds, parse_range_zero_based
9
+ from ..errors import OutputError, SerializationError
10
+ from ..models import (
11
+ Arrow,
12
+ CellRow,
13
+ Chart,
14
+ PrintArea,
15
+ PrintAreaView,
16
+ Shape,
17
+ SmartArt,
18
+ WorkbookData,
19
+ )
15
20
  from ..models.types import JsonStructure
21
+ from .serialize import (
22
+ _FORMAT_HINTS,
23
+ _ensure_format_hint,
24
+ _require_toon,
25
+ _require_yaml,
26
+ _serialize_payload_from_hint,
27
+ )
16
28
 
17
29
  logger = logging.getLogger(__name__)
18
30
 
19
31
 
20
32
  def dict_without_empty_values(obj: object) -> JsonStructure:
21
- """Recursively drop empty values from nested structures."""
33
+ """
34
+ Remove None, empty string, empty list, and empty dict values from a nested structure or supported model object.
35
+
36
+ Recursively processes dicts, lists, and supported model types (WorkbookData, CellRow, Chart, PrintArea, PrintAreaView, Shape, Arrow, SmartArt). Model instances are converted to dictionaries with None fields excluded before recursive cleaning. Values considered empty and removed are: `None`, `""` (empty string), `[]` (empty list), and `{}` (empty dict).
37
+
38
+ Parameters:
39
+ obj (object): A value to clean; may be a dict, list, scalar, or one of the supported model instances.
40
+
41
+ Returns:
42
+ JsonStructure: The input structure with empty values removed, preserving other values and nesting.
43
+ """
22
44
  if isinstance(obj, dict):
23
45
  return {
24
46
  k: dict_without_empty_values(v)
@@ -31,7 +53,14 @@ def dict_without_empty_values(obj: object) -> JsonStructure:
31
53
  ]
32
54
  if isinstance(
33
55
  obj,
34
- WorkbookData | CellRow | Chart | PrintArea | PrintAreaView | Shape,
56
+ WorkbookData
57
+ | CellRow
58
+ | Chart
59
+ | PrintArea
60
+ | PrintAreaView
61
+ | Shape
62
+ | Arrow
63
+ | SmartArt,
35
64
  ):
36
65
  return dict_without_empty_values(obj.model_dump(exclude_none=True))
37
66
  return cast(JsonStructure, obj)
@@ -68,21 +97,16 @@ def _sanitize_sheet_filename(name: str) -> str:
68
97
  return safe or "sheet"
69
98
 
70
99
 
71
- def _parse_range_zero_based(range_str: str) -> tuple[int, int, int, int] | None:
72
- """
73
- Parse an Excel range string into zero-based (r1, c1, r2, c2) bounds.
74
- Returns None on failure.
100
+ def _parse_range_zero_based(range_str: str) -> RangeBounds | None:
101
+ """Parse an Excel range string into zero-based bounds.
102
+
103
+ Args:
104
+ range_str: Excel range string (e.g., "Sheet1!A1:B2").
105
+
106
+ Returns:
107
+ RangeBounds in zero-based coordinates, or None on failure.
75
108
  """
76
- cleaned = range_str.strip()
77
- if not cleaned:
78
- return None
79
- if "!" in cleaned:
80
- cleaned = cleaned.split("!", 1)[1]
81
- try:
82
- min_col, min_row, max_col, max_row = range_boundaries(cleaned)
83
- except Exception:
84
- return None
85
- return (min_row - 1, min_col - 1, max_row - 1, max_col - 1)
109
+ return parse_range_zero_based(range_str)
86
110
 
87
111
 
88
112
  def _row_in_area(row: CellRow, area: PrintArea) -> bool:
@@ -132,8 +156,14 @@ def _filter_table_candidates_to_area(
132
156
  bounds = _parse_range_zero_based(candidate)
133
157
  if not bounds:
134
158
  continue
135
- r1, c1, r2, c2 = bounds
136
- if r1 >= area.r1 and r2 <= area.r2 and c1 >= area.c1 and c2 <= area.c2:
159
+ r1 = bounds.r1 + 1
160
+ r2 = bounds.r2 + 1
161
+ if (
162
+ r1 >= area.r1
163
+ and r2 <= area.r2
164
+ and bounds.c1 >= area.c1
165
+ and bounds.c2 <= area.c2
166
+ ):
137
167
  filtered.append(candidate)
138
168
  return filtered
139
169
 
@@ -146,20 +176,46 @@ def _area_to_px_rect(
146
176
  Uses default Excel-like cell sizes; accuracy is highest when shapes/charts are COM-extracted.
147
177
  """
148
178
  left = area.c1 * col_px
149
- top = area.r1 * row_px
179
+ top = (area.r1 - 1) * row_px
150
180
  right = (area.c2 + 1) * col_px
151
- bottom = (area.r2 + 1) * row_px
181
+ bottom = area.r2 * row_px
152
182
  return left, top, right, bottom
153
183
 
154
184
 
155
185
  def _rects_overlap(a: tuple[int, int, int, int], b: tuple[int, int, int, int]) -> bool:
156
- """Return True if rectangles (l, t, r, b) overlap."""
186
+ """
187
+ Determine whether two axis-aligned rectangles intersect (overlap in area).
188
+
189
+ Parameters:
190
+ a (tuple[int, int, int, int]): Rectangle A as (left, top, right, bottom).
191
+ b (tuple[int, int, int, int]): Rectangle B as (left, top, right, bottom).
192
+
193
+ Notes:
194
+ Rectangles are treated as half-open in this context: if they only touch at edges or corners, they do not count as overlapping.
195
+
196
+ Returns:
197
+ bool: `True` if the rectangles have a non-zero-area intersection, `False` otherwise.
198
+ """
157
199
  return not (a[2] <= b[0] or a[0] >= b[2] or a[3] <= b[1] or a[1] >= b[3])
158
200
 
159
201
 
160
- def _filter_shapes_to_area(shapes: list[Shape], area: PrintArea) -> list[Shape]:
202
+ def _filter_shapes_to_area(
203
+ shapes: list[Shape | Arrow | SmartArt], area: PrintArea
204
+ ) -> list[Shape | Arrow | SmartArt]:
205
+ """
206
+ Filter drawable shapes to those that intersect the given print area.
207
+
208
+ Shapes and the print area are compared in approximate pixel coordinates. Shapes that have both width and height are included when their bounding rectangle overlaps the area. Shapes with unknown size (width or height is None) are treated as a point at their left/top coordinates and included only if that point lies inside the area.
209
+
210
+ Parameters:
211
+ shapes (list[Shape | Arrow | SmartArt]): Drawable objects with `l`, `t`, `w`, `h` coordinates.
212
+ area (PrintArea): Cell-based print area that will be converted to an approximate pixel rectangle.
213
+
214
+ Returns:
215
+ list[Shape | Arrow | SmartArt]: Subset of `shapes` whose geometry intersects the print area.
216
+ """
161
217
  area_rect = _area_to_px_rect(area)
162
- filtered: list[Shape] = []
218
+ filtered: list[Shape | Arrow | SmartArt] = []
163
219
  for shp in shapes:
164
220
  if shp.w is None or shp.h is None:
165
221
  # Fallback: treat shape as a point if size is unknown (standard mode).
@@ -281,13 +337,12 @@ def save_print_area_views(
281
337
  Save each print area as an individual file in the specified format.
282
338
  Returns a map of area key (e.g., 'Sheet1#1') to written path.
283
339
  """
284
- format_hint = fmt.lower()
285
- if format_hint == "yml":
286
- format_hint = "yaml"
287
- if format_hint not in ("json", "yaml", "toon"):
288
- raise SerializationError(
289
- f"Unsupported print-area export format '{fmt}'. Allowed: json, yaml, yml, toon."
290
- )
340
+ format_hint = _ensure_format_hint(
341
+ fmt,
342
+ allowed=_FORMAT_HINTS,
343
+ error_type=SerializationError,
344
+ error_message="Unsupported print-area export format '{fmt}'. Allowed: json, yaml, yml, toon.",
345
+ )
291
346
 
292
347
  views = build_print_area_views(
293
348
  workbook,
@@ -314,18 +369,10 @@ def save_print_area_views(
314
369
  f"_area{idx + 1}_r{area.r1}-{area.r2}_c{area.c1}-{area.c2}{suffix}"
315
370
  )
316
371
  path = output_dir / file_name
317
- match format_hint:
318
- case "json":
319
- indent_val = 2 if pretty and indent is None else indent
320
- text = view.to_json(pretty=pretty, indent=indent_val)
321
- case "yaml":
322
- text = view.to_yaml()
323
- case "toon":
324
- text = view.to_toon()
325
- case _:
326
- raise SerializationError(
327
- f"Unsupported print-area export format '{fmt}'. Allowed: json, yaml, yml, toon."
328
- )
372
+ payload = dict_without_empty_values(view.model_dump(exclude_none=True))
373
+ text = _serialize_payload_from_hint(
374
+ payload, format_hint, pretty=pretty, indent=indent
375
+ )
329
376
  _write_text(path, text)
330
377
  written[key] = path
331
378
  return written
@@ -348,13 +395,12 @@ def save_auto_page_break_views(
348
395
  Save auto page-break areas (computed via Excel COM) per sheet in the specified format.
349
396
  Returns a map of area key (e.g., 'Sheet1#auto#1') to written path.
350
397
  """
351
- format_hint = fmt.lower()
352
- if format_hint == "yml":
353
- format_hint = "yaml"
354
- if format_hint not in ("json", "yaml", "toon"):
355
- raise SerializationError(
356
- f"Unsupported auto page-break export format '{fmt}'. Allowed: json, yaml, yml, toon."
357
- )
398
+ format_hint = _ensure_format_hint(
399
+ fmt,
400
+ allowed=_FORMAT_HINTS,
401
+ error_type=SerializationError,
402
+ error_message="Unsupported auto page-break export format '{fmt}'. Allowed: json, yaml, yml, toon.",
403
+ )
358
404
 
359
405
  views = _iter_area_views(
360
406
  workbook,
@@ -382,18 +428,10 @@ def save_auto_page_break_views(
382
428
  f"_auto_page{idx + 1}_r{area.r1}-{area.r2}_c{area.c1}-{area.c2}{suffix}"
383
429
  )
384
430
  path = output_dir / file_name
385
- match format_hint:
386
- case "json":
387
- indent_val = 2 if pretty and indent is None else indent
388
- text = view.to_json(pretty=pretty, indent=indent_val)
389
- case "yaml":
390
- text = view.to_yaml()
391
- case "toon":
392
- text = view.to_toon()
393
- case _:
394
- raise SerializationError(
395
- f"Unsupported auto page-break export format '{fmt}'. Allowed: json, yaml, yml, toon."
396
- )
431
+ payload = dict_without_empty_values(view.model_dump(exclude_none=True))
432
+ text = _serialize_payload_from_hint(
433
+ payload, format_hint, pretty=pretty, indent=indent
434
+ )
397
435
  _write_text(path, text)
398
436
  written[key] = path
399
437
  return written
@@ -409,32 +447,16 @@ def serialize_workbook(
409
447
  """
410
448
  Convert WorkbookData to string in the requested format without writing to disk.
411
449
  """
412
- format_hint = fmt.lower()
413
- if format_hint == "yml":
414
- format_hint = "yaml"
450
+ format_hint = _ensure_format_hint(
451
+ fmt,
452
+ allowed=_FORMAT_HINTS,
453
+ error_type=SerializationError,
454
+ error_message="Unsupported export format '{fmt}'. Allowed: json, yaml, yml, toon.",
455
+ )
415
456
  filtered_dict = dict_without_empty_values(model.model_dump(exclude_none=True))
416
-
417
- match format_hint:
418
- case "json":
419
- indent_val = 2 if pretty and indent is None else indent
420
- return json.dumps(filtered_dict, ensure_ascii=False, indent=indent_val)
421
- case "yaml":
422
- yaml = _require_yaml()
423
- return str(
424
- yaml.safe_dump(
425
- filtered_dict,
426
- allow_unicode=True,
427
- sort_keys=False,
428
- indent=2,
429
- )
430
- )
431
- case "toon":
432
- toon = _require_toon()
433
- return str(toon.encode(filtered_dict))
434
- case _:
435
- raise SerializationError(
436
- f"Unsupported export format '{fmt}'. Allowed: json, yaml, yml, toon."
437
- )
457
+ return _serialize_payload_from_hint(
458
+ filtered_dict, format_hint, pretty=pretty, indent=indent
459
+ )
438
460
 
439
461
 
440
462
  def save_sheets_as_json(
@@ -461,8 +483,10 @@ def save_sheets_as_json(
461
483
  )
462
484
  file_name = f"{_sanitize_sheet_filename(sheet_name)}.json"
463
485
  path = output_dir / file_name
464
- indent_val = 2 if pretty and indent is None else indent
465
- _write_text(path, json.dumps(payload, ensure_ascii=False, indent=indent_val))
486
+ text = _serialize_payload_from_hint(
487
+ payload, "json", pretty=pretty, indent=indent
488
+ )
489
+ _write_text(path, text)
466
490
  written[sheet_name] = path
467
491
  return written
468
492
 
@@ -479,11 +503,12 @@ def save_sheets(
479
503
  Save each sheet as an individual file in the specified format (json/yaml/toon).
480
504
  Payload includes book_name and the sheet's SheetData.
481
505
  """
482
- format_hint = fmt.lower()
483
- if format_hint == "yml":
484
- format_hint = "yaml"
485
- if format_hint not in ("json", "yaml", "toon"):
486
- raise ValueError(f"Unsupported sheet export format: {fmt}")
506
+ format_hint = _ensure_format_hint(
507
+ fmt,
508
+ allowed=_FORMAT_HINTS,
509
+ error_type=SerializationError,
510
+ error_message="Unsupported sheet export format: {fmt}",
511
+ )
487
512
 
488
513
  output_dir.mkdir(parents=True, exist_ok=True)
489
514
  written: dict[str, Path] = {}
@@ -498,49 +523,14 @@ def save_sheets(
498
523
  suffix = {"json": ".json", "yaml": ".yaml", "toon": ".toon"}[format_hint]
499
524
  file_name = f"{_sanitize_sheet_filename(sheet_name)}{suffix}"
500
525
  path = output_dir / file_name
501
- match format_hint:
502
- case "json":
503
- indent_val = 2 if pretty and indent is None else indent
504
- text = json.dumps(payload, ensure_ascii=False, indent=indent_val)
505
- case "yaml":
506
- yaml = _require_yaml()
507
- text = str(
508
- yaml.safe_dump(
509
- payload, allow_unicode=True, sort_keys=False, indent=2
510
- )
511
- )
512
- case "toon":
513
- toon = _require_toon()
514
- text = str(toon.encode(payload))
515
- case _:
516
- raise SerializationError(
517
- f"Unsupported sheet export format '{format_hint}'. Allowed: json, yaml, yml, toon."
518
- )
526
+ text = _serialize_payload_from_hint(
527
+ payload, format_hint, pretty=pretty, indent=indent
528
+ )
519
529
  _write_text(path, text)
520
530
  written[sheet_name] = path
521
531
  return written
522
532
 
523
533
 
524
- def _require_yaml() -> ModuleType:
525
- try:
526
- module = importlib.import_module("yaml")
527
- except ImportError as e:
528
- raise MissingDependencyError(
529
- "YAML export requires pyyaml. Install it via `pip install pyyaml` or add the 'yaml' extra."
530
- ) from e
531
- return module
532
-
533
-
534
- def _require_toon() -> ModuleType:
535
- try:
536
- module = importlib.import_module("toon")
537
- except ImportError as e:
538
- raise MissingDependencyError(
539
- "TOON export requires python-toon. Install it via `pip install python-toon` or add the 'toon' extra."
540
- ) from e
541
- return module
542
-
543
-
544
534
  __all__ = [
545
535
  "dict_without_empty_values",
546
536
  "save_as_json",
@@ -552,4 +542,6 @@ __all__ = [
552
542
  "save_print_area_views",
553
543
  "save_auto_page_break_views",
554
544
  "serialize_workbook",
545
+ "_require_yaml",
546
+ "_require_toon",
555
547
  ]