exstruct 0.2.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
exstruct/engine.py ADDED
@@ -0,0 +1,643 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterator
4
+ from contextlib import contextmanager
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Literal, TextIO, TypedDict, cast
8
+
9
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
10
+
11
+ from .core import cells as _cells
12
+ from .core.cells import set_table_detection_params
13
+ from .core.integrate import extract_workbook
14
+ from .io import (
15
+ save_auto_page_break_views,
16
+ save_print_area_views,
17
+ save_sheets,
18
+ serialize_workbook,
19
+ )
20
+ from .models import SheetData, WorkbookData
21
+ from .render import export_pdf, export_sheet_images
22
+
23
# Extraction depth accepted by the engine.
ExtractionMode = Literal["light", "standard", "verbose"]


class TableParams(TypedDict, total=False):
    """Keyword overrides forwarded to ``set_table_detection_params``.

    Declared with ``total=False``, so every key is optional.
    """

    table_score_threshold: float
    density_min: float
    coverage_min: float
    min_nonempty_cells: int
31
+
32
+
33
@dataclass(frozen=True)
class StructOptions:
    """
    Extraction-time settings consumed by ExStructEngine.

    Attributes:
        mode: Extraction mode. One of "light", "standard", "verbose".
            - light: cells + table candidates only (no COM, shapes/charts empty)
            - standard: texted shapes + arrows + charts (if COM available)
            - verbose: all shapes (width/height), charts, table candidates
        table_params: Optional overrides passed as
            `set_table_detection_params(**table_params)`. The engine applies
            them for the duration of an extraction and restores the previous
            detection settings afterwards.
        include_cell_links: Whether cell hyperlinks are extracted. None means
            auto: enabled in verbose mode, disabled otherwise.
    """

    mode: ExtractionMode = "standard"
    # Forwarded to set_table_detection_params if provided.
    table_params: TableParams | None = None
    # None -> auto: verbose=True, others=False.
    include_cell_links: bool | None = None
53
+
54
+
55
class FormatOptions(BaseModel):
    """Formatting options for serialization."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Target serialization format; "yaml" and "yml" are both accepted.
    fmt: Literal["json", "yaml", "yml", "toon"] = Field(
        "json",
        description="Serialization format.",
    )
    # Pretty-printing toggle for JSON output.
    pretty: bool = Field(False, description="Pretty-print JSON output.")
    # Explicit indent width; None leaves the choice to the serializer.
    indent: int | None = Field(
        None,
        description="Indent width for JSON (defaults to 2 when pretty is True).",
    )
67
+
68
+
69
class FilterOptions(BaseModel):
    """Include/exclude filters for output."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # --- cell content -------------------------------------------------
    include_rows: bool = Field(True, description="Include cell rows.")

    # --- shapes (size flag: None defers to the extraction mode) --------
    include_shapes: bool = Field(True, description="Include shapes.")
    include_shape_size: bool | None = Field(
        None,
        description="Include shape size; None -> auto (verbose=True, others=False).",
    )

    # --- charts (size flag: None defers to the extraction mode) --------
    include_charts: bool = Field(True, description="Include charts.")
    include_chart_size: bool | None = Field(
        None,
        description="Include chart size; None -> auto (verbose=True, others=False).",
    )

    # --- tables and print areas ----------------------------------------
    include_tables: bool = Field(
        True,
        description="Include table candidate ranges.",
    )
    include_print_areas: bool | None = Field(
        None,
        description="Include print areas; None -> auto (light=False, others=True).",
    )
    include_auto_print_areas: bool = Field(
        False,
        description="Include COM-computed auto page-break areas.",
    )
94
+
95
+
96
class DestinationOptions(BaseModel):
    """Destinations for optional side outputs."""

    # TextIO is not a pydantic-native type, hence arbitrary types are allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Each directory is optional; None disables that side output.
    sheets_dir: str | Path | None = Field(
        None,
        description="Directory to write per-sheet files.",
    )
    print_areas_dir: str | Path | None = Field(
        None,
        description="Directory to write per-print-area files.",
    )
    auto_page_breaks_dir: str | Path | None = Field(
        None,
        description="Directory to write auto page-break files.",
    )
    # Replaces the default primary output stream when set.
    stream: TextIO | None = Field(
        None,
        description="Stream override for primary output (stdout/file).",
    )
112
+
113
+
114
class OutputOptions(BaseModel):
    """
    Output-time options for ExStructEngine.

    - format: serialization format/indent.
    - filters: include/exclude flags (rows/shapes/charts/tables/print_areas, size flags).
    - destinations: side outputs (per-sheet, per-print-area, stream override).

    Legacy flat fields (fmt, pretty, indent, include_*, sheets_dir, print_areas_dir,
    auto_page_breaks_dir, stream) are still accepted and normalized into the nested
    structures. When a nested group is given explicitly it takes precedence and the
    legacy flat fields belonging to that group are discarded.
    """

    format: FormatOptions = Field(
        default_factory=FormatOptions, description="Formatting options."
    )
    filters: FilterOptions = Field(
        default_factory=FilterOptions, description="Include/exclude flags."
    )
    destinations: DestinationOptions = Field(
        default_factory=DestinationOptions, description="Side output destinations."
    )

    @model_validator(mode="before")
    @classmethod
    def _coerce_legacy(cls, values: dict[str, object]) -> dict[str, object]:
        """Fold legacy flat keyword arguments into the nested option groups.

        Args:
            values: Raw constructor/validation input. Non-dict input (for
                example an existing instance) is returned untouched.

        Returns:
            A new dict in which every legacy flat key has been removed and,
            where the corresponding nested group was not supplied, collected
            under ``format`` / ``filters`` / ``destinations``.
        """
        if not isinstance(values, dict):
            return values

        # Work on a copy: popping from `values` directly would mutate a dict
        # that the caller handed to `model_validate()`.
        merged = dict(values)

        legacy_groups = (
            ("format", ("fmt", "pretty", "indent")),
            (
                "filters",
                (
                    "include_rows",
                    "include_shapes",
                    "include_shape_size",
                    "include_charts",
                    "include_chart_size",
                    "include_tables",
                    "include_print_areas",
                    # Previously missing, so the flat flag was silently ignored.
                    "include_auto_print_areas",
                ),
            ),
            (
                "destinations",
                ("sheets_dir", "print_areas_dir", "auto_page_breaks_dir", "stream"),
            ),
        )

        for group, keys in legacy_groups:
            flat: dict[str, object] = {}
            for key in keys:
                value = merged.pop(key, None)
                # Drop None so the nested model's own defaults apply.
                if value is not None:
                    flat[key] = value
            # An explicitly supplied nested group wins over flat fields.
            if flat and group not in merged:
                merged[group] = flat
        return merged

    # Legacy compatibility properties
    @property
    def fmt(self) -> Literal["json", "yaml", "yml", "toon"]:
        """Serialization format (alias of ``format.fmt``)."""
        return self.format.fmt

    @property
    def pretty(self) -> bool:
        """Pretty-print flag (alias of ``format.pretty``)."""
        return self.format.pretty

    @property
    def indent(self) -> int | None:
        """Indent width (alias of ``format.indent``)."""
        return self.format.indent

    @property
    def include_rows(self) -> bool:
        """Alias of ``filters.include_rows``."""
        return self.filters.include_rows

    @property
    def include_shapes(self) -> bool:
        """Alias of ``filters.include_shapes``."""
        return self.filters.include_shapes

    @property
    def include_shape_size(self) -> bool | None:
        """Alias of ``filters.include_shape_size`` (None means auto)."""
        return self.filters.include_shape_size

    @property
    def include_charts(self) -> bool:
        """Alias of ``filters.include_charts``."""
        return self.filters.include_charts

    @property
    def include_chart_size(self) -> bool | None:
        """Alias of ``filters.include_chart_size`` (None means auto)."""
        return self.filters.include_chart_size

    @property
    def include_tables(self) -> bool:
        """Alias of ``filters.include_tables``."""
        return self.filters.include_tables

    @property
    def include_print_areas(self) -> bool | None:
        """Alias of ``filters.include_print_areas`` (None means auto)."""
        return self.filters.include_print_areas

    @property
    def include_auto_print_areas(self) -> bool:
        """Alias of ``filters.include_auto_print_areas``."""
        return self.filters.include_auto_print_areas

    @property
    def sheets_dir(self) -> Path | None:
        """Per-sheet output directory as a Path, or None when unset."""
        resolved = self.destinations.sheets_dir
        if resolved is None:
            return None
        return resolved if isinstance(resolved, Path) else Path(resolved)

    @property
    def print_areas_dir(self) -> Path | None:
        """Per-print-area output directory as a Path, or None when unset."""
        resolved = self.destinations.print_areas_dir
        if resolved is None:
            return None
        return resolved if isinstance(resolved, Path) else Path(resolved)

    @property
    def stream(self) -> TextIO | None:
        """Primary output stream override (alias of ``destinations.stream``)."""
        return self.destinations.stream

    @property
    def auto_page_breaks_dir(self) -> Path | None:
        """Auto page-break output directory as a Path, or None when unset."""
        resolved = self.destinations.auto_page_breaks_dir
        if resolved is None:
            return None
        return resolved if isinstance(resolved, Path) else Path(resolved)
241
+
242
+
243
class ExStructEngine:
    """
    Configurable engine for ExStruct extraction and export.

    Instances are meant to be treated as immutable (nothing enforces this);
    override options per call if needed.

    Key behaviors:
        - StructOptions: extraction mode and optional table detection params.
        - OutputOptions: serialization format/pretty-print, include/exclude filters,
          per-sheet/per-print-area output dirs, etc.
        - Main methods:
            extract(path, mode=None) -> WorkbookData
                - Modes: light/standard/verbose
                - light: COM-free; cells + tables + print areas only (shapes/charts empty)
            serialize(workbook, ...) -> str
                - Applies include_* filters, then serializes
            export(workbook, ...)
                - Writes to file/stdout; optionally per-sheet and per-print-area files
            process(file_path, ...)
                - One-shot extract->export (CLI equivalent), with optional PDF/PNG
    """

    def __init__(
        self,
        options: StructOptions | None = None,
        output: OutputOptions | None = None,
    ) -> None:
        """Create an engine.

        Args:
            options: Extraction-time options; defaults to ``StructOptions()``.
            output: Output-time options; defaults to ``OutputOptions()``.
        """
        self.options = options or StructOptions()
        self.output = output or OutputOptions()

    @classmethod
    def from_defaults(cls) -> ExStructEngine:
        """Factory to create an engine with default options.

        Implemented as a classmethod so that subclasses get an instance of
        themselves rather than of the base class.
        """
        return cls()

    def _apply_table_params(self) -> None:
        """Apply ``table_params`` to the global detection config without restoring it."""
        if self.options.table_params:
            set_table_detection_params(**self.options.table_params)

    @contextmanager
    def _table_params_scope(self) -> Iterator[None]:
        """
        Temporarily apply table_params and restore previous global config afterward.
        """
        if not self.options.table_params:
            yield
            return
        # Snapshot the module-level config so it can be put back in `finally`.
        prev = cast(TableParams, dict(_cells._DETECTION_CONFIG))
        set_table_detection_params(**self.options.table_params)
        try:
            yield
        finally:
            set_table_detection_params(**prev)

    def _resolve_size_flags(self) -> tuple[bool, bool]:
        """
        Determine whether to include Shape/Chart size fields in output.
        Auto: verbose -> include, others -> exclude.

        Returns:
            ``(include_shape_size, include_chart_size)``.
        """
        filters = self.output.filters
        auto = self.options.mode == "verbose"
        include_shape_size = (
            filters.include_shape_size
            if filters.include_shape_size is not None
            else auto
        )
        include_chart_size = (
            filters.include_chart_size
            if filters.include_chart_size is not None
            else auto
        )
        return include_shape_size, include_chart_size

    def _include_print_areas(self) -> bool:
        """
        Decide whether to include print areas in output.
        Auto: light -> False, others -> True.
        """
        explicit = self.output.filters.include_print_areas
        if explicit is None:
            return self.options.mode != "light"
        return explicit

    def _include_auto_print_areas(self) -> bool:
        """
        Decide whether to include auto page-break areas in output.
        Defaults to False unless explicitly enabled.
        """
        return self.output.filters.include_auto_print_areas

    def _resolve_format(
        self,
        fmt: Literal["json", "yaml", "yml", "toon"] | None,
        pretty: bool | None,
        indent: int | None,
    ) -> tuple[Literal["json", "yaml", "yml", "toon"], bool, int | None]:
        """Fill per-call formatting overrides with the engine's configured defaults.

        Returns:
            ``(fmt, pretty, indent)`` with every None override replaced by the
            corresponding ``OutputOptions.format`` value.
        """
        defaults = self.output.format
        return (
            fmt or defaults.fmt,
            defaults.pretty if pretty is None else pretty,
            defaults.indent if indent is None else indent,
        )

    def _filter_sheet(
        self, sheet: SheetData, include_auto_override: bool | None = None
    ) -> SheetData:
        """Build a copy of ``sheet`` restricted to the configured include_* filters.

        Args:
            sheet: Extracted sheet data.
            include_auto_override: When not None, takes the place of the
                configured ``include_auto_print_areas`` flag.

        Returns:
            A new SheetData; excluded collections are empty lists and, when
            sizes are excluded, shapes/charts are copies with ``w``/``h`` set
            to None.
        """
        filters = self.output.filters
        include_shape_size, include_chart_size = self._resolve_size_flags()
        include_print_areas = self._include_print_areas()
        include_auto_print_areas = (
            include_auto_override
            if include_auto_override is not None
            else self._include_auto_print_areas()
        )
        sizeless = {"w": None, "h": None}

        if filters.include_shapes:
            shapes = [
                s if include_shape_size else s.model_copy(update=sizeless)
                for s in sheet.shapes
            ]
        else:
            shapes = []

        if filters.include_charts:
            charts = [
                c if include_chart_size else c.model_copy(update=sizeless)
                for c in sheet.charts
            ]
        else:
            charts = []

        return SheetData(
            rows=sheet.rows if filters.include_rows else [],
            shapes=shapes,
            charts=charts,
            table_candidates=sheet.table_candidates if filters.include_tables else [],
            print_areas=sheet.print_areas if include_print_areas else [],
            auto_print_areas=sheet.auto_print_areas if include_auto_print_areas else [],
        )

    def _filter_workbook(
        self, wb: WorkbookData, *, include_auto_override: bool | None = None
    ) -> WorkbookData:
        """Apply ``_filter_sheet`` to every sheet and return a new WorkbookData."""
        filtered = {
            name: self._filter_sheet(sheet, include_auto_override=include_auto_override)
            for name, sheet in wb.sheets.items()
        }
        return WorkbookData(book_name=wb.book_name, sheets=filtered)

    @staticmethod
    def _ensure_path(path: str | Path) -> Path:
        """Normalize a string or Path input to a Path instance.

        Args:
            path: Path-like input value.

        Returns:
            Path constructed from the given value.
        """

        return path if isinstance(path, Path) else Path(path)

    @classmethod
    def _ensure_optional_path(cls, path: str | Path | None) -> Path | None:
        """Normalize an optional path-like value to Path when provided.

        Args:
            path: Optional path-like input value.

        Returns:
            Normalized Path when provided, otherwise None.
        """

        if path is None:
            return None
        return cls._ensure_path(path)

    def extract(
        self, file_path: str | Path, *, mode: ExtractionMode | None = None
    ) -> WorkbookData:
        """
        Extract a workbook and return normalized workbook data.

        Args:
            file_path: Path to the .xlsx/.xlsm/.xls file to extract.
            mode: Extraction mode; defaults to the engine's StructOptions.mode.
                - light: COM-free; cells, table candidates, and print areas only.
                - standard: Shapes with text/arrows plus charts; print areas included;
                  size fields retained but hidden from default output.
                - verbose: All shapes (with size) and charts (with size).

        Returns:
            The extracted, unfiltered WorkbookData.

        Raises:
            ValueError: If the resolved mode is not light/standard/verbose.
        """
        chosen_mode = mode or self.options.mode
        if chosen_mode not in ("light", "standard", "verbose"):
            raise ValueError(f"Unsupported mode: {chosen_mode}")
        include_links = (
            self.options.include_cell_links
            if self.options.include_cell_links is not None
            else chosen_mode == "verbose"
        )
        include_print_areas = True  # Extract print areas even in light mode
        # Auto page breaks are extracted when either the filter or a
        # destination directory asks for them.
        include_auto_page_breaks = (
            self.output.filters.include_auto_print_areas
            or self.output.destinations.auto_page_breaks_dir is not None
        )
        normalized_file_path = self._ensure_path(file_path)
        with self._table_params_scope():
            return extract_workbook(
                normalized_file_path,
                mode=chosen_mode,
                include_cell_links=include_links,
                include_print_areas=include_print_areas,
                include_auto_page_breaks=include_auto_page_breaks,
            )

    def serialize(
        self,
        data: WorkbookData,
        *,
        fmt: Literal["json", "yaml", "yml", "toon"] | None = None,
        pretty: bool | None = None,
        indent: int | None = None,
    ) -> str:
        """
        Serialize a workbook after applying include/exclude filters.

        NOTE(review): the auto defaults of the filters are resolved from the
        engine's StructOptions.mode, not from a per-call mode passed to
        extract() -- confirm whether that is intended.

        Args:
            data: Workbook to serialize after filtering.
            fmt: Serialization format; defaults to OutputOptions.fmt.
            pretty: Whether to pretty-print JSON output.
            indent: Indentation to use when pretty-printing JSON.

        Returns:
            The serialized text.
        """
        use_fmt, use_pretty, use_indent = self._resolve_format(fmt, pretty, indent)
        return serialize_workbook(
            self._filter_workbook(data),
            fmt=use_fmt,
            pretty=use_pretty,
            indent=use_indent,
        )

    def export(
        self,
        data: WorkbookData,
        output_path: str | Path | None = None,
        *,
        fmt: Literal["json", "yaml", "yml", "toon"] | None = None,
        pretty: bool | None = None,
        indent: int | None = None,
        sheets_dir: str | Path | None = None,
        print_areas_dir: str | Path | None = None,
        auto_page_breaks_dir: str | Path | None = None,
        stream: TextIO | None = None,
    ) -> None:
        """
        Write filtered workbook data to a file or stream.

        Includes optional per-sheet and per-print-area outputs when destinations are
        provided. The primary output goes to ``output_path`` when given; it goes to
        the stream (or stdout) only when neither ``output_path`` nor any side-output
        directory is in effect.

        NOTE(review): the auto defaults of the filters are resolved from the
        engine's StructOptions.mode, not from a per-call mode passed to
        extract() -- confirm whether that is intended.

        Args:
            data: Workbook to serialize and write.
            output_path: Target file path (str or Path); writes to stdout when None.
            fmt: Serialization format; defaults to OutputOptions.fmt.
            pretty: Whether to pretty-print JSON output.
            indent: Indentation to use when pretty-printing JSON.
            sheets_dir: Directory for per-sheet outputs when provided (str or Path).
            print_areas_dir: Directory for per-print-area outputs when provided (str or Path).
            auto_page_breaks_dir: Directory for auto page-break outputs (str or Path; COM
                environments only).
            stream: Stream override when output_path is None.
        """
        text = self.serialize(data, fmt=fmt, pretty=pretty, indent=indent)
        chosen_fmt, use_pretty, use_indent = self._resolve_format(fmt, pretty, indent)

        destinations = self.output.destinations
        target_stream = stream or destinations.stream

        # Per-call directories win over the configured destinations.
        normalized_output_path = self._ensure_optional_path(output_path)
        normalized_sheets_dir = self._ensure_optional_path(
            sheets_dir if sheets_dir is not None else destinations.sheets_dir
        )
        normalized_print_areas_dir = self._ensure_optional_path(
            print_areas_dir
            if print_areas_dir is not None
            else destinations.print_areas_dir
        )
        normalized_auto_page_breaks_dir = self._ensure_optional_path(
            auto_page_breaks_dir
            if auto_page_breaks_dir is not None
            else destinations.auto_page_breaks_dir
        )
        has_side_outputs = (
            normalized_sheets_dir is not None
            or normalized_print_areas_dir is not None
            or normalized_auto_page_breaks_dir is not None
        )

        if normalized_output_path is not None:
            normalized_output_path.write_text(text, encoding="utf-8")
        elif not has_side_outputs:
            import sys

            stream_target = target_stream or sys.stdout
            stream_target.write(text)
            if not text.endswith("\n"):
                stream_target.write("\n")

        include_shape_size, include_chart_size = self._resolve_size_flags()
        include_shapes = self.output.filters.include_shapes
        include_charts = self.output.filters.include_charts

        # Per-sheet and per-print-area outputs use the same filtered view, so
        # build it once instead of re-filtering for each destination.
        writes_print_areas = (
            normalized_print_areas_dir is not None and self._include_print_areas()
        )
        if normalized_sheets_dir is not None or writes_print_areas:
            filtered = self._filter_workbook(data)

            if normalized_sheets_dir is not None:
                save_sheets(
                    filtered,
                    normalized_sheets_dir,
                    fmt=chosen_fmt,
                    pretty=use_pretty,
                    indent=use_indent,
                )

            if writes_print_areas:
                save_print_area_views(
                    filtered,
                    normalized_print_areas_dir,
                    fmt=chosen_fmt,
                    pretty=use_pretty,
                    indent=use_indent,
                    include_shapes=include_shapes,
                    include_charts=include_charts,
                    include_shape_size=include_shape_size,
                    include_chart_size=include_chart_size,
                )

        if normalized_auto_page_breaks_dir is not None:
            # Auto page-break areas are forced on for this view regardless of
            # the include_auto_print_areas filter.
            auto_filtered = self._filter_workbook(data, include_auto_override=True)
            save_auto_page_break_views(
                auto_filtered,
                normalized_auto_page_breaks_dir,
                fmt=chosen_fmt,
                pretty=use_pretty,
                indent=use_indent,
                include_shapes=include_shapes,
                include_charts=include_charts,
                include_shape_size=include_shape_size,
                include_chart_size=include_chart_size,
            )

    def process(
        self,
        file_path: str | Path,
        output_path: str | Path | None = None,
        *,
        out_fmt: str | None = None,
        image: bool = False,
        pdf: bool = False,
        dpi: int = 72,
        mode: ExtractionMode | None = None,
        pretty: bool | None = None,
        indent: int | None = None,
        sheets_dir: str | Path | None = None,
        print_areas_dir: str | Path | None = None,
        auto_page_breaks_dir: str | Path | None = None,
        stream: TextIO | None = None,
    ) -> None:
        """
        One-shot extract->export wrapper (CLI equivalent) with optional PDF/PNG output.

        Args:
            file_path: Input Excel workbook path (str or Path).
            output_path: Target file path (str or Path); writes to stdout when None.
            out_fmt: Serialization format for structured output.
            image: Whether to export PNGs alongside structured output. A PDF is
                exported as well, because the image directory is named after it.
            pdf: Whether to export a PDF snapshot alongside structured output.
            dpi: DPI to use when rendering images.
            mode: Extraction mode; defaults to the engine's StructOptions.mode.
                NOTE(review): this override only reaches extract(); the output
                filters still derive their auto defaults from the engine's
                StructOptions.mode -- confirm whether that is intended.
            pretty: Whether to pretty-print JSON output.
            indent: Indentation to use when pretty-printing JSON.
            sheets_dir: Directory for per-sheet structured outputs (str or Path).
            print_areas_dir: Directory for per-print-area structured outputs (str or Path).
            auto_page_breaks_dir: Directory for auto page-break outputs (str or Path).
            stream: Stream override when writing to stdout.
        """
        normalized_file_path = self._ensure_path(file_path)
        normalized_output_path = self._ensure_optional_path(output_path)

        wb = self.extract(normalized_file_path, mode=mode)
        chosen_fmt = out_fmt or self.output.format.fmt
        self.export(
            wb,
            output_path=normalized_output_path,
            fmt=chosen_fmt,  # type: ignore[arg-type]
            pretty=pretty,
            indent=indent,
            sheets_dir=self._ensure_optional_path(sheets_dir),
            print_areas_dir=self._ensure_optional_path(print_areas_dir),
            auto_page_breaks_dir=self._ensure_optional_path(auto_page_breaks_dir),
            stream=stream,
        )

        if pdf or image:
            # Only the stem matters here: whatever suffix the base carries is
            # replaced by ".pdf", so no format-specific suffix is needed first.
            base_target = normalized_output_path or normalized_file_path
            pdf_path = base_target.with_suffix(".pdf")
            export_pdf(normalized_file_path, pdf_path)
            if image:
                images_dir = pdf_path.parent / f"{pdf_path.stem}_images"
                export_sheet_images(normalized_file_path, images_dir, dpi=dpi)
exstruct/errors.py ADDED
@@ -0,0 +1,35 @@
1
"""Project-specific exception hierarchy for ExStruct."""

# The docstring must come first: a string placed after the __future__ import
# is an ordinary expression and leaves the module's __doc__ unset.
from __future__ import annotations


class ExstructError(Exception):
    """Base exception for ExStruct."""


class ConfigError(ExstructError):
    """Raised when user-provided configuration or parameters are invalid."""


class ExtractionError(ExstructError):
    """Raised when workbook extraction fails."""


class SerializationError(ExstructError):
    """Raised when serialization fails or an unsupported format is requested."""


class MissingDependencyError(ExstructError):
    """Raised when an optional dependency required for the requested operation is missing."""


class RenderError(ExstructError):
    """Raised when rendering (PDF/PNG) fails."""


class OutputError(ExstructError):
    """Raised when writing outputs to disk or streams fails."""


class PrintAreaError(ExstructError, ValueError):
    """Raised when print-area specific processing fails (also a ValueError for compatibility)."""