exstruct 0.2.80__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exstruct/__init__.py +387 -0
- exstruct/cli/availability.py +49 -0
- exstruct/cli/main.py +134 -0
- exstruct/core/__init__.py +0 -0
- exstruct/core/cells.py +1039 -0
- exstruct/core/charts.py +241 -0
- exstruct/core/integrate.py +388 -0
- exstruct/core/shapes.py +275 -0
- exstruct/engine.py +643 -0
- exstruct/errors.py +35 -0
- exstruct/io/__init__.py +555 -0
- exstruct/models/__init__.py +335 -0
- exstruct/models/maps.py +335 -0
- exstruct/models/types.py +8 -0
- exstruct/py.typed +0 -0
- exstruct/render/__init__.py +118 -0
- exstruct-0.2.80.dist-info/METADATA +435 -0
- exstruct-0.2.80.dist-info/RECORD +20 -0
- exstruct-0.2.80.dist-info/WHEEL +4 -0
- exstruct-0.2.80.dist-info/entry_points.txt +3 -0
exstruct/__init__.py
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Literal, TextIO
|
|
6
|
+
|
|
7
|
+
from .core.cells import set_table_detection_params
|
|
8
|
+
from .core.integrate import extract_workbook
|
|
9
|
+
from .engine import (
|
|
10
|
+
DestinationOptions,
|
|
11
|
+
ExStructEngine,
|
|
12
|
+
FilterOptions,
|
|
13
|
+
OutputOptions,
|
|
14
|
+
StructOptions,
|
|
15
|
+
)
|
|
16
|
+
from .errors import (
|
|
17
|
+
ConfigError,
|
|
18
|
+
ExstructError,
|
|
19
|
+
MissingDependencyError,
|
|
20
|
+
PrintAreaError,
|
|
21
|
+
RenderError,
|
|
22
|
+
SerializationError,
|
|
23
|
+
)
|
|
24
|
+
from .io import (
|
|
25
|
+
save_as_json,
|
|
26
|
+
save_as_toon,
|
|
27
|
+
save_as_yaml,
|
|
28
|
+
save_auto_page_break_views,
|
|
29
|
+
save_print_area_views,
|
|
30
|
+
save_sheets,
|
|
31
|
+
serialize_workbook,
|
|
32
|
+
)
|
|
33
|
+
from .models import (
|
|
34
|
+
CellRow,
|
|
35
|
+
Chart,
|
|
36
|
+
ChartSeries,
|
|
37
|
+
PrintArea,
|
|
38
|
+
PrintAreaView,
|
|
39
|
+
Shape,
|
|
40
|
+
SheetData,
|
|
41
|
+
WorkbookData,
|
|
42
|
+
)
|
|
43
|
+
from .render import export_pdf, export_sheet_images
|
|
44
|
+
|
|
45
|
+
logger = logging.getLogger(__name__)

# Public API of the package. Each name is listed exactly once and is either
# imported at the top of this module or defined further down in it.
__all__ = [
    "extract",
    "export",
    "export_sheets",
    "export_sheets_as",
    "export_print_areas_as",
    "export_auto_page_breaks",
    "export_pdf",
    "export_sheet_images",
    "ExstructError",
    "ConfigError",
    "MissingDependencyError",
    "RenderError",
    "SerializationError",
    "PrintAreaError",
    "process_excel",
    "ExtractionMode",
    "CellRow",
    "Shape",
    "ChartSeries",
    "Chart",
    "SheetData",
    "WorkbookData",
    "PrintArea",
    "PrintAreaView",
    "set_table_detection_params",
    "extract_workbook",
    "ExStructEngine",
    "StructOptions",
    "OutputOptions",
    "FilterOptions",
    "DestinationOptions",
    "serialize_workbook",
]


# Extraction detail levels accepted by `extract` and `process_excel`.
ExtractionMode = Literal["light", "standard", "verbose"]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def extract(file_path: str | Path, mode: ExtractionMode = "standard") -> WorkbookData:
    """
    Read an Excel workbook and return its contents as WorkbookData.

    Args:
        file_path: Path to .xlsx/.xlsm/.xls.
        mode: "light" / "standard" / "verbose"
            - light: cells + table detection only (no COM, shapes/charts empty). Print areas via openpyxl.
            - standard: texted shapes + arrows + charts (COM if available), print areas included. Shape/chart size is kept but hidden by default in output.
            - verbose: all shapes (including textless) with size, charts with size.

    Returns:
        WorkbookData containing sheets, rows, shapes, charts, and print areas.

    Raises:
        ValueError: If an invalid mode is provided.

    Examples:
        Extract with hyperlinks (verbose) and inspect table candidates:

        >>> from exstruct import extract
        >>> wb = extract("input.xlsx", mode="verbose")
        >>> wb.sheets["Sheet1"].table_candidates
        ['A1:B5']
    """
    struct_options = StructOptions(
        mode=mode,
        # Cell hyperlinks are requested only in verbose mode.
        include_cell_links=(mode == "verbose"),
    )
    return ExStructEngine(options=struct_options).extract(file_path, mode=mode)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def export(
    data: WorkbookData,
    path: str | Path,
    fmt: Literal["json", "yaml", "yml", "toon"] | None = None,
    *,
    pretty: bool = False,
    indent: int | None = None,
) -> None:
    """
    Write WorkbookData to a single file, picking the serializer from the format.

    Args:
        data: WorkbookData from `extract` or similar
        path: destination path; its extension selects the format when `fmt` is not given
        fmt: explicit format (json/yaml/yml/toon); takes precedence over the extension
        pretty: pretty-print JSON
        indent: JSON indent width (defaults to 2 when pretty=True and indent is None)

    Raises:
        ValueError: If the format is unsupported.

    Examples:
        Write pretty JSON and YAML (requires pyyaml):

        >>> from exstruct import export, extract
        >>> wb = extract("input.xlsx")
        >>> export(wb, "out.json", pretty=True)
        >>> export(wb, "out.yaml", fmt="yaml")  # doctest: +SKIP
    """
    target = Path(path)
    # Precedence: explicit fmt, then the file extension, then JSON as the fallback.
    chosen = fmt or target.suffix.lstrip(".") or "json"
    chosen = chosen.lower()

    if chosen == "json":
        save_as_json(data, target, pretty=pretty, indent=indent)
    elif chosen in ("yaml", "yml"):
        save_as_yaml(data, target)
    elif chosen == "toon":
        save_as_toon(data, target)
    else:
        raise ValueError(f"Unsupported export format: {chosen}")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def export_sheets(data: WorkbookData, dir_path: str | Path) -> dict[str, Path]:
    """
    Write every sheet of the workbook to its own JSON file.

    - Payload: {book_name, sheet_name, sheet: SheetData}
    - Returns: {sheet_name: Path}

    Args:
        data: WorkbookData to split by sheet.
        dir_path: Output directory.

    Returns:
        Mapping from sheet name to written JSON path.

    Examples:
        >>> from exstruct import export_sheets, extract
        >>> wb = extract("input.xlsx")
        >>> paths = export_sheets(wb, "out_sheets")
        >>> "Sheet1" in paths
        True
    """
    out_dir = Path(dir_path)
    # Same writer as export_sheets_as, with the format pinned to JSON.
    return save_sheets(data, out_dir, fmt="json")
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def export_sheets_as(
    data: WorkbookData,
    dir_path: str | Path,
    fmt: Literal["json", "yaml", "yml", "toon"] = "json",
    *,
    pretty: bool = False,
    indent: int | None = None,
) -> dict[str, Path]:
    """
    Write every sheet to its own file in the chosen format (json/yaml/toon).

    Args:
        data: WorkbookData to split by sheet.
        dir_path: Output directory.
        fmt: Output format; defaults to json.
        pretty: Pretty-print JSON.
        indent: JSON indent width (defaults to 2 when pretty=True and indent is None).

    Returns:
        Mapping from sheet name to written file path.

    Raises:
        ValueError: If an unsupported format is passed.

    Examples:
        Export per sheet as YAML (requires pyyaml):

        >>> from exstruct import export_sheets_as, extract
        >>> wb = extract("input.xlsx")
        >>> _ = export_sheets_as(wb, "out_yaml", fmt="yaml")  # doctest: +SKIP
    """
    out_dir = Path(dir_path)
    return save_sheets(data, out_dir, fmt=fmt, pretty=pretty, indent=indent)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def export_print_areas_as(
    data: WorkbookData,
    dir_path: str | Path,
    fmt: Literal["json", "yaml", "yml", "toon"] = "json",
    *,
    pretty: bool = False,
    indent: int | None = None,
    normalize: bool = False,
) -> dict[str, Path]:
    """
    Write one PrintAreaView file per print area.

    Args:
        data: WorkbookData that contains print areas
        dir_path: output directory
        fmt: json/yaml/yml/toon
        pretty: Pretty-print JSON output.
        indent: JSON indent width (defaults to 2 when pretty is True and indent is None).
        normalize: rebase row/col indices to the print-area origin when True

    Returns:
        dict mapping area key to path (e.g., "Sheet1#1": /.../Sheet1_area1_...json)

    Examples:
        Export print areas when present:

        >>> from exstruct import export_print_areas_as, extract
        >>> wb = extract("input.xlsx", mode="standard")
        >>> paths = export_print_areas_as(wb, "areas")
        >>> isinstance(paths, dict)
        True
    """
    out_dir = Path(dir_path)
    return save_print_area_views(
        data, out_dir, fmt=fmt, pretty=pretty, indent=indent, normalize=normalize
    )
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def export_auto_page_breaks(
    data: WorkbookData,
    dir_path: str | Path,
    fmt: Literal["json", "yaml", "yml", "toon"] = "json",
    *,
    pretty: bool = False,
    indent: int | None = None,
    normalize: bool = False,
) -> dict[str, Path]:
    """
    Write one PrintAreaView file per auto page-break area (COM-computed).

    Args:
        data: WorkbookData containing auto_print_areas (COM extraction with auto breaks enabled)
        dir_path: output directory
        fmt: json/yaml/yml/toon
        pretty: Pretty-print JSON output.
        indent: JSON indent width (defaults to 2 when pretty is True and indent is None).
        normalize: rebase row/col indices to the area origin when True

    Returns:
        dict mapping area key to path (e.g., "Sheet1#1": /.../Sheet1_auto_page1_...json)

    Raises:
        PrintAreaError: If no sheet has any auto page-break areas.

    Examples:
        >>> from exstruct import PrintAreaError, export_auto_page_breaks, extract
        >>> wb = extract("input.xlsx", mode="standard")
        >>> try:
        ...     export_auto_page_breaks(wb, "auto_areas")
        ... except PrintAreaError:
        ...     pass
    """
    has_auto_areas = any(sheet.auto_print_areas for sheet in data.sheets.values())
    if has_auto_areas:
        return save_auto_page_break_views(
            data,
            Path(dir_path),
            fmt=fmt,
            pretty=pretty,
            indent=indent,
            normalize=normalize,
        )

    # Nothing to export: log the problem, then fail with the same message.
    message = "No auto page-break areas found. Enable COM-based auto page breaks before exporting."
    logger.warning(message)
    raise PrintAreaError(message)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def process_excel(
    file_path: str | Path,
    output_path: str | Path | None = None,
    out_fmt: str = "json",
    image: bool = False,
    pdf: bool = False,
    dpi: int = 72,
    mode: ExtractionMode = "standard",
    pretty: bool = False,
    indent: int | None = None,
    sheets_dir: str | Path | None = None,
    print_areas_dir: str | Path | None = None,
    auto_page_breaks_dir: str | Path | None = None,
    stream: TextIO | None = None,
) -> None:
    """
    One-call pipeline: extract -> serialize (file or stdout) -> optional PDF/PNG.

    Args:
        file_path: Input Excel workbook (path string or Path).
        output_path: None for stdout; otherwise, write to file (string or Path).
        out_fmt: json/yaml/yml/toon.
        image: True to also output PNGs (requires Excel + COM + pypdfium2).
        pdf: True to also output PDF (requires Excel + COM + pypdfium2).
        dpi: DPI for image output.
        mode: light/standard/verbose (same meaning as `extract`).
        pretty: Pretty-print JSON.
        indent: JSON indent width.
        sheets_dir: Directory to write per-sheet files (string or Path).
        print_areas_dir: Directory to write per-print-area files (string or Path).
        auto_page_breaks_dir: Directory to write per-auto-page-break files (COM only).
        stream: IO override when output_path is None.

    Raises:
        ValueError: If an unsupported format or mode is given.
        PrintAreaError: When exporting auto page breaks without available data.
        RenderError: When rendering fails (Excel/COM/pypdfium2 issues).

    Examples:
        Extract and write JSON to stdout, plus per-sheet files:

        >>> from pathlib import Path
        >>> from exstruct import process_excel
        >>> process_excel(Path("input.xlsx"), output_path=None, sheets_dir=Path("sheets"))

        Write JSON to a file and also render a PDF (COM + Excel required):

        >>> process_excel(Path("input.xlsx"), output_path=Path("out.json"), pdf=True)  # doctest: +SKIP
    """
    struct_options = StructOptions(mode=mode)

    # Shape and chart sizes are emitted only in verbose mode.
    is_verbose = mode == "verbose"
    output_options = OutputOptions(
        fmt=out_fmt,
        pretty=pretty,
        indent=indent,
        sheets_dir=sheets_dir,
        print_areas_dir=print_areas_dir,
        auto_page_breaks_dir=auto_page_breaks_dir,
        # Light mode passes None here; the other modes force print areas on.
        include_print_areas=None if mode == "light" else True,
        include_shape_size=is_verbose,
        include_chart_size=is_verbose,
        stream=stream,
    )

    engine = ExStructEngine(options=struct_options, output=output_options)
    engine.process(
        file_path=file_path,
        output_path=output_path,
        out_fmt=out_fmt,
        image=image,
        pdf=pdf,
        dpi=dpi,
        mode=mode,
        pretty=pretty,
        indent=indent,
        sheets_dir=sheets_dir,
        print_areas_dir=print_areas_dir,
        auto_page_breaks_dir=auto_page_breaks_dir,
        stream=stream,
    )
|
|
exstruct/cli/availability.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
import xlwings as xw
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ComAvailability(BaseModel):
    """Outcome of checking whether Excel COM-dependent features can be used."""

    # Required flag (no default).
    available: bool = Field(
        ...,
        description="True when Excel COM can be used from this environment.",
    )
    # Optional explanation; defaults to None.
    reason: str | None = Field(
        default=None,
        description="Reason COM features are unavailable.",
    )
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_com_availability() -> ComAvailability:
    """Probe whether Excel COM can be used for CLI features.

    The check short-circuits when the SKIP_COM_TESTS environment variable is
    set (to any non-empty value) or when not running on Windows; otherwise it
    starts a hidden Excel instance through xlwings and quits it again.

    Returns:
        ComAvailability describing whether COM features can be used.
    """
    skip_reason: str | None = None
    if os.getenv("SKIP_COM_TESTS"):
        skip_reason = "SKIP_COM_TESTS is set."
    elif sys.platform != "win32":
        skip_reason = "Non-Windows platform."
    if skip_reason is not None:
        return ComAvailability(available=False, reason=skip_reason)

    try:
        app = xw.App(add_book=False, visible=False)
    except Exception as exc:
        # Any failure to start Excel counts as "unavailable"; only the
        # exception class name is reported.
        failure = f"Excel COM is unavailable ({exc.__class__.__name__})."
        return ComAvailability(available=False, reason=failure)
    else:
        try:
            app.quit()
        except Exception:
            # Best effort: Excel did start, so COM is still reported as available.
            logger.warning("Failed to quit Excel during COM availability check.")

    return ComAvailability(available=True, reason=None)
|
exstruct/cli/main.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from exstruct import process_excel
|
|
7
|
+
from exstruct.cli.availability import ComAvailability, get_com_availability
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _add_auto_page_breaks_argument(
    parser: argparse.ArgumentParser, availability: ComAvailability
) -> None:
    """Register --auto-page-breaks-dir on the parser, but only when COM is available."""
    if availability.available:
        parser.add_argument(
            "--auto-page-breaks-dir",
            type=Path,
            help="Optional directory to write one file per auto page-break area (COM only).",
        )
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def build_parser(
    availability: ComAvailability | None = None,
) -> argparse.ArgumentParser:
    """Create the argument parser for the CLI.

    Args:
        availability: COM availability to use instead of probing; when None,
            `get_com_availability()` is called.

    Returns:
        Configured argument parser.
    """
    parser = argparse.ArgumentParser(
        description="Dev-only CLI stub for ExStruct extraction."
    )
    add = parser.add_argument

    # Input and primary output.
    add("input", type=Path, help="Excel file (.xlsx/.xlsm/.xls)")
    add(
        "-o",
        "--output",
        type=Path,
        help="Output path. If omitted, writes to stdout.",
    )
    add(
        "-f",
        "--format",
        default="json",
        choices=["json", "yaml", "yml", "toon"],
        help="Export format",
    )

    # Rendering switches.
    add("--image", action="store_true", help="(placeholder) Render PNG alongside JSON")
    add("--pdf", action="store_true", help="(placeholder) Render PDF alongside JSON")
    add(
        "--dpi",
        type=int,
        default=144,
        help="DPI for image rendering (placeholder)",
    )

    # Extraction detail and JSON formatting.
    add(
        "-m",
        "--mode",
        default="standard",
        choices=["light", "standard", "verbose"],
        help="Extraction detail level",
    )
    add(
        "--pretty",
        action="store_true",
        help="Pretty-print JSON output (indent=2). Default is compact JSON.",
    )

    # Optional per-sheet / per-print-area output directories.
    add(
        "--sheets-dir",
        type=Path,
        help="Optional directory to write one file per sheet (format follows --format).",
    )
    add(
        "--print-areas-dir",
        type=Path,
        help="Optional directory to write one file per print area (format follows --format).",
    )

    # The auto page-break option is registered last, and only if COM is available.
    if availability is None:
        availability = get_com_availability()
    _add_auto_page_breaks_argument(parser, availability)
    return parser
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the CLI entrypoint.

    Args:
        argv: Optional argument list for testing; None lets argparse read
            the process arguments.

    Returns:
        Exit code: 0 on success, 1 when the input file does not exist or
        processing raises an exception.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    input_path: Path = args.input
    if not input_path.exists():
        print(f"File not found: {input_path}", flush=True)
        # A missing input is a failure, so report it with a non-zero exit
        # code that shells and CI can detect.
        return 1

    try:
        process_excel(
            file_path=input_path,
            output_path=args.output,
            out_fmt=args.format,
            image=args.image,
            pdf=args.pdf,
            dpi=args.dpi,
            mode=args.mode,
            pretty=args.pretty,
            sheets_dir=args.sheets_dir,
            print_areas_dir=args.print_areas_dir,
            # The option is only registered when COM is available, so the
            # attribute may be absent from the parsed namespace.
            auto_page_breaks_dir=getattr(args, "auto_page_breaks_dir", None),
        )
    except Exception as e:
        # Top-level CLI boundary: turn any failure into a message and exit code 1.
        print(f"Error: {e}", flush=True)
        return 1
    return 0
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# Script entry point: main()'s return value becomes the process exit status via SystemExit.
if __name__ == "__main__":
|
|
134
|
+
raise SystemExit(main())
|
|
File without changes
|