exstruct 0.2.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from pathlib import Path
5
+ import shutil
6
+ import tempfile
7
+ from types import ModuleType
8
+ from typing import Any, cast
9
+
10
+ import xlwings as xw
11
+
12
+ from ..errors import MissingDependencyError, RenderError
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def _require_excel_app() -> xw.App:
    """Start a hidden Excel application through xlwings and return it.

    The app is created without an initial workbook (``add_book=False``) and
    without a visible window (``visible=False``).

    Returns:
        The newly created ``xw.App`` instance.

    Raises:
        RenderError: If creating the app fails for any reason; the original
            exception is chained as the cause.
    """
    try:
        return xw.App(add_book=False, visible=False)
    except Exception as exc:
        message = "Excel (COM) is not available. Rendering (PDF/image) requires a desktop Excel installation."
        raise RenderError(message) from exc
26
+
27
+
28
def export_pdf(excel_path: str | Path, output_pdf: str | Path) -> list[str]:
    """Export an Excel workbook to PDF via Excel COM and return sheet names in order.

    The source workbook is copied into a temporary directory, Excel opens
    that copy, writes a PDF next to it, and the PDF is then copied to
    ``output_pdf``.

    Args:
        excel_path: Path of the workbook to export.
        output_pdf: Destination path of the PDF. Missing parent directories
            are created.

    Returns:
        The names of the workbook's sheets, in the order Excel reports them.

    Raises:
        RenderError: If Excel cannot be started, if opening/exporting/copying
            fails, or if ``output_pdf`` does not exist afterwards.
        OSError: Creating the output directory and copying the source
            workbook happen outside the wrapped section, so their errors
            propagate unchanged.
    """
    normalized_excel_path = Path(excel_path)
    normalized_output_pdf = Path(output_pdf)
    normalized_output_pdf.parent.mkdir(parents=True, exist_ok=True)

    # Excel refuses to open a workbook whose extension does not match its
    # real format (e.g. an .xlsm saved under an .xlsx name), so the temporary
    # copy keeps a recognised Excel suffix. Anything else falls back to the
    # historical ".xlsx" name, so inputs that worked before still do.
    excel_suffixes = {".xls", ".xlsb", ".xlsm", ".xlsx"}
    source_suffix = normalized_excel_path.suffix.lower()
    temp_suffix = source_suffix if source_suffix in excel_suffixes else ".xlsx"

    # XlFixedFormatType.xlTypePDF in the Excel object model.
    xl_type_pdf = 0

    with tempfile.TemporaryDirectory() as td:
        temp_dir = Path(td)
        temp_book = temp_dir / f"book{temp_suffix}"
        temp_pdf = temp_dir / "book.pdf"
        shutil.copy(normalized_excel_path, temp_book)

        app: xw.App | None = None
        wb: xw.Book | None = None
        try:
            app = _require_excel_app()
            wb = app.books.open(str(temp_book))
            sheet_names = [s.name for s in wb.sheets]
            wb.api.ExportAsFixedFormat(xl_type_pdf, str(temp_pdf))
            shutil.copy(temp_pdf, normalized_output_pdf)
        except RenderError:
            raise
        except Exception as exc:
            raise RenderError(
                (
                    "Failed to export PDF for "
                    f"'{normalized_excel_path}' to '{normalized_output_pdf}'."
                )
            ) from exc
        finally:
            # Quit Excel even when closing the workbook raises; otherwise a
            # hidden Excel process would be left running.
            try:
                if wb is not None:
                    wb.close()
            finally:
                if app is not None:
                    app.quit()

    if not normalized_output_pdf.exists():
        raise RenderError(
            f"Failed to export PDF to '{normalized_output_pdf}'."
        )
    return sheet_names
67
+
68
+
69
def _require_pdfium() -> ModuleType:
    """Import ``pypdfium2`` lazily and return the module object.

    Returns:
        The imported ``pypdfium2`` module.

    Raises:
        MissingDependencyError: If ``pypdfium2`` cannot be imported; the
            message explains how to install it and the ``ImportError`` is
            chained as the cause.
    """
    try:
        import pypdfium2
    except ImportError as exc:
        hint = "Image rendering requires pypdfium2. Install it via `pip install pypdfium2 pillow` or add the 'render' extra."
        raise MissingDependencyError(hint) from exc
    else:
        return cast(ModuleType, pypdfium2)
78
+
79
+
80
def export_sheet_images(
    excel_path: str | Path, output_dir: str | Path, dpi: int = 144
) -> list[Path]:
    """Write one PNG per sheet by exporting the workbook to PDF and rasterizing it.

    The workbook is exported to a temporary PDF with ``export_pdf``; pages are
    then rendered with pypdfium2 and saved as
    ``<NN>_<sanitized sheet name>.png`` inside ``output_dir``.

    Args:
        excel_path: Path of the workbook to render.
        output_dir: Directory that receives the PNG files; created if missing.
        dpi: Target resolution. Used both for the render scale (``dpi / 72``)
            and for the DPI value stored in each PNG.

    Returns:
        The written image paths, in sheet order.

    Raises:
        MissingDependencyError: Propagated from ``_require_pdfium`` when
            pypdfium2 is not installed.
        RenderError: For any failure during PDF export or rasterization.
    """
    pdfium = cast(Any, _require_pdfium())
    normalized_excel_path = Path(excel_path)
    normalized_output_dir = Path(output_dir)
    normalized_output_dir.mkdir(parents=True, exist_ok=True)

    try:
        with tempfile.TemporaryDirectory() as td:
            tmp_pdf = Path(td) / "book.pdf"
            sheet_names = export_pdf(normalized_excel_path, tmp_pdf)

            # pypdfium2 scales relative to 72 units per inch.
            zoom = dpi / 72.0
            image_paths: list[Path] = []
            with pdfium.PdfDocument(str(tmp_pdf)) as document:
                # NOTE(review): the i-th PDF page is paired with the i-th
                # sheet name, which assumes each sheet prints to exactly one
                # page -- confirm for multi-page or hidden sheets.
                for page_index, sheet_name in enumerate(sheet_names):
                    rendered = document[page_index].render(scale=zoom).to_pil()
                    file_stem = _sanitize_sheet_filename(sheet_name)
                    target = (
                        normalized_output_dir / f"{page_index + 1:02d}_{file_stem}.png"
                    )
                    rendered.save(target, format="PNG", dpi=(dpi, dpi))
                    image_paths.append(target)
            return image_paths
    except RenderError:
        raise
    except Exception as exc:
        raise RenderError(
            f"Failed to export sheet images to '{normalized_output_dir}'."
        ) from exc
112
+
113
+
114
+ def _sanitize_sheet_filename(name: str) -> str:
115
+ return "".join("_" if c in '\\/:*?"<>|' else c for c in name).strip() or "sheet"
116
+
117
+
118
+ __all__ = ["export_pdf", "export_sheet_images"]
@@ -0,0 +1,435 @@
1
+ Metadata-Version: 2.3
2
+ Name: exstruct
3
+ Version: 0.2.80
4
+ Summary: Excel to structured JSON (tables, shapes, charts) for LLM/RAG pipelines
5
+ Keywords: excel,structure,data,exstruct
6
+ Author: harumiWeb
7
+ License: BSD 3-Clause License
8
+
9
+ Copyright (c) 2025, ExStruct Contributors
10
+ All rights reserved.
11
+
12
+ Redistribution and use in source and binary forms, with or without
13
+ modification, are permitted provided that the following conditions are met:
14
+
15
+ 1. Redistributions of source code must retain the above copyright notice, this
16
+ list of conditions and the following disclaimer.
17
+
18
+ 2. Redistributions in binary form must reproduce the above copyright notice,
19
+ this list of conditions and the following disclaimer in the documentation
20
+ and/or other materials provided with the distribution.
21
+
22
+ 3. Neither the name of the copyright holder nor the names of its
23
+ contributors may be used to endorse or promote products derived from
24
+ this software without specific prior written permission.
25
+
26
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
30
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
34
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
+ Requires-Dist: numpy>=2.3.5
37
+ Requires-Dist: openpyxl>=3.1.5
38
+ Requires-Dist: pandas>=2.3.3
39
+ Requires-Dist: pydantic>=2.12.5
40
+ Requires-Dist: scipy>=1.16.3
41
+ Requires-Dist: xlwings>=0.33.16
42
+ Requires-Dist: pypdfium2>=5.1.0 ; extra == 'render'
43
+ Requires-Dist: pillow>=12.0.0 ; extra == 'render'
44
+ Requires-Dist: python-toon>=0.1.3 ; extra == 'toon'
45
+ Requires-Dist: pyyaml>=6.0.3 ; extra == 'yaml'
46
+ Requires-Python: >=3.11
47
+ Project-URL: Documentation, https://harumiweb.github.io/exstruct/
48
+ Project-URL: Homepage, https://harumiweb.github.io/exstruct/
49
+ Project-URL: Issues, https://github.com/harumiWeb/exstruct/issues
50
+ Project-URL: Repository, https://github.com/harumiWeb/exstruct
51
+ Provides-Extra: render
52
+ Provides-Extra: toon
53
+ Provides-Extra: yaml
54
+ Description-Content-Type: text/markdown
55
+
56
+ # ExStruct — Excel Structured Extraction Engine
57
+
58
+ [![PyPI version](https://badge.fury.io/py/exstruct.svg)](https://pypi.org/project/exstruct/) [![PyPI Downloads](https://static.pepy.tech/personalized-badge/exstruct?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/exstruct) ![Licence: BSD-3-Clause](https://img.shields.io/badge/license-BSD--3--Clause-blue?style=flat-square) [![pytest](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml/badge.svg)](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/e081cb4f634e4175b259eb7c34f54f60)](https://app.codacy.com/gh/harumiWeb/exstruct/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
59
+
60
+ ![ExStruct Image](/docs/assets/icon.webp)
61
+
62
+ ExStruct reads Excel workbooks and outputs structured data (cells, table candidates, shapes, charts, print areas/views, auto page-break areas, hyperlinks) as JSON by default, with optional YAML/TOON formats. It targets both COM/Excel environments (rich extraction) and non-COM environments (cells + table candidates + print areas), with tunable detection heuristics and multiple output modes to fit LLM/RAG pipelines.
63
+
64
+ [日本版README](README.ja.md)
65
+
66
+ ## Features
67
+
68
+ - **Excel → Structured JSON**: cells, shapes, charts, table candidates, print areas/views, and auto page-break areas per sheet.
69
+ - **Output modes**: `light` (cells + table candidates + print areas; no COM, shapes/charts empty), `standard` (texted shapes + arrows, charts, print areas), `verbose` (all shapes with width/height, charts with size, print areas). Verbose also emits cell hyperlinks. Size output is flag-controlled.
70
+ - **Auto page-break export (COM only)**: capture Excel-computed auto page breaks and write per-area JSON/YAML/TOON when requested (CLI option appears only when COM is available).
71
+ - **Formats**: JSON (compact by default, `--pretty` available), YAML, TOON (optional dependencies).
72
+ - **Table detection tuning**: adjust heuristics at runtime via API.
73
+ - **CLI rendering** (Excel required): optional PDF and per-sheet PNGs.
74
+ - **Graceful fallback**: if Excel COM is unavailable, extraction falls back to cells + table candidates without crashing.
75
+
76
+ ## Installation
77
+
78
+ ```bash
79
+ pip install exstruct
80
+ ```
81
+
82
+ Optional extras:
83
+
84
+ - YAML: `pip install pyyaml`
85
+ - TOON: `pip install python-toon`
86
+ - Rendering (PDF/PNG): Excel + `pip install pypdfium2 pillow`
87
+ - All extras at once: `pip install exstruct[yaml,toon,render]`
88
+
89
+ Platform note:
90
+
91
+ - Full extraction (shapes/charts) targets Windows + Excel (COM via xlwings). On other platforms, use `mode=light` to get cells + `table_candidates`.
92
+
93
+ ## Quick Start (CLI)
94
+
95
+ ```bash
96
+ exstruct input.xlsx > output.json # compact JSON to stdout (default)
97
+ exstruct input.xlsx -o out.json --pretty # pretty JSON to a file
98
+ exstruct input.xlsx --format yaml # YAML (needs pyyaml)
99
+ exstruct input.xlsx --format toon # TOON (needs python-toon)
100
+ exstruct input.xlsx --sheets-dir sheets/ # split per sheet in chosen format
101
+ exstruct input.xlsx --print-areas-dir areas/ # split per print area (if any)
102
+ exstruct input.xlsx --auto-page-breaks-dir auto_areas/ # COM only; option appears when available
103
+ exstruct input.xlsx --mode light # cells + table candidates only
104
+ exstruct input.xlsx --pdf --image # PDF and PNGs (Excel required)
105
+ ```
106
+
107
+ Auto page-break exports are available via API and CLI when Excel/COM is available; the CLI exposes `--auto-page-breaks-dir` only in COM-capable environments.
108
+
109
+ ## Quick Start (Python)
110
+
111
+ ```python
112
+ from pathlib import Path
113
+ from exstruct import extract, export, set_table_detection_params
114
+
115
+ # Tune table detection (optional)
116
+ set_table_detection_params(table_score_threshold=0.3, density_min=0.04)
117
+
118
+ # Extract with modes: "light", "standard", "verbose"
119
+ wb = extract("input.xlsx", mode="standard")
120
+ export(wb, Path("out.json"), pretty=False) # compact JSON
121
+
122
+ # Model helpers: iterate, index, and serialize directly
123
+ first_sheet = wb["Sheet1"] # __getitem__ access
124
+ for name, sheet in wb: # __iter__ yields (name, SheetData)
125
+ print(name, len(sheet.rows))
126
+ wb.save("out.json", pretty=True) # WorkbookData → file (by extension)
127
+ first_sheet.save("sheet.json") # SheetData → file (by extension)
128
+ print(first_sheet.to_yaml()) # YAML text (requires pyyaml)
129
+
130
+ # ExStructEngine: per-instance options (nested configs)
131
+ from exstruct import (
132
+ DestinationOptions,
133
+ ExStructEngine,
134
+ FilterOptions,
135
+ FormatOptions,
136
+ OutputOptions,
137
+ StructOptions,
138
+ export_auto_page_breaks,
139
+ )
140
+
141
+ engine = ExStructEngine(
142
+ options=StructOptions(mode="verbose"), # verbose includes hyperlinks by default
143
+ output=OutputOptions(
144
+ format=FormatOptions(pretty=True),
145
+ filters=FilterOptions(include_shapes=False), # drop shapes in output
146
+ destinations=DestinationOptions(sheets_dir=Path("out_sheets")), # also write per-sheet files
147
+ ),
148
+ )
149
+ wb2 = engine.extract("input.xlsx")
150
+ engine.export(wb2, Path("out_filtered.json")) # drops shapes via filters
151
+
152
+ # Enable hyperlinks in other modes
153
+ engine_links = ExStructEngine(options=StructOptions(mode="standard", include_cell_links=True))
154
+ with_links = engine_links.extract("input.xlsx")
155
+
156
+ # Export per print area (if print areas exist)
157
+ from exstruct import export_print_areas_as
158
+ export_print_areas_as(wb, "areas", fmt="json", pretty=True)
159
+
160
+ # Auto page-break extraction/output (COM only; raises if no auto breaks exist)
161
+ engine_auto = ExStructEngine(
162
+ output=OutputOptions(
163
+ destinations=DestinationOptions(auto_page_breaks_dir=Path("auto_areas"))
164
+ )
165
+ )
166
+ wb_auto = engine_auto.extract("input.xlsx") # includes SheetData.auto_print_areas
167
+ engine_auto.export(wb_auto, Path("out_with_auto.json")) # also writes auto_areas/*
168
+ export_auto_page_breaks(wb_auto, "auto_areas", fmt="json", pretty=True) # manual writer
169
+ ```
170
+
171
+ **Note (non-COM environments):** If Excel COM is unavailable, extraction still runs and returns cells + `table_candidates`; `shapes`/`charts` will be empty.
172
+
173
+ ## Table Detection Tuning
174
+
175
+ ```python
176
+ from exstruct import set_table_detection_params
177
+
178
+ set_table_detection_params(
179
+ table_score_threshold=0.35, # increase to be stricter
180
+ density_min=0.05,
181
+ coverage_min=0.2,
182
+ min_nonempty_cells=3,
183
+ )
184
+ ```
185
+
186
+ Use higher thresholds to reduce false positives; lower them if true tables are missed.
187
+
188
+ ## Output Modes
189
+
190
+ - **light**: cells + table candidates (no COM needed).
191
+ - **standard**: texted shapes + arrows, charts (COM if available), table candidates. Hyperlinks are off unless `include_cell_links=True`.
192
+ - **verbose**: all shapes (with width/height), charts, table candidates, and cell hyperlinks.
193
+
194
+ ## Error Handling / Fallbacks
195
+
196
+ - Excel COM unavailable → falls back to cells + table candidates; shapes/charts empty.
197
+ - Shape extraction failure → logs warning, still returns cells + table candidates.
198
+ - CLI prints errors to stdout/stderr and returns non-zero on failures.
199
+
200
+ ## Optional Rendering
201
+
202
+ Requires Excel and `pypdfium2`.
203
+
204
+ ```bash
205
+ exstruct input.xlsx --pdf --image --dpi 144
206
+ ```
207
+
208
+ Creates `<output>.pdf` and `<output>_images/` PNGs per sheet.
209
+
210
+ ## Benchmark: Excel Structuring Demo
211
+
212
+ To show how well exstruct can structure Excel, we parse a workbook that combines three elements on one sheet and share an AI reasoning benchmark that uses the JSON output.
213
+
214
+ - Table (sales data)
215
+ - Line chart
216
+ - Flowchart built only with shapes
217
+
218
+ (Screenshot below is the actual sample Excel sheet)
219
+ ![Sample Excel](/docs/assets/demo_sheet.png)
220
+ Sample workbook: `sample/sample.xlsx`
221
+ Sample workbook: `sample/sample.xlsx`
222
+
223
+ ### 1. Input: Excel Sheet Overview
224
+
225
+ This sample Excel contains:
226
+
227
+ ### ① Table (Sales Data)
228
+
229
+ | Month | Product A | Product B | Product C |
230
+ | ------ | --------- | --------- | --------- |
231
+ | Jan-25 | 120 | 80 | 60 |
232
+ | Feb-25 | 135 | 90 | 64 |
233
+ | Mar-25 | 150 | 100 | 70 |
234
+ | Apr-25 | 170 | 110 | 72 |
235
+ | May-25 | 160 | 120 | 75 |
236
+ | Jun-25 | 180 | 130 | 80 |
237
+
238
+ ### ② Chart (Line Chart)
239
+
240
+ - Title: Sales Data
241
+ - Series: Product A / Product B / Product C (six months)
242
+ - Y axis: 0–200
243
+
244
+ ### ③ Flowchart built with shapes
245
+
246
+ The sheet includes this flow:
247
+
248
+ - Start / End
249
+ - Format check
250
+ - Loop (items remaining?)
251
+ - Error handling
252
+ - Yes/No decision for sending email
253
+
254
+ ### 2. Output: Structured JSON produced by exstruct (excerpt)
255
+
256
+ Below is a **shortened JSON output example** from parsing this Excel workbook.
257
+
258
+ ```json
259
+ {
260
+ "book_name": "sample.xlsx",
261
+ "sheets": {
262
+ "Sheet1": {
263
+ "rows": [
264
+ {
265
+ "r": 3,
266
+ "c": {
267
+ "1": "月",
268
+ "2": "製品A",
269
+ "3": "製品B",
270
+ "4": "製品C"
271
+ }
272
+ },
273
+ ...
274
+ ],
275
+ "shapes": [
276
+ {
277
+ "text": "開始",
278
+ "l": 148,
279
+ "t": 220,
280
+ "type": "AutoShape-FlowchartProcess"
281
+ },
282
+ {
283
+ "text": "入力データ読み込み",
284
+ "l": 132,
285
+ "t": 282,
286
+ "type": "AutoShape-FlowchartProcess"
287
+ },
288
+ {
289
+ "l": 193,
290
+ "t": 246,
291
+ "type": "AutoShape-Mixed",
292
+ "begin_arrow_style": 1,
293
+ "end_arrow_style": 2,
294
+ "direction": "N"
295
+ },
296
+ ...
297
+ ],
298
+ "charts": [
299
+ {
300
+ "name": "Chart 1",
301
+ "chart_type": "Line",
302
+ "title": "売上データ",
303
+ "y_axis_range": [
304
+ 0.0,
305
+ 200.0
306
+ ],
307
+ "series": [
308
+ {
309
+ "name": "製品A",
310
+ "name_range": "Sheet1!$C$3",
311
+ "x_range": "Sheet1!$B$4:$B$9",
312
+ "y_range": "Sheet1!$C$4:$C$9"
313
+ },
314
+ ...
315
+ ],
316
+ "l": 377,
317
+ "t": 25
318
+ }
319
+ ],
320
+ "table_candidates": [
321
+ "B3:E9"
322
+ ]
323
+ }
324
+ }
325
+ }
326
+ ```
327
+
328
+ ### 3. How AI (Copilot / LLM) interprets the JSON
329
+
330
+ ````md
331
+ Below is the Markdown reconstruction of the Excel workbook. The table, chart, and flowchart are all represented.
332
+
333
+ ---
334
+
335
+ ## 📊 Sales Data Table
336
+
337
+ | Month | Product A | Product B | Product C |
338
+ | ---------- | --------- | --------- | --------- |
339
+ | 2025-01-01 | 120 | 80 | 60 |
340
+ | 2025-02-01 | 135 | 90 | 64 |
341
+ | 2025-03-01 | 150 | 100 | 70 |
342
+ | 2025-04-01 | 170 | 110 | 72 |
343
+ | 2025-05-01 | 160 | 120 | 75 |
344
+ | 2025-06-01 | 180 | 130 | 80 |
345
+
346
+ ---
347
+
348
+ ## 📈 Sales Data (Line Chart)
349
+
350
+ - Chart title: **売上データ (Sales Data)**
351
+ - Chart type: Line
352
+ - Y-axis range: 0 to 200
353
+ - Data series:
354
+ - Product A: 120 → 135 → 150 → 170 → 160 → 180
355
+ - Product B: 80 → 90 → 100 → 110 → 120 → 130
356
+ - Product C: 60 → 64 → 70 → 72 → 75 → 80
357
+
358
+ ---
359
+
360
+ ## 🔄 Process Flow (Mermaid Flowchart)
361
+
362
+ ```mermaid
363
+ flowchart TD
364
+ A[Start]
365
+ B[Load input data]
366
+ C{Is format valid?}
367
+ D[Show error]
368
+ E[Process one item]
369
+ F{Items remaining?}
370
+ G[Generate output]
371
+ H{Send email?}
372
+ I[Send email]
373
+ J[Finish]
374
+
375
+ A --> B
376
+ B --> C
377
+ C -- no --> D
378
+ C -- yes --> E
379
+ E --> F
380
+ F -- yes --> E
381
+ F -- no --> G
382
+ G --> H
383
+ H -- yes --> I
384
+ H -- no --> J
385
+ I --> J
386
+ ```
387
+ ````
388
+
389
+ From this we can see:
390
+
391
+ **exstruct's JSON is already in a format that AI can read and reason over directly.**
392
+
393
+ ### 4. Summary
394
+
395
+ This benchmark confirms exstruct can:
396
+
397
+ - **Parse tables, charts, and shapes (flowcharts) simultaneously**
398
+ - Convert the semantic structure of Excel into JSON
399
+ - Let AI/LLMs read that JSON directly and reconstruct the workbook contents
400
+
401
+ In short, **exstruct = “an engine that converts Excel into a format AI can understand.”**
402
+
403
+ ## Notes
404
+
405
+ - Default JSON is compact to reduce tokens; use `--pretty` or `pretty=True` when readability matters.
406
+ - Field `table_candidates` replaces `tables`; adjust downstream consumers accordingly.
407
+
408
+ ## Enterprise Use
409
+
410
+ ExStruct is used primarily as a **library**, not a service.
411
+
412
+ - No official support or SLA is provided
413
+ - Long-term stability is prioritized over rapid feature growth
414
+ - Forking and internal modification are expected in enterprise use
415
+
416
+ This project is suitable for teams that:
417
+ - need transparency over black-box tools
418
+ - are comfortable maintaining internal forks if necessary
419
+
420
+ ## Print Areas and Auto Page Breaks (PrintArea / PrintAreaView)
421
+
422
+ - `SheetData.print_areas` holds print areas (cell coordinates) in light/standard/verbose.
423
+ - `SheetData.auto_print_areas` holds Excel COM-computed auto page-break areas when auto page-break extraction is enabled (COM only).
424
+ - Use `export_print_areas_as(...)` or CLI `--print-areas-dir` to write one file per print area (nothing is written if none exist).
425
+ - Use CLI `--auto-page-breaks-dir` (COM only), `DestinationOptions.auto_page_breaks_dir` (preferred), or `export_auto_page_breaks(...)` to write per-auto-page-break files; the API raises `ValueError` if no auto page breaks exist.
426
+ - `PrintAreaView` includes rows and table candidates inside the area, plus shapes/charts that overlap the area (size-less shapes are treated as points). `normalize=True` rebases row/col indices to the area origin.
427
+
428
+ ## License
429
+
430
+ BSD-3-Clause. See `LICENSE` for details.
431
+
432
+ ## Documentation
433
+
434
+ - API Reference (GitHub Pages): https://harumiweb.github.io/exstruct/
435
+ - JSON Schemas: see `schemas/` (one file per model); regenerate via `python scripts/gen_json_schema.py`.
@@ -0,0 +1,20 @@
1
+ exstruct/__init__.py,sha256=649fea37d359a9c994790c0256de70adace62647809547b3567ab9d1c4ba0e63,12122
2
+ exstruct/cli/availability.py,sha256=29b79cc084e9d4d314626f56e2745c5c1238f51c984f1c97a7115c1c2fbe79ca,1410
3
+ exstruct/cli/main.py,sha256=63d299d9032522ab9b29032aa77d17700815f752f024d49ab5de7a5068e64751,3830
4
+ exstruct/core/__init__.py,sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,0
5
+ exstruct/core/cells.py,sha256=e38266674e621ddd815fdea017d4b9ede833147933a80cd56f462f7db882f0a8,35930
6
+ exstruct/core/charts.py,sha256=e7962eebb32dbd58165825639f0b5a32da2bf0a9880d4a57687c8a265f1e76e7,7777
7
+ exstruct/core/integrate.py,sha256=da216a4f864bb0a3111aa9d8b2d7cbd3ef8c26d30a32a3425e0fb90b31eb1c1a,14037
8
+ exstruct/core/shapes.py,sha256=f2913c5134b1c82be066e805a24cc3e21c3e5afef880dbab57987fa18c58c8bf,11069
9
+ exstruct/engine.py,sha256=8027aa9e9bc8d2f23c42c230fdd935b7685f4eb8b51613ed7a2d7fce2236e5a7,25543
10
+ exstruct/errors.py,sha256=9be81f7e93df84642fd2db4591bfbff1d5440d715287b582da113e7b0a5549ac,1002
11
+ exstruct/io/__init__.py,sha256=8cb00dd3e1fed186ab79f80948b241ff72a76e95b79fd9c4d4746829b61bf5ee,19267
12
+ exstruct/models/__init__.py,sha256=c502e877ce9bdbc899de6f6a95583282c95f9ef81d6a69296f2fef827dc3b7e3,11924
13
+ exstruct/models/maps.py,sha256=9ebb0e67e4d80b771b2ec3babba488cb84fa4a56681906990fff733273e73f52,12930
14
+ exstruct/models/types.py,sha256=4226f75035fc144bfaf88fe29bdeaa6a986924f18b3b3a048502187d27339d2a,278
15
+ exstruct/py.typed,sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,0
16
+ exstruct/render/__init__.py,sha256=e2c16904003c6fd28f96ff2ff13b2ac677465a48f732e93627544644b4dc37d9,4242
17
+ exstruct-0.2.80.dist-info/WHEEL,sha256=b6dc288e80aa2d1b1518ddb3502fd5b53e8fd6cb507ed2a4f932e9e6088b264a,78
18
+ exstruct-0.2.80.dist-info/entry_points.txt,sha256=3429e73dd9d41bb977b49a34914dddd7ec70352b79882bb937a3999e8e8bce9c,53
19
+ exstruct-0.2.80.dist-info/METADATA,sha256=d162e1003f843a4912e9dbfc82c298614068832b6506a23034fb11948ef790fe,16635
20
+ exstruct-0.2.80.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.8.4
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ exstruct = exstruct.cli.main:main
3
+