office2pdf-python 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/Cargo.lock +1 -1
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/Cargo.toml +1 -1
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/PKG-INFO +103 -20
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/README.md +102 -19
- office2pdf_python-0.2.0/office2pdf/__init__.py +74 -0
- office2pdf_python-0.2.0/office2pdf/_native.pyi +67 -0
- office2pdf_python-0.2.0/office2pdf/_native_bridge.py +179 -0
- office2pdf_python-0.2.0/office2pdf/_results.py +140 -0
- office2pdf_python-0.2.0/office2pdf/exceptions.py +39 -0
- office2pdf_python-0.2.0/office2pdf/models.py +207 -0
- office2pdf_python-0.2.0/office2pdf/options.py +121 -0
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/pyproject.toml +1 -1
- office2pdf_python-0.2.0/src/errors.rs +59 -0
- office2pdf_python-0.2.0/src/lib.rs +47 -0
- office2pdf_python-0.2.0/src/options.rs +199 -0
- office2pdf_python-0.2.0/src/result.rs +77 -0
- office2pdf_python-0.2.0/tests/test_native_boundary.py +144 -0
- office2pdf_python-0.2.0/tests/test_public_api.py +156 -0
- office2pdf_python-0.1.0/office2pdf/__init__.py +0 -143
- office2pdf_python-0.1.0/office2pdf/_native.pyi +0 -9
- office2pdf_python-0.1.0/src/lib.rs +0 -202
- office2pdf_python-0.1.0/tests/test_public_api.py +0 -133
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/.github/workflows/ci.yml +0 -0
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/.github/workflows/release.yml +0 -0
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/.gitignore +0 -0
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/LICENSE +0 -0
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/office2pdf/cli.py +0 -0
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/office2pdf/py.typed +0 -0
- {office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/tests/test_cli.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: office2pdf-python
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Classifier: Development Status :: 3 - Alpha
|
|
5
5
|
Classifier: Intended Audience :: Developers
|
|
6
6
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
@@ -67,30 +67,93 @@ The CLI accepts DOCX, PPTX, and XLSX input paths and writes the converted PDF by
|
|
|
67
67
|
|
|
68
68
|
### `Format`
|
|
69
69
|
|
|
70
|
-
|
|
70
|
+
`Format` identifies the input Office format for byte-based conversion:
|
|
71
|
+
|
|
72
|
+
- `Format.DOCX` (`"docx"`)
|
|
73
|
+
- `Format.PPTX` (`"pptx"`)
|
|
74
|
+
- `Format.XLSX` (`"xlsx"`)
|
|
75
|
+
|
|
76
|
+
### `PdfStandard`
|
|
77
|
+
|
|
78
|
+
`PdfStandard` currently supports `pdf/a-2b`:
|
|
79
|
+
|
|
80
|
+
- canonical: `PdfStandard.PDF_A_2B`
|
|
81
|
+
- compatibility alias: `PdfStandard.PDF_A_2_B`
|
|
82
|
+
|
|
83
|
+
`PdfStandard.from_value()` accepts both forms and related normalizations (`"pdf/a-2b"`, `"pdfa2b"`).
|
|
84
|
+
|
|
85
|
+
### `PaperSize`
|
|
86
|
+
|
|
87
|
+
- `PaperSize.A4`
|
|
88
|
+
- `PaperSize.LETTER`
|
|
89
|
+
- `PaperSize.LEGAL`
|
|
90
|
+
|
|
91
|
+
### `CustomPaperSize`
|
|
92
|
+
|
|
93
|
+
`CustomPaperSize(width: float, height: float)` stores explicit PDF point dimensions, matching upstream `PaperSize::Custom` (`1 point = 1/72 inch`).
|
|
71
94
|
|
|
72
95
|
### `ConvertOptions`
|
|
73
96
|
|
|
74
|
-
|
|
97
|
+
All options are stored in a Python dataclass and translated to native options via `to_native()`.
|
|
75
98
|
|
|
76
|
-
- `
|
|
77
|
-
- `sheet_filter: Sequence[str] | None`
|
|
78
|
-
- `slide_range: str | None` — accepts upstream strings like `"1-5"` or `"3"`.
|
|
79
|
-
- `paper_size: str | None` — accepts upstream `"a4"`, `"letter"`, or `"legal"`.
|
|
80
|
-
- `landscape: bool | None` — maps to upstream orientation control.
|
|
81
|
-
- `font_paths: Sequence[str | pathlib.Path]` — additional font directories.
|
|
82
|
-
- `pdf_standard: str | None` — currently supports `"pdf/a-2b"`.
|
|
83
|
-
- `include_warnings: bool` — controls whether returned warnings are included in `ConversionResult`.
|
|
84
|
-
- `memory_limit_mb: int | None` — rejected when set because `office2pdf 0.6.0` does not expose a native field for it.
|
|
85
|
-
- `streaming: bool` — enables upstream streaming mode for supported formats.
|
|
99
|
+
- `sheet_names: Sequence[str] | None`
|
|
100
|
+
- `sheet_filter: Sequence[str] | None`
|
|
86
101
|
|
|
87
|
-
|
|
102
|
+
These are aliases for XLSX sheet selection. If both are provided, they must be equal.
|
|
103
|
+
|
|
104
|
+
- `slide_range: SlideRange | str | None`
|
|
105
|
+
|
|
106
|
+
`SlideRange` supports `"1-5"` parsing and also accepts explicit `SlideRange(1, 5)`. Values are normalized to `start-end` strings for native conversion.
|
|
107
|
+
|
|
108
|
+
- `pdf_standard: PdfStandard | str | None`
|
|
109
|
+
|
|
110
|
+
Only `pdf/a-2b` is supported at this version.
|
|
111
|
+
|
|
112
|
+
- `paper_size: PaperSize | CustomPaperSize | str | None`
|
|
113
|
+
|
|
114
|
+
String values normalize to named page sizes.
|
|
88
115
|
|
|
89
|
-
|
|
116
|
+
- `font_paths: Sequence[str | pathlib.Path]`
|
|
117
|
+
- `landscape: bool | None`
|
|
118
|
+
- `tagged: bool | None`
|
|
119
|
+
- `pdf_ua: bool | None`
|
|
120
|
+
- `streaming: bool`
|
|
121
|
+
- `streaming_chunk_size: int | None`
|
|
122
|
+
- `include_warnings: bool`
|
|
123
|
+
|
|
124
|
+
Unsupported options are rejected to preserve API compatibility with upstream `office2pdf 0.6.0`:
|
|
125
|
+
|
|
126
|
+
- `page_range: str | None`
|
|
127
|
+
- `memory_limit_mb: int | None`
|
|
128
|
+
|
|
129
|
+
### `ConversionResult`
|
|
90
130
|
|
|
91
131
|
- `pdf: bytes`
|
|
92
|
-
- `warnings: tuple[
|
|
93
|
-
- `metrics:
|
|
132
|
+
- `warnings: tuple[ConvertWarning, ...]`
|
|
133
|
+
- `metrics: ConvertMetrics | None`
|
|
134
|
+
- `warning_messages: tuple[str, ...]` property collecting warning messages.
|
|
135
|
+
|
|
136
|
+
### Warning types
|
|
137
|
+
|
|
138
|
+
Warning payloads from the native layer are mapped to typed subclasses of `ConvertWarning`:
|
|
139
|
+
|
|
140
|
+
- `UnsupportedElementWarning(format, element)`
|
|
141
|
+
- `PartialElementWarning(format, element, detail)`
|
|
142
|
+
- `FallbackUsedWarning(format, from_, to)`
|
|
143
|
+
- `ParseSkippedWarning(format, reason)`
|
|
144
|
+
- and a base `ConvertWarning` for legacy/unknown forms.
|
|
145
|
+
|
|
146
|
+
### `ConvertMetrics`
|
|
147
|
+
|
|
148
|
+
- `parse_duration`
|
|
149
|
+
- `codegen_duration`
|
|
150
|
+
- `compile_duration`
|
|
151
|
+
- `total_duration`
|
|
152
|
+
- `input_size_bytes`
|
|
153
|
+
- `output_size_bytes`
|
|
154
|
+
- `page_count`
|
|
155
|
+
|
|
156
|
+
Duration fields are reported in seconds.
|
|
94
157
|
|
|
95
158
|
### Functions
|
|
96
159
|
|
|
@@ -100,7 +163,27 @@ convert_path(path: str | pathlib.Path, options: ConvertOptions | None = None) ->
|
|
|
100
163
|
infer_format(path: str | pathlib.Path) -> Format
|
|
101
164
|
```
|
|
102
165
|
|
|
103
|
-
`
|
|
166
|
+
- `infer_format()` reads the file suffix and accepts only `.docx`, `.pptx`, or `.xlsx`.
|
|
167
|
+
- `convert_path()` validates the file extension before conversion.
|
|
168
|
+
- `convert_bytes()` requires an explicit input `format`.
|
|
169
|
+
|
|
170
|
+
## Exceptions
|
|
171
|
+
|
|
172
|
+
Re-exported exception hierarchy:
|
|
173
|
+
|
|
174
|
+
- `Office2PdfError`
|
|
175
|
+
- `UnsupportedFormatError`
|
|
176
|
+
- `Office2PdfIoError`
|
|
177
|
+
- `Office2PdfParseError`
|
|
178
|
+
- `Office2PdfRenderError`
|
|
179
|
+
- `UnsupportedEncryptionError`
|
|
180
|
+
- `UnsupportedOptionError`
|
|
181
|
+
|
|
182
|
+
## API scope
|
|
183
|
+
|
|
184
|
+
Version `0.2.0` exposes the upstream `office2pdf 0.6.0` conversion API: file/bytes conversion, conversion options, structured warnings, metrics, and typed errors.
|
|
185
|
+
|
|
186
|
+
The upstream `pdf_ops` feature (`page_count`, `merge`, `split`), internal IR/parser/render modules, TypeScript helpers, and WASM APIs are intentionally out of scope for this Python release.
|
|
104
187
|
|
|
105
188
|
## Local development
|
|
106
189
|
|
|
@@ -148,8 +231,8 @@ Create a matching GitHub environment named `pypi` and require manual approval fo
|
|
|
148
231
|
To publish a release automatically, update the version in `pyproject.toml` and `Cargo.toml`, commit the change, then push a matching tag:
|
|
149
232
|
|
|
150
233
|
```bash
|
|
151
|
-
git tag v0.1.0
|
|
152
|
-
git push origin v0.1.0
|
|
234
|
+
git tag v0.2.0
|
|
235
|
+
git push origin v0.2.0
|
|
153
236
|
```
|
|
154
237
|
|
|
155
238
|
The tag push starts `.github/workflows/release.yml`, builds artifacts, publishes to PyPI after the `pypi` environment approval, and creates a GitHub Release for tag-triggered runs.
|
|
@@ -42,30 +42,93 @@ The CLI accepts DOCX, PPTX, and XLSX input paths and writes the converted PDF by
|
|
|
42
42
|
|
|
43
43
|
### `Format`
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
`Format` identifies the input Office format for byte-based conversion:
|
|
46
|
+
|
|
47
|
+
- `Format.DOCX` (`"docx"`)
|
|
48
|
+
- `Format.PPTX` (`"pptx"`)
|
|
49
|
+
- `Format.XLSX` (`"xlsx"`)
|
|
50
|
+
|
|
51
|
+
### `PdfStandard`
|
|
52
|
+
|
|
53
|
+
`PdfStandard` currently supports `pdf/a-2b`:
|
|
54
|
+
|
|
55
|
+
- canonical: `PdfStandard.PDF_A_2B`
|
|
56
|
+
- compatibility alias: `PdfStandard.PDF_A_2_B`
|
|
57
|
+
|
|
58
|
+
`PdfStandard.from_value()` accepts both forms and related normalizations (`"pdf/a-2b"`, `"pdfa2b"`).
|
|
59
|
+
|
|
60
|
+
### `PaperSize`
|
|
61
|
+
|
|
62
|
+
- `PaperSize.A4`
|
|
63
|
+
- `PaperSize.LETTER`
|
|
64
|
+
- `PaperSize.LEGAL`
|
|
65
|
+
|
|
66
|
+
### `CustomPaperSize`
|
|
67
|
+
|
|
68
|
+
`CustomPaperSize(width: float, height: float)` stores explicit PDF point dimensions, matching upstream `PaperSize::Custom` (`1 point = 1/72 inch`).
|
|
46
69
|
|
|
47
70
|
### `ConvertOptions`
|
|
48
71
|
|
|
49
|
-
|
|
72
|
+
All options are stored in a Python dataclass and translated to native options via `to_native()`.
|
|
50
73
|
|
|
51
|
-
- `
|
|
52
|
-
- `sheet_filter: Sequence[str] | None`
|
|
53
|
-
- `slide_range: str | None` — accepts upstream strings like `"1-5"` or `"3"`.
|
|
54
|
-
- `paper_size: str | None` — accepts upstream `"a4"`, `"letter"`, or `"legal"`.
|
|
55
|
-
- `landscape: bool | None` — maps to upstream orientation control.
|
|
56
|
-
- `font_paths: Sequence[str | pathlib.Path]` — additional font directories.
|
|
57
|
-
- `pdf_standard: str | None` — currently supports `"pdf/a-2b"`.
|
|
58
|
-
- `include_warnings: bool` — controls whether returned warnings are included in `ConversionResult`.
|
|
59
|
-
- `memory_limit_mb: int | None` — rejected when set because `office2pdf 0.6.0` does not expose a native field for it.
|
|
60
|
-
- `streaming: bool` — enables upstream streaming mode for supported formats.
|
|
74
|
+
- `sheet_names: Sequence[str] | None`
|
|
75
|
+
- `sheet_filter: Sequence[str] | None`
|
|
61
76
|
|
|
62
|
-
|
|
77
|
+
These are aliases for XLSX sheet selection. If both are provided, they must be equal.
|
|
78
|
+
|
|
79
|
+
- `slide_range: SlideRange | str | None`
|
|
80
|
+
|
|
81
|
+
`SlideRange` supports `"1-5"` parsing and also accepts explicit `SlideRange(1, 5)`. Values are normalized to `start-end` strings for native conversion.
|
|
82
|
+
|
|
83
|
+
- `pdf_standard: PdfStandard | str | None`
|
|
84
|
+
|
|
85
|
+
Only `pdf/a-2b` is supported at this version.
|
|
86
|
+
|
|
87
|
+
- `paper_size: PaperSize | CustomPaperSize | str | None`
|
|
88
|
+
|
|
89
|
+
String values normalize to named page sizes.
|
|
63
90
|
|
|
64
|
-
|
|
91
|
+
- `font_paths: Sequence[str | pathlib.Path]`
|
|
92
|
+
- `landscape: bool | None`
|
|
93
|
+
- `tagged: bool | None`
|
|
94
|
+
- `pdf_ua: bool | None`
|
|
95
|
+
- `streaming: bool`
|
|
96
|
+
- `streaming_chunk_size: int | None`
|
|
97
|
+
- `include_warnings: bool`
|
|
98
|
+
|
|
99
|
+
Unsupported options are rejected to preserve API compatibility with upstream `office2pdf 0.6.0`:
|
|
100
|
+
|
|
101
|
+
- `page_range: str | None`
|
|
102
|
+
- `memory_limit_mb: int | None`
|
|
103
|
+
|
|
104
|
+
### `ConversionResult`
|
|
65
105
|
|
|
66
106
|
- `pdf: bytes`
|
|
67
|
-
- `warnings: tuple[
|
|
68
|
-
- `metrics:
|
|
107
|
+
- `warnings: tuple[ConvertWarning, ...]`
|
|
108
|
+
- `metrics: ConvertMetrics | None`
|
|
109
|
+
- `warning_messages: tuple[str, ...]` property collecting warning messages.
|
|
110
|
+
|
|
111
|
+
### Warning types
|
|
112
|
+
|
|
113
|
+
Warning payloads from the native layer are mapped to typed subclasses of `ConvertWarning`:
|
|
114
|
+
|
|
115
|
+
- `UnsupportedElementWarning(format, element)`
|
|
116
|
+
- `PartialElementWarning(format, element, detail)`
|
|
117
|
+
- `FallbackUsedWarning(format, from_, to)`
|
|
118
|
+
- `ParseSkippedWarning(format, reason)`
|
|
119
|
+
- and a base `ConvertWarning` for legacy/unknown forms.
|
|
120
|
+
|
|
121
|
+
### `ConvertMetrics`
|
|
122
|
+
|
|
123
|
+
- `parse_duration`
|
|
124
|
+
- `codegen_duration`
|
|
125
|
+
- `compile_duration`
|
|
126
|
+
- `total_duration`
|
|
127
|
+
- `input_size_bytes`
|
|
128
|
+
- `output_size_bytes`
|
|
129
|
+
- `page_count`
|
|
130
|
+
|
|
131
|
+
Duration fields are reported in seconds.
|
|
69
132
|
|
|
70
133
|
### Functions
|
|
71
134
|
|
|
@@ -75,7 +138,27 @@ convert_path(path: str | pathlib.Path, options: ConvertOptions | None = None) ->
|
|
|
75
138
|
infer_format(path: str | pathlib.Path) -> Format
|
|
76
139
|
```
|
|
77
140
|
|
|
78
|
-
`
|
|
141
|
+
- `infer_format()` reads the file suffix and accepts only `.docx`, `.pptx`, or `.xlsx`.
|
|
142
|
+
- `convert_path()` validates the file extension before conversion.
|
|
143
|
+
- `convert_bytes()` requires an explicit input `format`.
|
|
144
|
+
|
|
145
|
+
## Exceptions
|
|
146
|
+
|
|
147
|
+
Re-exported exception hierarchy:
|
|
148
|
+
|
|
149
|
+
- `Office2PdfError`
|
|
150
|
+
- `UnsupportedFormatError`
|
|
151
|
+
- `Office2PdfIoError`
|
|
152
|
+
- `Office2PdfParseError`
|
|
153
|
+
- `Office2PdfRenderError`
|
|
154
|
+
- `UnsupportedEncryptionError`
|
|
155
|
+
- `UnsupportedOptionError`
|
|
156
|
+
|
|
157
|
+
## API scope
|
|
158
|
+
|
|
159
|
+
Version `0.2.0` exposes the upstream `office2pdf 0.6.0` conversion API: file/bytes conversion, conversion options, structured warnings, metrics, and typed errors.
|
|
160
|
+
|
|
161
|
+
The upstream `pdf_ops` feature (`page_count`, `merge`, `split`), internal IR/parser/render modules, TypeScript helpers, and WASM APIs are intentionally out of scope for this Python release.
|
|
79
162
|
|
|
80
163
|
## Local development
|
|
81
164
|
|
|
@@ -123,8 +206,8 @@ Create a matching GitHub environment named `pypi` and require manual approval fo
|
|
|
123
206
|
To publish a release automatically, update the version in `pyproject.toml` and `Cargo.toml`, commit the change, then push a matching tag:
|
|
124
207
|
|
|
125
208
|
```bash
|
|
126
|
-
git tag v0.1.0
|
|
127
|
-
git push origin v0.1.0
|
|
209
|
+
git tag v0.2.0
|
|
210
|
+
git push origin v0.2.0
|
|
128
211
|
```
|
|
129
212
|
|
|
130
213
|
The tag push starts `.github/workflows/release.yml`, builds artifacts, publishes to PyPI after the `pypi` environment approval, and creates a GitHub Release for tag-triggered runs.
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from importlib import metadata
|
|
4
|
+
|
|
5
|
+
from ._native_bridge import (
|
|
6
|
+
_metrics_from_native,
|
|
7
|
+
_native_module,
|
|
8
|
+
_native_options,
|
|
9
|
+
_result_from_native,
|
|
10
|
+
_warning_from_native,
|
|
11
|
+
convert_bytes,
|
|
12
|
+
convert_path,
|
|
13
|
+
infer_format,
|
|
14
|
+
)
|
|
15
|
+
from .exceptions import (
|
|
16
|
+
Office2PdfError,
|
|
17
|
+
Office2PdfIoError,
|
|
18
|
+
Office2PdfParseError,
|
|
19
|
+
Office2PdfRenderError,
|
|
20
|
+
UnsupportedEncryptionError,
|
|
21
|
+
UnsupportedFormatError,
|
|
22
|
+
UnsupportedOptionError,
|
|
23
|
+
)
|
|
24
|
+
from .models import (
|
|
25
|
+
ConversionMetrics,
|
|
26
|
+
ConversionResult,
|
|
27
|
+
ConvertMetrics,
|
|
28
|
+
ConvertWarning,
|
|
29
|
+
CustomPaperSize,
|
|
30
|
+
FallbackUsedWarning,
|
|
31
|
+
Format,
|
|
32
|
+
ParseSkippedWarning,
|
|
33
|
+
PaperSize,
|
|
34
|
+
PartialElementWarning,
|
|
35
|
+
PdfStandard,
|
|
36
|
+
SlideRange,
|
|
37
|
+
UnsupportedElementWarning,
|
|
38
|
+
)
|
|
39
|
+
from .options import ConvertOptions
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
try:
|
|
43
|
+
__version__ = metadata.version("office2pdf-python")
|
|
44
|
+
except metadata.PackageNotFoundError:
|
|
45
|
+
__version__ = "0.2.0"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
__all__ = [
|
|
49
|
+
"ConversionResult",
|
|
50
|
+
"ConversionMetrics",
|
|
51
|
+
"ConvertMetrics",
|
|
52
|
+
"ConvertOptions",
|
|
53
|
+
"Format",
|
|
54
|
+
"PaperSize",
|
|
55
|
+
"CustomPaperSize",
|
|
56
|
+
"PdfStandard",
|
|
57
|
+
"SlideRange",
|
|
58
|
+
"ConvertWarning",
|
|
59
|
+
"UnsupportedElementWarning",
|
|
60
|
+
"PartialElementWarning",
|
|
61
|
+
"FallbackUsedWarning",
|
|
62
|
+
"ParseSkippedWarning",
|
|
63
|
+
"Office2PdfError",
|
|
64
|
+
"UnsupportedFormatError",
|
|
65
|
+
"Office2PdfIoError",
|
|
66
|
+
"Office2PdfParseError",
|
|
67
|
+
"Office2PdfRenderError",
|
|
68
|
+
"UnsupportedEncryptionError",
|
|
69
|
+
"UnsupportedOptionError",
|
|
70
|
+
"__version__",
|
|
71
|
+
"convert_bytes",
|
|
72
|
+
"convert_path",
|
|
73
|
+
"infer_format",
|
|
74
|
+
]
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Mapping, Sequence, TypedDict
|
|
4
|
+
|
|
5
|
+
class Office2PdfError(Exception): ...
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class UnsupportedFormatError(Office2PdfError): ...
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Office2PdfIoError(Office2PdfError): ...
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Office2PdfParseError(Office2PdfError): ...
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Office2PdfRenderError(Office2PdfError): ...
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class UnsupportedEncryptionError(Office2PdfError): ...
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class UnsupportedOptionError(Office2PdfError): ...
|
|
24
|
+
|
|
25
|
+
__version__: str
|
|
26
|
+
|
|
27
|
+
ConvertWarningData = TypedDict(
|
|
28
|
+
"ConvertWarningData",
|
|
29
|
+
{
|
|
30
|
+
"kind": str,
|
|
31
|
+
"format": str,
|
|
32
|
+
"element": str,
|
|
33
|
+
"detail": str,
|
|
34
|
+
"from": str,
|
|
35
|
+
"to": str,
|
|
36
|
+
"reason": str,
|
|
37
|
+
"message": str,
|
|
38
|
+
},
|
|
39
|
+
total=False,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ConvertMetricsData(TypedDict):
|
|
44
|
+
parse_duration: float
|
|
45
|
+
codegen_duration: float
|
|
46
|
+
compile_duration: float
|
|
47
|
+
total_duration: float
|
|
48
|
+
input_size_bytes: int
|
|
49
|
+
output_size_bytes: int
|
|
50
|
+
page_count: int
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ConvertResultData(TypedDict, total=False):
|
|
54
|
+
pdf: bytes
|
|
55
|
+
warnings: Sequence[str | ConvertWarningData]
|
|
56
|
+
metrics: ConvertMetricsData | None
|
|
57
|
+
|
|
58
|
+
def convert_bytes(
|
|
59
|
+
data: bytes,
|
|
60
|
+
format: str,
|
|
61
|
+
options: Mapping[str, object] | None = None,
|
|
62
|
+
) -> ConvertResultData: ...
|
|
63
|
+
|
|
64
|
+
def convert_path(
|
|
65
|
+
path: str,
|
|
66
|
+
options: Mapping[str, object] | None = None,
|
|
67
|
+
) -> ConvertResultData: ...
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from importlib import import_module
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Protocol, runtime_checkable
|
|
7
|
+
|
|
8
|
+
from .exceptions import (
|
|
9
|
+
Office2PdfError,
|
|
10
|
+
Office2PdfIoError,
|
|
11
|
+
Office2PdfParseError,
|
|
12
|
+
Office2PdfRenderError,
|
|
13
|
+
UnsupportedEncryptionError,
|
|
14
|
+
UnsupportedFormatError,
|
|
15
|
+
UnsupportedOptionError,
|
|
16
|
+
)
|
|
17
|
+
from ._results import _metrics_from_native, _result_from_native, _warning_from_native
|
|
18
|
+
from .models import ConversionResult, Format
|
|
19
|
+
from .options import ConvertOptions
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@runtime_checkable
|
|
23
|
+
class _NativeModule(Protocol):
|
|
24
|
+
def convert_bytes(
|
|
25
|
+
self,
|
|
26
|
+
data: bytes,
|
|
27
|
+
format: str,
|
|
28
|
+
options: dict[str, object] | None = None,
|
|
29
|
+
) -> Mapping[str, object]:
|
|
30
|
+
...
|
|
31
|
+
|
|
32
|
+
def convert_path(
|
|
33
|
+
self,
|
|
34
|
+
path: str,
|
|
35
|
+
options: dict[str, object] | None = None,
|
|
36
|
+
) -> Mapping[str, object]:
|
|
37
|
+
...
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@runtime_checkable
|
|
41
|
+
class _NativeBytesModule(Protocol):
|
|
42
|
+
def convert_bytes(
|
|
43
|
+
self,
|
|
44
|
+
data: bytes,
|
|
45
|
+
format: str,
|
|
46
|
+
options: dict[str, object] | None = None,
|
|
47
|
+
) -> Mapping[str, object]:
|
|
48
|
+
...
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@runtime_checkable
|
|
52
|
+
class _NativePathModule(Protocol):
|
|
53
|
+
def convert_path(
|
|
54
|
+
self,
|
|
55
|
+
path: str,
|
|
56
|
+
options: dict[str, object] | None = None,
|
|
57
|
+
) -> Mapping[str, object]:
|
|
58
|
+
...
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _native_module() -> _NativeModule:
|
|
62
|
+
module = import_module("office2pdf._native")
|
|
63
|
+
if not isinstance(module, _NativeModule):
|
|
64
|
+
raise TypeError("office2pdf._native does not provide the expected conversion functions")
|
|
65
|
+
return module
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _native_error_types() -> tuple[type[BaseException], ...]:
|
|
69
|
+
module = import_module("office2pdf._native")
|
|
70
|
+
names = (
|
|
71
|
+
"Office2PdfError",
|
|
72
|
+
"UnsupportedFormatError",
|
|
73
|
+
"Office2PdfIoError",
|
|
74
|
+
"Office2PdfParseError",
|
|
75
|
+
"Office2PdfRenderError",
|
|
76
|
+
"UnsupportedEncryptionError",
|
|
77
|
+
"UnsupportedOptionError",
|
|
78
|
+
)
|
|
79
|
+
errors: list[type[BaseException]] = []
|
|
80
|
+
for name in names:
|
|
81
|
+
candidate = getattr(module, name, None)
|
|
82
|
+
if isinstance(candidate, type) and issubclass(candidate, BaseException):
|
|
83
|
+
errors.append(candidate)
|
|
84
|
+
return tuple(errors)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _public_error_from_native(error: BaseException) -> Office2PdfError:
|
|
88
|
+
message = str(error)
|
|
89
|
+
match error.__class__.__name__:
|
|
90
|
+
case "UnsupportedFormatError":
|
|
91
|
+
return UnsupportedFormatError(message)
|
|
92
|
+
case "Office2PdfIoError":
|
|
93
|
+
return Office2PdfIoError(message)
|
|
94
|
+
case "Office2PdfParseError":
|
|
95
|
+
return Office2PdfParseError(message)
|
|
96
|
+
case "Office2PdfRenderError":
|
|
97
|
+
return Office2PdfRenderError(message)
|
|
98
|
+
case "UnsupportedEncryptionError":
|
|
99
|
+
return UnsupportedEncryptionError(message)
|
|
100
|
+
case "UnsupportedOptionError":
|
|
101
|
+
return UnsupportedOptionError(message)
|
|
102
|
+
case _:
|
|
103
|
+
return Office2PdfError(message)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _native_bytes_module() -> _NativeBytesModule:
|
|
107
|
+
module = import_module("office2pdf._native")
|
|
108
|
+
if not isinstance(module, _NativeBytesModule):
|
|
109
|
+
raise TypeError("office2pdf._native does not provide convert_bytes")
|
|
110
|
+
return module
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _native_path_module() -> _NativePathModule:
|
|
114
|
+
module = import_module("office2pdf._native")
|
|
115
|
+
if not isinstance(module, _NativePathModule):
|
|
116
|
+
raise TypeError("office2pdf._native does not provide convert_path")
|
|
117
|
+
return module
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _native_options(options: ConvertOptions | None) -> dict[str, object]:
|
|
121
|
+
return ConvertOptions().to_native() if options is None else options.to_native()
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def infer_format(path: str | Path) -> Format:
|
|
125
|
+
suffix = Path(path).suffix.lower().lstrip(".")
|
|
126
|
+
if not suffix:
|
|
127
|
+
raise ValueError("path has no extension; expected .docx, .pptx, or .xlsx")
|
|
128
|
+
return Format.from_value(suffix)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def convert_bytes(
|
|
132
|
+
data: bytes | bytearray | memoryview,
|
|
133
|
+
format: Format | str,
|
|
134
|
+
options: ConvertOptions | None = None,
|
|
135
|
+
) -> ConversionResult:
|
|
136
|
+
if not isinstance(data, (bytes, bytearray, memoryview)):
|
|
137
|
+
raise TypeError("data must be bytes-like")
|
|
138
|
+
|
|
139
|
+
payload = bytes(data)
|
|
140
|
+
if not payload:
|
|
141
|
+
raise ValueError("data must not be empty")
|
|
142
|
+
|
|
143
|
+
input_format = Format.from_value(format)
|
|
144
|
+
native = _native_bytes_module()
|
|
145
|
+
native_errors = _native_error_types()
|
|
146
|
+
try:
|
|
147
|
+
result = native.convert_bytes(payload, input_format.value, _native_options(options))
|
|
148
|
+
except native_errors as error:
|
|
149
|
+
raise _public_error_from_native(error) from error
|
|
150
|
+
if not isinstance(result, Mapping):
|
|
151
|
+
raise TypeError("native conversion result must be a mapping")
|
|
152
|
+
return _result_from_native(result)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def convert_path(path: str | Path, options: ConvertOptions | None = None) -> ConversionResult:
|
|
156
|
+
input_path = Path(path)
|
|
157
|
+
infer_format(input_path)
|
|
158
|
+
|
|
159
|
+
native = _native_path_module()
|
|
160
|
+
native_errors = _native_error_types()
|
|
161
|
+
try:
|
|
162
|
+
result = native.convert_path(str(input_path), _native_options(options))
|
|
163
|
+
except native_errors as error:
|
|
164
|
+
raise _public_error_from_native(error) from error
|
|
165
|
+
if not isinstance(result, Mapping):
|
|
166
|
+
raise TypeError("native conversion result must be a mapping")
|
|
167
|
+
return _result_from_native(result)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
__all__ = [
|
|
171
|
+
"convert_bytes",
|
|
172
|
+
"convert_path",
|
|
173
|
+
"infer_format",
|
|
174
|
+
"_native_module",
|
|
175
|
+
"_native_options",
|
|
176
|
+
"_result_from_native",
|
|
177
|
+
"_warning_from_native",
|
|
178
|
+
"_metrics_from_native",
|
|
179
|
+
]
|