docxrender 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docxrender/__init__.py +30 -0
- docxrender/api.py +82 -0
- docxrender/contracts.py +256 -0
- docxrender/docx/__init__.py +1 -0
- docxrender/docx/body.py +369 -0
- docxrender/docx/fields.py +141 -0
- docxrender/docx/refresh.py +113 -0
- docxrender/markdown.py +177 -0
- docxrender/pdf_uno.py +608 -0
- docxrender/writer.py +423 -0
- docxrender-0.1.0.dist-info/METADATA +273 -0
- docxrender-0.1.0.dist-info/RECORD +14 -0
- docxrender-0.1.0.dist-info/WHEEL +4 -0
- docxrender-0.1.0.dist-info/entry_points.txt +4 -0
docxrender/__init__.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from docxrender.api import convert_docx_to_pdf, write_docx
|
|
2
|
+
from docxrender.contracts import (
|
|
3
|
+
DocxFieldRefreshOptions,
|
|
4
|
+
DocxFontStyle,
|
|
5
|
+
DocxParagraphStyle,
|
|
6
|
+
DocxSizeStyle,
|
|
7
|
+
DocxStyle,
|
|
8
|
+
DocxTableStyle,
|
|
9
|
+
DocxToPdfOptions,
|
|
10
|
+
DocxToPdfResult,
|
|
11
|
+
DocxWriteOptions,
|
|
12
|
+
DocxWriteResult,
|
|
13
|
+
)
|
|
14
|
+
from docxrender.writer import DocxWriter
|
|
15
|
+
|
|
16
|
+
# Names re-exported from the package root; every entry is imported above.
__all__ = [
    "DocxWriter",
    "DocxFieldRefreshOptions",
    "DocxFontStyle",
    "DocxParagraphStyle",
    "DocxSizeStyle",
    "DocxStyle",
    "DocxTableStyle",
    "DocxToPdfOptions",
    "DocxToPdfResult",
    "DocxWriteOptions",
    "DocxWriteResult",
    "convert_docx_to_pdf",
    "write_docx",
]
|
docxrender/api.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, cast
|
|
4
|
+
|
|
5
|
+
from docx import Document
|
|
6
|
+
from docxtpl import DocxTemplate
|
|
7
|
+
|
|
8
|
+
from docxrender.contracts import (
|
|
9
|
+
DocxToPdfOptions,
|
|
10
|
+
DocxToPdfResult,
|
|
11
|
+
DocxWriteOptions,
|
|
12
|
+
DocxWriteResult,
|
|
13
|
+
)
|
|
14
|
+
from docxrender.docx.body import insert_markdown_blocks
|
|
15
|
+
from docxrender.docx.fields import (
|
|
16
|
+
write_docx_field_update_markers,
|
|
17
|
+
write_frozen_docx_fields,
|
|
18
|
+
)
|
|
19
|
+
from docxrender.docx.refresh import refresh_docx_fields
|
|
20
|
+
from docxrender.markdown import parse_markdown_blocks
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def write_docx(options: DocxWriteOptions) -> DocxWriteResult:
    """Write a DOCX file from a template, context, markdown body, and style.

    The template is rendered to ``options.file_out_docx`` first. That file is
    then reopened, the parsed markdown blocks are inserted at the anchor
    token, and the document is saved back to the same path before the field
    handling steps run.

    Args:
        options (DocxWriteOptions): DOCX writing options.

    Returns:
        DocxWriteResult: Result containing the written DOCX path.

    Raises:
        FileNotFoundError: The template or a referenced image does not exist.
        RuntimeError: The rendered DOCX cannot be opened or written.
    """

    _write_template_docx(options)
    markdown_blocks = parse_markdown_blocks(options.markdown_body)
    document = Document(str(options.file_out_docx))
    insert_markdown_blocks(
        document,
        markdown_blocks,
        anchor_token=options.anchor_token,
        dir_base=options.dir_base,
        style=options.style,
    )
    document.save(str(options.file_out_docx))
    if options.should_update_fields:
        write_docx_field_update_markers(options.file_out_docx)
    # Freeze here only when no refresh options were supplied; the refresh
    # options carry their own should_freeze_fields flag.
    if options.field_refresh is None and options.should_freeze_fields:
        write_frozen_docx_fields(options.file_out_docx)
    # NOTE(review): called unconditionally, so refresh_docx_fields presumably
    # returns early when options is None -- confirm in docxrender.docx.refresh.
    refresh_docx_fields(options.file_out_docx, options=options.field_refresh)
    return DocxWriteResult(file_docx=options.file_out_docx)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def convert_docx_to_pdf(options: DocxToPdfOptions) -> DocxToPdfResult:
    """Convert a DOCX file to PDF through LibreOffice.

    Args:
        options (DocxToPdfOptions): DOCX-to-PDF conversion options.

    Returns:
        DocxToPdfResult: Result containing the written PDF path and optional
            refreshed DOCX path.

    Raises:
        FileNotFoundError: The input DOCX does not exist.
        RuntimeError: LibreOffice or UNO cannot load or convert the document.
    """

    # Imported at call time rather than module import time.
    # NOTE(review): presumably keeps the UNO pipeline optional for callers
    # that only write DOCX files -- confirm against docxrender.pdf_uno.
    from docxrender.pdf_uno import run_docx_to_pdf_pipeline

    return run_docx_to_pdf_pipeline(options)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _write_template_docx(options: DocxWriteOptions) -> None:
    """Render the DOCX template with the caller's context and save it.

    The output directory is created when missing. The context is copied
    before rendering so the caller's mapping is never modified, and the
    ``body_anchor`` key is filled with ``options.anchor_token`` unless the
    caller already supplied a value for it.

    Args:
        options (DocxWriteOptions): DOCX writing options; this helper reads
            ``file_template``, ``file_out_docx``, ``context`` and
            ``anchor_token``.
    """
    options.file_out_docx.parent.mkdir(parents=True, exist_ok=True)
    # NOTE(review): the cast to Any presumably silences type-checker
    # complaints about docxtpl's untyped API -- it has no runtime effect.
    template = cast(Any, DocxTemplate(str(options.file_template)))
    context = dict(options.context)
    context.setdefault("body_anchor", options.anchor_token)
    template.render(context)
    template.save(str(options.file_out_docx))
|
docxrender/contracts.py
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True, slots=True)
|
|
10
|
+
class DocxFontStyle:
|
|
11
|
+
"""Font names used when writing DOCX content.
|
|
12
|
+
|
|
13
|
+
Attributes:
|
|
14
|
+
font_name_latin (str): Latin font name applied to runs.
|
|
15
|
+
font_name_body_east_asia (str): East Asian font name applied to body text.
|
|
16
|
+
font_name_heading_east_asia (str): East Asian font name applied to headings.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
font_name_latin: str
|
|
20
|
+
font_name_body_east_asia: str
|
|
21
|
+
font_name_heading_east_asia: str
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True, slots=True)
|
|
25
|
+
class DocxSizeStyle:
|
|
26
|
+
"""Point sizes used by the DOCX writer.
|
|
27
|
+
|
|
28
|
+
Attributes:
|
|
29
|
+
pt_title_page_title (float): Title-page report title size.
|
|
30
|
+
pt_title_page_meta (float): Title-page metadata size.
|
|
31
|
+
pt_title_page_compiler (float): Title-page compiler or organization text size.
|
|
32
|
+
pt_body (float): Body paragraph text size.
|
|
33
|
+
pt_caption (float): Figure caption and note text size.
|
|
34
|
+
pt_table (float): Markdown table body text size.
|
|
35
|
+
pt_heading_by_level (Mapping[int, float]): Heading text size by heading level.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
pt_title_page_title: float
|
|
39
|
+
pt_title_page_meta: float
|
|
40
|
+
pt_title_page_compiler: float
|
|
41
|
+
pt_body: float
|
|
42
|
+
pt_caption: float
|
|
43
|
+
pt_table: float
|
|
44
|
+
pt_heading_by_level: Mapping[int, float]
|
|
45
|
+
|
|
46
|
+
def with_overrides(
|
|
47
|
+
self,
|
|
48
|
+
*,
|
|
49
|
+
pt_title_page_title: float | None = None,
|
|
50
|
+
pt_title_page_meta: float | None = None,
|
|
51
|
+
pt_title_page_compiler: float | None = None,
|
|
52
|
+
pt_body: float | None = None,
|
|
53
|
+
pt_caption: float | None = None,
|
|
54
|
+
pt_table: float | None = None,
|
|
55
|
+
pt_heading_by_level: Mapping[int, float] | None = None,
|
|
56
|
+
) -> DocxSizeStyle:
|
|
57
|
+
"""Create a copy with selected size values overridden.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
pt_title_page_title (float | None): Title-page report title size.
|
|
61
|
+
pt_title_page_meta (float | None): Title-page metadata size.
|
|
62
|
+
pt_title_page_compiler (float | None): Compiler or organization size.
|
|
63
|
+
pt_body (float | None): Body paragraph text size.
|
|
64
|
+
pt_caption (float | None): Caption and note text size.
|
|
65
|
+
pt_table (float | None): Markdown table text size.
|
|
66
|
+
pt_heading_by_level (Mapping[int, float] | None): Heading sizes by level.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
DocxSizeStyle: New size style with overrides applied.
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
return DocxSizeStyle(
|
|
73
|
+
pt_title_page_title=(
|
|
74
|
+
pt_title_page_title
|
|
75
|
+
if pt_title_page_title is not None
|
|
76
|
+
else self.pt_title_page_title
|
|
77
|
+
),
|
|
78
|
+
pt_title_page_meta=(
|
|
79
|
+
pt_title_page_meta
|
|
80
|
+
if pt_title_page_meta is not None
|
|
81
|
+
else self.pt_title_page_meta
|
|
82
|
+
),
|
|
83
|
+
pt_title_page_compiler=(
|
|
84
|
+
pt_title_page_compiler
|
|
85
|
+
if pt_title_page_compiler is not None
|
|
86
|
+
else self.pt_title_page_compiler
|
|
87
|
+
),
|
|
88
|
+
pt_body=pt_body if pt_body is not None else self.pt_body,
|
|
89
|
+
pt_caption=pt_caption if pt_caption is not None else self.pt_caption,
|
|
90
|
+
pt_table=pt_table if pt_table is not None else self.pt_table,
|
|
91
|
+
pt_heading_by_level=(
|
|
92
|
+
dict(pt_heading_by_level)
|
|
93
|
+
if pt_heading_by_level is not None
|
|
94
|
+
else self.pt_heading_by_level
|
|
95
|
+
),
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@dataclass(frozen=True, slots=True)
|
|
100
|
+
class DocxTableStyle:
|
|
101
|
+
"""Table style values used by markdown table rendering.
|
|
102
|
+
|
|
103
|
+
Attributes:
|
|
104
|
+
border_color (str): WordprocessingML border color as a hex RGB string.
|
|
105
|
+
stripe_fill_color (str): Body-row stripe fill color as a hex RGB string.
|
|
106
|
+
border_size_main (str): Main top/bottom border size in Word units.
|
|
107
|
+
border_size_header (str): Header separator border size in Word units.
|
|
108
|
+
line_spacing (float): Line spacing applied to table cell paragraphs.
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
border_color: str
|
|
112
|
+
stripe_fill_color: str
|
|
113
|
+
border_size_main: str
|
|
114
|
+
border_size_header: str
|
|
115
|
+
line_spacing: float
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@dataclass(frozen=True, slots=True)
|
|
119
|
+
class DocxParagraphStyle:
|
|
120
|
+
"""Paragraph style values used by body and note paragraphs.
|
|
121
|
+
|
|
122
|
+
Attributes:
|
|
123
|
+
line_spacing_body (float): Line spacing for ordinary body paragraphs.
|
|
124
|
+
line_spacing_note (float): Line spacing for note paragraphs.
|
|
125
|
+
first_line_indent_cm (float): Body paragraph first-line indent in centimeters.
|
|
126
|
+
note_prefixes (tuple[str, ...]): Text prefixes classified as note paragraphs.
|
|
127
|
+
"""
|
|
128
|
+
|
|
129
|
+
line_spacing_body: float
|
|
130
|
+
line_spacing_note: float
|
|
131
|
+
first_line_indent_cm: float
|
|
132
|
+
note_prefixes: tuple[str, ...] = ("注:", "注:")
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@dataclass(frozen=True, slots=True)
class DocxStyle:
    """Complete style bundle for DOCX writing.

    `docxrender` accepts this structured style object directly. Reading TOML,
    JSON, YAML, or another configuration format is the caller's
    responsibility.

    Attributes:
        fonts (DocxFontStyle): Font names for Latin and East Asian text.
        sizes (DocxSizeStyle): Point sizes for title, body, caption, table,
            and headings.
        table (DocxTableStyle): Table border, fill, and spacing settings.
        paragraph (DocxParagraphStyle): Body and note paragraph settings.
    """

    fonts: DocxFontStyle
    sizes: DocxSizeStyle
    table: DocxTableStyle
    paragraph: DocxParagraphStyle
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@dataclass(frozen=True, slots=True)
|
|
157
|
+
class DocxFieldRefreshOptions:
|
|
158
|
+
"""Options for refreshing DOCX fields through LibreOffice UNO.
|
|
159
|
+
|
|
160
|
+
Attributes:
|
|
161
|
+
exe_libreoffice (Path): LibreOffice executable path.
|
|
162
|
+
dir_user_profile (Path): Isolated LibreOffice user profile directory.
|
|
163
|
+
file_out_docx_refreshed (Path | None): Optional refreshed DOCX output path.
|
|
164
|
+
file_listener_log (Path | None): Optional LibreOffice listener log path.
|
|
165
|
+
should_require_toc (bool): Whether refreshed DOCX must contain TOC results.
|
|
166
|
+
should_freeze_fields (bool): Whether refreshed field results should be frozen.
|
|
167
|
+
timeout_seconds (float): Maximum wait time for refreshed DOCX validation.
|
|
168
|
+
poll_interval_seconds (float): Poll interval for refreshed DOCX validation.
|
|
169
|
+
stable_checks (int): Consecutive stable file-stat checks required.
|
|
170
|
+
"""
|
|
171
|
+
|
|
172
|
+
exe_libreoffice: Path
|
|
173
|
+
dir_user_profile: Path
|
|
174
|
+
file_out_docx_refreshed: Path | None = None
|
|
175
|
+
file_listener_log: Path | None = None
|
|
176
|
+
should_require_toc: bool = False
|
|
177
|
+
should_freeze_fields: bool = False
|
|
178
|
+
timeout_seconds: float = 30.0
|
|
179
|
+
poll_interval_seconds: float = 0.5
|
|
180
|
+
stable_checks: int = 2
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
@dataclass(frozen=True, slots=True)
class DocxWriteOptions:
    """Inputs for writing a DOCX file.

    Attributes:
        file_template (Path): Input DOCX template path.
        file_out_docx (Path): Output DOCX path to write.
        context (Mapping[str, Any]): Template context passed to `docxtpl`.
        markdown_body (str): Already-rendered markdown body to insert into
            the DOCX.
        dir_base (Path): Base directory used to resolve relative image paths.
        style (DocxStyle): Structured DOCX style settings.
        anchor_token (str): Paragraph text marking where markdown body
            content is inserted.
        should_update_fields (bool): Whether DOCX fields should be prepared
            for update.
        should_freeze_fields (bool): Whether DOCX fields should be frozen
            after writing.
        field_refresh (DocxFieldRefreshOptions | None): Optional UNO field
            refresh settings.
    """

    file_template: Path
    file_out_docx: Path
    context: Mapping[str, Any]
    markdown_body: str
    dir_base: Path
    style: DocxStyle
    anchor_token: str = "__REPORT_BODY_ANCHOR__"
    should_update_fields: bool = True
    should_freeze_fields: bool = False
    field_refresh: DocxFieldRefreshOptions | None = None
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@dataclass(frozen=True, slots=True)
|
|
215
|
+
class DocxWriteResult:
|
|
216
|
+
"""Result of a DOCX write operation.
|
|
217
|
+
|
|
218
|
+
Attributes:
|
|
219
|
+
file_docx (Path): Written DOCX path.
|
|
220
|
+
"""
|
|
221
|
+
|
|
222
|
+
file_docx: Path
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
@dataclass(frozen=True, slots=True)
|
|
226
|
+
class DocxToPdfOptions:
|
|
227
|
+
"""Inputs for converting a DOCX file to PDF.
|
|
228
|
+
|
|
229
|
+
Attributes:
|
|
230
|
+
exe_libreoffice (Path): LibreOffice executable path.
|
|
231
|
+
file_in_docx (Path): Input DOCX path.
|
|
232
|
+
file_out_pdf (Path): Output PDF path.
|
|
233
|
+
dir_user_profile (Path): Isolated LibreOffice user profile directory.
|
|
234
|
+
file_out_docx_refreshed (Path | None): Optional refreshed DOCX output path.
|
|
235
|
+
file_listener_log (Path | None): Optional LibreOffice listener log path.
|
|
236
|
+
"""
|
|
237
|
+
|
|
238
|
+
exe_libreoffice: Path
|
|
239
|
+
file_in_docx: Path
|
|
240
|
+
file_out_pdf: Path
|
|
241
|
+
dir_user_profile: Path
|
|
242
|
+
file_out_docx_refreshed: Path | None = None
|
|
243
|
+
file_listener_log: Path | None = None
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
@dataclass(frozen=True, slots=True)
|
|
247
|
+
class DocxToPdfResult:
|
|
248
|
+
"""Result of converting DOCX to PDF.
|
|
249
|
+
|
|
250
|
+
Attributes:
|
|
251
|
+
file_pdf (Path): Written PDF path.
|
|
252
|
+
file_docx_refreshed (Path | None): Refreshed DOCX path when requested.
|
|
253
|
+
"""
|
|
254
|
+
|
|
255
|
+
file_pdf: Path
|
|
256
|
+
file_docx_refreshed: Path | None = None
|
docxrender/docx/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Internal DOCX rendering helpers."""
|