docxrender 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docxrender/__init__.py +30 -0
- docxrender/api.py +82 -0
- docxrender/contracts.py +256 -0
- docxrender/docx/__init__.py +1 -0
- docxrender/docx/body.py +369 -0
- docxrender/docx/fields.py +141 -0
- docxrender/docx/refresh.py +113 -0
- docxrender/markdown.py +177 -0
- docxrender/pdf_uno.py +608 -0
- docxrender/writer.py +423 -0
- docxrender-0.1.0.dist-info/METADATA +273 -0
- docxrender-0.1.0.dist-info/RECORD +14 -0
- docxrender-0.1.0.dist-info/WHEEL +4 -0
- docxrender-0.1.0.dist-info/entry_points.txt +4 -0
docxrender/pdf_uno.py
ADDED
|
@@ -0,0 +1,608 @@
|
|
|
1
|
+
"""LibreOffice UNO PDF conversion helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib
|
|
6
|
+
import shutil
|
|
7
|
+
import socket
|
|
8
|
+
import subprocess
|
|
9
|
+
import tempfile
|
|
10
|
+
import time
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from io import BufferedWriter
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Protocol
|
|
15
|
+
|
|
16
|
+
from docxrender.contracts import (
|
|
17
|
+
DocxFieldRefreshOptions,
|
|
18
|
+
DocxToPdfOptions,
|
|
19
|
+
DocxToPdfResult,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Loopback address used both for the listener's --accept socket and for the
# client-side probes/connections made by this module.
LISTENER_HOST = "127.0.0.1"

# Upper bound and polling cadence (seconds) for waiting on the listener socket.
LISTENER_START_TIMEOUT_SECONDS = 15.0
LISTENER_POLL_INTERVAL_SECONDS = 0.1

# Upper bound and polling cadence (seconds) for retrying a document load.
DOCUMENT_LOAD_TIMEOUT_SECONDS = 10.0
DOCUMENT_LOAD_POLL_INTERVAL_SECONDS = 0.2

# Documentation links embedded in the guidance section of error messages.
URL_LIBREOFFICE_PARAMETERS = "https://help.libreoffice.org/latest/en-US/text/shared/guide/start_parameters.html"
URL_LIBREOFFICE_API = "https://api.libreoffice.org/"
URL_DEBIAN_PYTHON_UNO = "https://packages.debian.org/bullseye/python3-uno"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class UnoUpdatable(Protocol):
    """Structural type for any object that exposes an ``update()`` method."""

    def update(self) -> None:
        """Update the object in place; nothing is returned."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class UnoDocumentIndexes(Protocol):
    """Structural type for an index-addressable collection of updatable items."""

    def getCount(self) -> int:
        """Return the number of items in the collection."""

    def getByIndex(self, index: int) -> UnoUpdatable:
        """Return the item stored at position ``index``."""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class UnoTextFields(Protocol):
    """Structural type for a text-field container that can be refreshed."""

    def refresh(self) -> None:
        """Refresh the contained fields; nothing is returned."""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class UnoDisposable(Protocol):
    """Structural type for any object that exposes a ``dispose()`` method."""

    def dispose(self) -> None:
        """Dispose of the object; nothing is returned."""
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class UnoTextDocument(UnoDisposable, Protocol):
    """Structural type for the document object this module drives over UNO.

    Only the methods actually called by this module are declared.
    """

    def updateLinks(self) -> None:
        """Update the document's links."""

    def refresh(self) -> None:
        """Refresh the document."""

    def getDocumentIndexes(self) -> UnoDocumentIndexes:
        """Return the document's index collection."""

    def getTextFields(self) -> UnoTextFields:
        """Return the document's text-field container."""

    def store(self) -> None:
        """Store the document to the location it was loaded from."""

    def storeToURL(self, url: str, properties: tuple[Any, ...]) -> None:
        """Store a copy of the document to ``url`` using ``properties``."""

    def close(self, deliver_ownership: bool) -> None:
        """Close the document."""
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class UnoDesktop(Protocol):
    """Structural type for the desktop object used to load documents."""

    def loadComponentFromURL(
        self,
        url: str,
        target_frame_name: str,
        search_flags: int,
        properties: tuple[Any, ...],
    ) -> UnoTextDocument | None:
        """Load the component at ``url``; ``None`` signals that nothing was loaded."""
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class ListenerProcess(Protocol):
    """Structural type for the process-handle methods this module relies on."""

    def poll(self) -> int | None:
        """Return the exit code, or ``None`` when none is available yet."""

    def terminate(self) -> None:
        """Request that the process terminate."""

    def wait(self, timeout: float | None = None) -> int | None:
        """Wait for the process to exit, optionally bounded by ``timeout``."""

    def kill(self) -> None:
        """Forcefully stop the process."""
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass(frozen=True, slots=True)
|
|
89
|
+
class DocxToPdfState:
|
|
90
|
+
options: DocxToPdfOptions
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def run_docx_to_pdf_pipeline(options: DocxToPdfOptions) -> DocxToPdfResult:
    """Run the full DOCX-to-PDF pipeline for ``options``.

    The pipeline wraps the options in a state object, validates the input
    DOCX, performs the UNO conversion, and finally builds the result object.

    Args:
        options: Conversion options (input/output paths and runtime settings).

    Returns:
        The result describing the produced output files.
    """
    state = create_docx_to_pdf_state(options)
    # Each stage receives the same state object; their return values are unused.
    for stage in (validate_docx_input, convert_docx_to_pdf_with_uno):
        stage(state)
    return create_docx_to_pdf_result(state)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def create_docx_to_pdf_state(options: DocxToPdfOptions) -> DocxToPdfState:
    """Wrap ``options`` in a fresh ``DocxToPdfState``.

    Args:
        options: Conversion options to carry through the pipeline.

    Returns:
        A new state object holding ``options``.
    """
    state = DocxToPdfState(options=options)
    return state
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def validate_docx_input(state: DocxToPdfState) -> DocxToPdfState:
    """Check that the input DOCX exists, is a non-empty regular file, and opens.

    Args:
        state: Pipeline state whose ``options.file_in_docx`` is validated.

    Returns:
        The same ``state`` object, unchanged.

    Raises:
        FileNotFoundError: If the input path does not exist.
        RuntimeError: If the input is not a regular file or has zero size.
        OSError: If the file cannot be opened for reading.
    """
    source = state.options.file_in_docx

    if not source.exists():
        raise FileNotFoundError(f"Input DOCX does not exist: {source.resolve()}")

    problem = None
    if not source.is_file():
        problem = f"Input DOCX is not a regular file: {source.resolve()}"
    elif source.stat().st_size == 0:
        problem = f"Input DOCX is empty: {source.resolve()}"
    if problem is not None:
        raise RuntimeError(problem)

    # Open and immediately close the file purely as a readability probe.
    handle = source.open("rb")
    handle.close()
    return state
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def create_docx_to_pdf_result(state: DocxToPdfState) -> DocxToPdfResult:
    """Build the pipeline result from the output paths stored in the options.

    Args:
        state: Pipeline state carrying the conversion options.

    Returns:
        A result referencing the PDF path and the optional refreshed-DOCX path.
    """
    options = state.options
    file_pdf = options.file_out_pdf
    file_docx_refreshed = options.file_out_docx_refreshed
    return DocxToPdfResult(file_pdf=file_pdf, file_docx_refreshed=file_docx_refreshed)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def create_libreoffice_listener_command(
    *,
    exe_libreoffice: Path,
    dir_user_profile: Path,
    port: int,
) -> list[str]:
    """Assemble the argv list that starts a headless LibreOffice UNO listener.

    Args:
        exe_libreoffice: Path of the LibreOffice executable (first argv item).
        dir_user_profile: Profile directory, passed as a ``file:`` URI through
            ``-env:UserInstallation``.
        port: TCP port the listener should accept connections on.

    Returns:
        The command as a list of strings.
    """
    accept_argument = f"--accept=socket,host={LISTENER_HOST},port={port};urp;"
    profile_uri = dir_user_profile.resolve().as_uri()
    command = [str(exe_libreoffice), "--headless", accept_argument]
    command += ["--norestore", "--nodefault"]
    command.append(f"-env:UserInstallation={profile_uri}")
    return command
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def convert_docx_to_pdf_with_uno(state: DocxToPdfState) -> DocxToPdfState:
    """Convert the input DOCX to PDF through a temporary LibreOffice listener.

    The input is copied into a temporary staging directory, a listener process
    is started on a free local port, the staged copy is loaded over UNO, its
    fields are refreshed, the staged copy is stored, and a PDF is exported with
    the ``writer_pdf_Export`` filter. When ``file_out_docx_refreshed`` is set,
    the stored staged DOCX is copied there as well.

    Args:
        state: Pipeline state carrying the conversion options.

    Returns:
        The same ``state`` object.

    Raises:
        RuntimeError, FileNotFoundError, TimeoutError: Propagated from the
            helpers that import UNO, start/await the listener, or load the
            document.
    """
    options = state.options
    options.dir_user_profile.mkdir(parents=True, exist_ok=True)
    options.file_out_pdf.parent.mkdir(parents=True, exist_ok=True)
    if options.file_out_docx_refreshed is not None:
        options.file_out_docx_refreshed.parent.mkdir(parents=True, exist_ok=True)

    with tempfile.TemporaryDirectory(prefix="docxrender-docx-stage-") as dir_stage_tmp:
        file_in_docx_staged = copy_docx_to_stage(
            options.file_in_docx,
            dir_stage=Path(dir_stage_tmp),
        )
        uno_module = import_uno_module()
        port = select_free_port()
        (
            file_listener_log_resolved,
            handle_listener_log,
            stdout_listener,
            stderr_listener,
        ) = open_listener_log_handle(options.file_listener_log)
        # The log handle is open from here on, so everything that follows is
        # guarded: the handle must be closed even when the listener fails to
        # start or when terminating it raises.
        try:
            process_listener = start_libreoffice_listener(
                exe_libreoffice=options.exe_libreoffice,
                dir_user_profile=options.dir_user_profile,
                port=port,
                stdout=stdout_listener,
                stderr=stderr_listener,
                file_listener_log=file_listener_log_resolved,
            )
            try:
                wait_for_listener(port, file_listener_log=file_listener_log_resolved)
                desktop = connect_desktop(uno_module, port)
                doc: UnoTextDocument | None = None
                try:
                    doc = load_uno_document_or_raise(
                        uno_module=uno_module,
                        desktop=desktop,
                        file_in_docx_source=options.file_in_docx,
                        file_in_docx_staged=file_in_docx_staged,
                        exe_libreoffice=options.exe_libreoffice,
                        dir_user_profile=options.dir_user_profile,
                        process_listener=process_listener,
                        file_listener_log=file_listener_log_resolved,
                        file_source_lock=find_source_lock_file(options.file_in_docx),
                    )
                    refresh_uno_document_fields(doc)
                    # Persist the refreshed fields into the staged copy before
                    # exporting, so the optional DOCX output reflects them.
                    doc.store()
                    doc.storeToURL(
                        uno_module.systemPathToFileUrl(str(options.file_out_pdf.resolve())),
                        (
                            create_property("FilterName", "writer_pdf_Export"),
                            create_property("Overwrite", True),
                        ),
                    )
                    if options.file_out_docx_refreshed is not None:
                        shutil.copy2(file_in_docx_staged, options.file_out_docx_refreshed)
                finally:
                    close_document(doc)
            finally:
                terminate_process(process_listener)
        finally:
            if handle_listener_log is not None:
                handle_listener_log.close()
    return state
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def refresh_docx_with_uno(
    *,
    file_in_docx: Path,
    file_out_docx: Path,
    options: DocxFieldRefreshOptions,
) -> None:
    """Refresh the fields of a DOCX through LibreOffice and write the result.

    The input is copied into a temporary staging directory, loaded over UNO
    from a temporary listener process, refreshed, and stored. After the
    document is closed and the listener terminated, the staged file is copied
    to ``file_out_docx``.

    Args:
        file_in_docx: Source DOCX; it is only read, never modified.
        file_out_docx: Destination for the refreshed DOCX.
        options: Runtime settings (executable, profile directory, listener log).

    Raises:
        RuntimeError, FileNotFoundError, TimeoutError: Propagated from the
            helpers that import UNO, start/await the listener, or load the
            document.
    """
    options.dir_user_profile.mkdir(parents=True, exist_ok=True)
    file_out_docx.parent.mkdir(parents=True, exist_ok=True)

    with tempfile.TemporaryDirectory(
        prefix="docxrender-docx-refresh-stage-"
    ) as dir_tmp:
        file_in_docx_staged = copy_docx_to_stage(file_in_docx, dir_stage=Path(dir_tmp))
        uno_module = import_uno_module()
        port = select_free_port()
        (
            file_listener_log_resolved,
            handle_listener_log,
            stdout_listener,
            stderr_listener,
        ) = open_listener_log_handle(options.file_listener_log)
        # The log handle is open from here on, so everything that follows is
        # guarded: the handle must be closed even when the listener fails to
        # start or when terminating it raises.
        try:
            process_listener = start_libreoffice_listener(
                exe_libreoffice=options.exe_libreoffice,
                dir_user_profile=options.dir_user_profile,
                port=port,
                stdout=stdout_listener,
                stderr=stderr_listener,
                file_listener_log=file_listener_log_resolved,
            )
            try:
                wait_for_listener(port, file_listener_log=file_listener_log_resolved)
                desktop = connect_desktop(uno_module, port)
                doc: UnoTextDocument | None = None
                try:
                    doc = load_uno_document_or_raise(
                        uno_module=uno_module,
                        desktop=desktop,
                        file_in_docx_source=file_in_docx,
                        file_in_docx_staged=file_in_docx_staged,
                        exe_libreoffice=options.exe_libreoffice,
                        dir_user_profile=options.dir_user_profile,
                        process_listener=process_listener,
                        file_listener_log=file_listener_log_resolved,
                        file_source_lock=find_source_lock_file(file_in_docx),
                    )
                    refresh_uno_document_fields(doc)
                    doc.store()
                finally:
                    close_document(doc)
            finally:
                terminate_process(process_listener)
        finally:
            if handle_listener_log is not None:
                handle_listener_log.close()
        # Copy while the staging directory still exists, and only once the
        # document is closed and the listener has been stopped.
        shutil.copy2(file_in_docx_staged, file_out_docx)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def import_uno_module() -> Any:
    """Import and return the ``uno`` module.

    Returns:
        The imported ``uno`` module object.

    Raises:
        RuntimeError: If ``uno`` cannot be imported; the message carries an
            error code, a reason, and the runtime guidance fields.
    """
    try:
        module_uno = importlib.import_module("uno")
    except ImportError as exc:
        message_lines = ["error_code=libreoffice_uno_import_failed"]
        message_lines.append(
            "reason=UNO Python bindings are not importable in this Python "
            "environment."
        )
        message_lines.extend(create_libreoffice_runtime_guidance_fields())
        raise RuntimeError("\n".join(message_lines)) from exc
    return module_uno
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def create_libreoffice_runtime_guidance_fields() -> list[str]:
    """Return the ``key=value`` guidance lines appended to runtime error messages.

    Returns:
        A new list with the dependency note, validation commands, an install
        hint, and documentation links.
    """
    dependency_note = (
        "runtime_dependency=LibreOffice and Python-UNO are external runtime "
        "dependencies; docxrender does not install them through a Python "
        "package extra."
    )
    commands = [
        "validate_libreoffice=libreoffice --headless --version",
        'validate_uno=python -c "import uno"',
        "install_debian_ubuntu=sudo apt install libreoffice python3-uno",
    ]
    documentation = [
        f"docs_libreoffice_parameters={URL_LIBREOFFICE_PARAMETERS}",
        f"docs_libreoffice_api={URL_LIBREOFFICE_API}",
        f"docs_debian_python_uno={URL_DEBIAN_PYTHON_UNO}",
    ]
    return [dependency_note, *commands, *documentation]
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def validate_libreoffice_executable(exe_libreoffice: Path) -> None:
    """Ensure ``exe_libreoffice`` points at an existing regular file.

    Args:
        exe_libreoffice: Path of the LibreOffice executable to check.

    Raises:
        FileNotFoundError: If the path does not exist.
        RuntimeError: If the path exists but is not a regular file.
    """
    if not exe_libreoffice.exists():
        error_type, error_code = FileNotFoundError, "libreoffice_executable_missing"
    elif not exe_libreoffice.is_file():
        error_type, error_code = RuntimeError, "libreoffice_executable_not_file"
    else:
        return

    # Both failures share one message layout; only the code and type differ.
    message_lines = [
        f"error_code={error_code}",
        f"exe_libreoffice={exe_libreoffice.resolve()}",
    ]
    message_lines.extend(create_libreoffice_runtime_guidance_fields())
    raise error_type("\n".join(message_lines))
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def start_libreoffice_listener(
    *,
    exe_libreoffice: Path,
    dir_user_profile: Path,
    port: int,
    stdout: BufferedWriter | None,
    stderr: BufferedWriter | None,
    file_listener_log: Path | None,
) -> subprocess.Popen[bytes]:
    """Validate the executable and launch the LibreOffice listener process.

    Args:
        exe_libreoffice: Path of the LibreOffice executable.
        dir_user_profile: Profile directory passed to the listener.
        port: TCP port the listener should accept on.
        stdout: Stream given to ``Popen`` as stdout (``None`` for the default).
        stderr: Stream given to ``Popen`` as stderr (``None`` for the default).
        file_listener_log: Log path, used only for the error message label.

    Returns:
        The started process handle.

    Raises:
        FileNotFoundError: If the executable path does not exist (validation).
        RuntimeError: If the executable is not a regular file, or if launching
            fails with ``FileNotFoundError`` or ``PermissionError``.
    """
    validate_libreoffice_executable(exe_libreoffice)
    command = create_libreoffice_listener_command(
        exe_libreoffice=exe_libreoffice,
        dir_user_profile=dir_user_profile,
        port=port,
    )
    try:
        process = subprocess.Popen(command, stdout=stdout, stderr=stderr)
    except (FileNotFoundError, PermissionError) as exc:
        message_lines = [
            "error_code=libreoffice_listener_start_failed",
            f"exe_libreoffice={exe_libreoffice.resolve()}",
            f"dir_user_profile={dir_user_profile.resolve()}",
            f"listener_log={listener_log_label(file_listener_log)}",
            f"launch_error={type(exc).__name__}: {exc}",
        ]
        message_lines.extend(create_libreoffice_runtime_guidance_fields())
        raise RuntimeError("\n".join(message_lines)) from exc
    return process
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def select_free_port() -> int:
    """Return a TCP port number obtained by binding to port 0 on the listener host.

    The probing socket is closed before returning, so the port is not
    reserved for the caller.

    Returns:
        The port number reported by the socket after binding.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Port 0 lets the operating system choose the port.
        probe.bind((LISTENER_HOST, 0))
        bound_address = probe.getsockname()
        return int(bound_address[1])
    finally:
        probe.close()
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def wait_for_listener(port: int, *, file_listener_log: Path | None = None) -> None:
    """Block until a TCP connection to the listener succeeds, or time out.

    Args:
        port: Port on ``LISTENER_HOST`` to probe.
        file_listener_log: Optional log file; its tail is included in the
            timeout error when non-empty.

    Raises:
        TimeoutError: If no connection succeeds within
            ``LISTENER_START_TIMEOUT_SECONDS``.
    """
    address = (LISTENER_HOST, port)
    deadline = time.monotonic() + LISTENER_START_TIMEOUT_SECONDS
    while time.monotonic() < deadline:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.settimeout(LISTENER_POLL_INTERVAL_SECONDS)
            is_listening = probe.connect_ex(address) == 0
        if is_listening:
            return
        time.sleep(LISTENER_POLL_INTERVAL_SECONDS)

    message_lines = [
        "error_code=libreoffice_uno_listener_timeout",
        f"listener_host={LISTENER_HOST}",
        f"listener_port={port}",
        f"listener_log={listener_log_label(file_listener_log)}",
    ]
    log_tail = read_log_tail(file_listener_log)
    if log_tail:
        message_lines.append(f"listener_log_tail={format_log_field(log_tail)}")
    message_lines += create_libreoffice_runtime_guidance_fields()
    raise TimeoutError("\n".join(message_lines))
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def create_property(name: str, value: object) -> Any:
    """Create a ``com.sun.star.beans.PropertyValue`` with the given name and value.

    Args:
        name: Assigned to the property's ``Name`` attribute.
        value: Assigned to the property's ``Value`` attribute.

    Returns:
        The populated ``PropertyValue`` instance.
    """
    # Imported on demand, at call time rather than at module import.
    property_value = importlib.import_module("com.sun.star.beans").PropertyValue()
    property_value.Name = name
    property_value.Value = value
    return property_value
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def copy_docx_to_stage(file_in_docx: Path, *, dir_stage: Path) -> Path:
    """Copy ``file_in_docx`` into ``dir_stage`` under the same file name.

    Args:
        file_in_docx: Source file to copy.
        dir_stage: Existing directory that receives the copy.

    Returns:
        Path of the staged copy.
    """
    destination = dir_stage.joinpath(file_in_docx.name)
    shutil.copy2(file_in_docx, destination)
    return destination
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def find_source_lock_file(file_in_docx: Path) -> Path | None:
    """Return the ``.~lock.<name>#`` file next to ``file_in_docx`` if it exists.

    Args:
        file_in_docx: Document whose sibling lock file is looked up.

    Returns:
        The lock file path when it exists, otherwise ``None``.
    """
    lock_name = f".~lock.{file_in_docx.name}#"
    candidate = file_in_docx.parent / lock_name
    return candidate if candidate.exists() else None
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def open_listener_log_handle(
    file_listener_log: Path | None,
) -> tuple[
    Path | None,
    BufferedWriter | None,
    BufferedWriter | None,
    BufferedWriter | None,
]:
    """Open the listener log for appending and return it as stdout/stderr targets.

    Args:
        file_listener_log: Log file path, or ``None`` to disable file logging.

    Returns:
        ``(resolved_path, handle, stdout, stderr)``. The last three items are
        the same open binary handle; all four items are ``None`` when no log
        file was requested. The caller is responsible for closing the handle.
    """
    if file_listener_log is None:
        return None, None, None, None

    path_log = file_listener_log.resolve()
    path_log.parent.mkdir(parents=True, exist_ok=True)
    # One append-mode handle serves as both stdout and stderr of the listener.
    sink = path_log.open("ab")
    return path_log, sink, sink, sink
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def connect_desktop(uno_module: Any, port: int) -> UnoDesktop:
    """Resolve the remote component context and create a Desktop instance.

    Args:
        uno_module: The imported ``uno`` module.
        port: Port on ``LISTENER_HOST`` where the listener accepts connections.

    Returns:
        The ``com.sun.star.frame.Desktop`` instance created in the remote
        context.
    """
    local_context = uno_module.getComponentContext()
    local_services = local_context.ServiceManager
    resolver = local_services.createInstanceWithContext(
        "com.sun.star.bridge.UnoUrlResolver",
        local_context,
    )

    connection_url = (
        f"uno:socket,host={LISTENER_HOST},port={port};urp;StarOffice.ComponentContext"
    )
    remote_context = resolver.resolve(connection_url)
    remote_services = remote_context.ServiceManager
    return remote_services.createInstanceWithContext(
        "com.sun.star.frame.Desktop",
        remote_context,
    )
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def refresh_uno_document_fields(doc: UnoTextDocument) -> None:
    """Refresh the document, its links, every document index, and its text fields.

    The calls run in a fixed order: ``refresh``, ``updateLinks``, ``update`` on
    each document index, ``refresh`` on the text fields, and a final
    ``refresh`` on the document.

    Args:
        doc: The loaded document to refresh in place.
    """
    doc.refresh()
    doc.updateLinks()

    document_indexes = doc.getDocumentIndexes()
    index_count = document_indexes.getCount()
    position = 0
    while position < index_count:
        document_indexes.getByIndex(position).update()
        position += 1

    text_fields = doc.getTextFields()
    text_fields.refresh()
    # Second document-level refresh, after indexes and fields were updated.
    doc.refresh()
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def load_uno_document_or_raise(
    *,
    uno_module: Any,
    desktop: UnoDesktop,
    file_in_docx_source: Path,
    file_in_docx_staged: Path,
    exe_libreoffice: Path,
    dir_user_profile: Path,
    process_listener: ListenerProcess,
    file_listener_log: Path | None,
    file_source_lock: Path | None,
) -> UnoTextDocument:
    """Load the staged DOCX over UNO, or raise with a diagnostic field report.

    A throwaway ``private:factory/swriter`` document is loaded first as a
    probe and closed again. The staged file is then loaded with the default
    property set and, if that yields nothing, once more with only ``Hidden``.

    Args:
        uno_module: The imported ``uno`` module (used to build the file URL).
        desktop: Desktop object used for loading.
        file_in_docx_source: Original input path (reported in errors only).
        file_in_docx_staged: Staged copy that is actually loaded.
        exe_libreoffice: Executable path (reported in errors only).
        dir_user_profile: Profile directory (reported in errors only).
        process_listener: Listener process, polled for the error report.
        file_listener_log: Listener log path, if any, for the error report.
        file_source_lock: Lock file found next to the source, if any.

    Returns:
        The loaded document.

    Raises:
        RuntimeError: If both load attempts return nothing.
    """
    file_url = uno_module.systemPathToFileUrl(str(file_in_docx_staged.resolve()))
    props_default = (
        create_property("Hidden", True),
        create_property("ReadOnly", False),
        create_property("UpdateDocMode", 1),
    )
    props_hidden_only = (create_property("Hidden", True),)

    # Probe with a blank document so the error report can show whether a
    # document could be created at all, independent of the staged file.
    doc_probe = load_document_with_retry(
        desktop=desktop,
        url="private:factory/swriter",
        properties=props_hidden_only,
    )
    probe_ok = doc_probe is not None
    if probe_ok:
        close_document(doc_probe)

    for properties in (props_default, props_hidden_only):
        doc = load_document_with_retry(
            desktop=desktop,
            url=file_url,
            properties=properties,
        )
        if doc is not None:
            return doc

    # Reaching this point means both staged-file attempts returned nothing.
    failure_fields = create_load_failure_fields(
        file_in_docx_source=file_in_docx_source,
        file_in_docx_staged=file_in_docx_staged,
        file_url=file_url,
        exe_libreoffice=exe_libreoffice,
        dir_user_profile=dir_user_profile,
        process_listener=process_listener,
        file_listener_log=file_listener_log,
        file_source_lock=file_source_lock,
        probe_ok=probe_ok,
        load_default_ok=False,
        load_hidden_only_ok=False,
    )
    raise RuntimeError("\n".join(failure_fields))
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def load_document_with_retry(
    *,
    desktop: UnoDesktop,
    url: str,
    properties: tuple[Any, ...],
) -> UnoTextDocument | None:
    """Call ``loadComponentFromURL`` repeatedly until it returns a document.

    Args:
        desktop: Desktop object used for loading.
        url: URL of the component to load.
        properties: Load properties passed through unchanged.

    Returns:
        The loaded document, or ``None`` if every attempt up to
        ``DOCUMENT_LOAD_TIMEOUT_SECONDS`` returned ``None``.
    """
    deadline = time.monotonic() + DOCUMENT_LOAD_TIMEOUT_SECONDS
    # The first attempt always runs, even if the deadline were already past.
    doc = desktop.loadComponentFromURL(url, "_blank", 0, properties)
    while doc is None and time.monotonic() < deadline:
        time.sleep(DOCUMENT_LOAD_POLL_INTERVAL_SECONDS)
        doc = desktop.loadComponentFromURL(url, "_blank", 0, properties)
    return doc
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def create_load_failure_fields(
    *,
    file_in_docx_source: Path,
    file_in_docx_staged: Path,
    file_url: str,
    exe_libreoffice: Path,
    dir_user_profile: Path,
    process_listener: ListenerProcess,
    file_listener_log: Path | None,
    file_source_lock: Path | None,
    probe_ok: bool,
    load_default_ok: bool,
    load_hidden_only_ok: bool,
) -> list[str]:
    """Build the ``key=value`` lines describing a failed document load.

    Args:
        file_in_docx_source: Original input path.
        file_in_docx_staged: Staged copy that failed to load.
        file_url: URL that was passed to the loader.
        exe_libreoffice: Executable path.
        dir_user_profile: Profile directory.
        process_listener: Listener process; polled for its exit code.
        file_listener_log: Listener log path, if any.
        file_source_lock: Lock file found next to the source, if any.
        probe_ok: Whether the blank-document probe succeeded.
        load_default_ok: Whether the default-properties load succeeded.
        load_hidden_only_ok: Whether the hidden-only load succeeded.

    Returns:
        The report lines, ending with the runtime guidance fields.
    """

    def status(succeeded: bool) -> str:
        return "ok" if succeeded else "failed"

    exit_code = process_listener.poll()
    log_tail = read_log_tail(file_listener_log)

    # The first matching condition determines the reported reason.
    listener_failed = exit_code is not None and exit_code != 0
    if not probe_ok:
        reason_code = "uno_writer_probe_failed"
    elif listener_failed:
        reason_code = "listener_exited"
    elif not (load_default_ok or load_hidden_only_ok):
        reason_code = "staged_docx_import_failed"
    else:
        reason_code = "unknown_load_failure"

    has_source_lock = file_source_lock is not None
    report = [
        "error_code=libreoffice_uno_load_failed",
        f"reason_code={reason_code}",
        f"file_in_docx={file_in_docx_source.resolve()}",
        f"file_in_docx_staged={file_in_docx_staged.resolve()}",
        f"file_url={file_url}",
        f"exe_libreoffice={exe_libreoffice.resolve()}",
        f"dir_user_profile={dir_user_profile.resolve()}",
        f"listener_exit_code={exit_code}",
        f"listener_log={listener_log_label(file_listener_log)}",
        f"source_lock_file_present={has_source_lock}",
        f"probe_swriter_factory={status(probe_ok)}",
        f"load_staged_default_props={status(load_default_ok)}",
        f"load_staged_hidden_only={status(load_hidden_only_ok)}",
        f"staged_docx_size_bytes={file_in_docx_staged.stat().st_size}",
    ]
    if has_source_lock:
        report.append(f"source_lock_file={file_source_lock.resolve()}")
    if log_tail:
        report.append(f"listener_log_tail={format_log_field(log_tail)}")
    report += create_libreoffice_runtime_guidance_fields()
    return report
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def read_log_tail(file_log: Path | None, *, max_bytes: int = 4000) -> str:
    """Return up to the last ``max_bytes`` bytes of ``file_log`` as stripped text.

    This helper is used while building error messages, so it is best-effort:
    any failure to open or read the log yields an empty string instead of an
    exception that would replace the error being reported.

    Args:
        file_log: Log file to read, or ``None``.
        max_bytes: Maximum number of trailing bytes to read.

    Returns:
        The decoded tail with surrounding whitespace removed, or ``""`` when
        there is no log file or it cannot be read. Undecodable bytes (for
        example a multi-byte character cut at the start of the tail) are
        replaced rather than raising.
    """
    if file_log is None:
        return ""
    try:
        with file_log.open("rb") as handle_log:
            # whence=2 seeks relative to the end; the new offset is the size.
            size = handle_log.seek(0, 2)
            handle_log.seek(max(size - max_bytes, 0))
            raw_tail = handle_log.read()
    except OSError:
        # Missing file, directory, permission problem, or a read error.
        return ""
    return raw_tail.decode("utf-8", errors="replace").strip()
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def format_log_field(value: object) -> str:
    """Render ``value`` as single-line text for a ``key=value`` message field.

    Line feeds are replaced by the two characters ``\\n``. Windows line
    endings (CRLF) are treated as one line break, and any remaining carriage
    return is escaped as ``\\r`` so that no raw line-control character ends up
    inside the field.

    Args:
        value: Any object; it is converted with ``str()``.

    Returns:
        The escaped single-line text.
    """
    text = str(value).replace("\r\n", "\n")
    return text.replace("\n", r"\n").replace("\r", r"\r")
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
def listener_log_label(file_listener_log: Path | None) -> str:
    """Return the text used for the ``listener_log`` field in error messages.

    Args:
        file_listener_log: Log file path, or ``None`` when no file is used.

    Returns:
        ``"stderr"`` when no log file is configured, otherwise the path as text.
    """
    return "stderr" if file_listener_log is None else str(file_listener_log)
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def close_document(doc: UnoTextDocument | None) -> None:
    """Close ``doc`` if it has a ``close`` attribute, otherwise dispose of it.

    Args:
        doc: Document to release; ``None`` is accepted and ignored.
    """
    if doc is not None:
        if not hasattr(doc, "close"):
            doc.dispose()
        else:
            doc.close(True)
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def terminate_process(process: ListenerProcess) -> None:
    """Terminate ``process``, escalating to ``kill`` if it does not exit in time.

    Args:
        process: Process handle to stop.

    Raises:
        subprocess.TimeoutExpired: If the process still has not exited within
            the wait that follows ``kill``.
    """
    wait_seconds = 5
    process.terminate()
    try:
        process.wait(timeout=wait_seconds)
    except subprocess.TimeoutExpired:
        # Graceful termination took too long; force the process down.
        process.kill()
        process.wait(timeout=wait_seconds)
|