mformat-ext 0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mformat_ext/__init__.py +6 -0
- mformat_ext/mformat_docx.py +428 -0
- mformat_ext/mformat_odt.py +666 -0
- mformat_ext/py.typed +0 -0
- mformat_ext/reg_extpkg_formats.py +16 -0
- mformat_ext-0.2.dist-info/METADATA +134 -0
- mformat_ext-0.2.dist-info/RECORD +9 -0
- mformat_ext-0.2.dist-info/WHEEL +5 -0
- mformat_ext-0.2.dist-info/top_level.txt +1 -0
mformat_ext/__init__.py
ADDED
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
#! /usr/local/bin/python3
|
|
2
|
+
"""Extension of the MultiFormat class for DOCX files."""
|
|
3
|
+
|
|
4
|
+
# Copyright (c) 2025 - 2026 Tom Björkholm
|
|
5
|
+
# MIT License
|
|
6
|
+
#
|
|
7
|
+
|
|
8
|
+
from typing import Optional, Callable
|
|
9
|
+
from docx import Document
|
|
10
|
+
from docx.document import Document as DocumentObject
|
|
11
|
+
from docx.text.paragraph import Paragraph
|
|
12
|
+
from docx.shared import Inches, Twips
|
|
13
|
+
from docx.oxml.ns import qn
|
|
14
|
+
from docx.oxml import OxmlElement
|
|
15
|
+
from mformat.mformat import FormatterDescriptor, MultiFormat
|
|
16
|
+
from mformat.mformat_state import MultiFormatState, Formatting
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MultiFormatDocx(MultiFormat):
    """Extension of the MultiFormat class for DOCX files.

    The document is assembled in memory as a python-docx document and
    written to ``file_name`` by open() and by _close().
    """

    def __init__(self, file_name: str, url_as_text: bool = False,
                 file_exists_callback: Optional[Callable[[str], None]] = None):
        """Initialize the MultiFormatDocx class.

        Args:
            file_name: The name of the file to write to.
            url_as_text: Format URLs as text not clickable URLs.
            file_exists_callback: A callback function to call if the file
                                  already exists. Return to allow the file to
                                  be overwritten. Raise an exception to prevent
                                  the file from being overwritten.
                                  (May for instance save existing file as
                                  backup.)
                                  (Default is to raise an exception.)
        """
        # Both attributes are set before the base class initializer runs.
        self.doc: DocumentObject = Document()
        self.current_paragraph: Optional[Paragraph] = None
        super().__init__(file_name=file_name, url_as_text=url_as_text,
                         file_exists_callback=file_exists_callback)

    @classmethod
    def file_name_extension(cls) -> str:
        """Get the file name extension for the formatter."""
        return '.docx'

    @classmethod
    def get_arg_desciption(cls) -> FormatterDescriptor:
        """Get the description of the arguments for the formatter.

        The DOCX formatter is named 'docx' and declares neither
        mandatory nor optional extra arguments.
        """
        return FormatterDescriptor(name='docx', mandatory_args=[],
                                   optional_args=[])

    def open(self) -> None:
        """Open the file.

        Saves the (still empty) in-memory document to the file.

        Avoid using this method directly.
        Use as a context manager instead, using a with statement.
        """
        self.doc.save(self.file_name)

    def _close(self) -> None:
        """Close the file.

        Saves the document to the file, unless the state is still
        MultiFormatState.EMPTY.

        Avoid using this method directly.
        Use as a context manager instead, using a with statement.
        """
        if self.state == MultiFormatState.EMPTY:
            return
        self.doc.save(self.file_name)

    def _write_file_prefix(self) -> None:
        """Write the file prefix.

        For DOCX files, this is a no-op since the document
        structure is handled by python-docx.
        """

    def _write_file_suffix(self) -> None:
        """Write the file suffix.

        For DOCX files, this is a no-op since the document
        structure is handled by python-docx.
        """

    def _start_paragraph(self) -> None:
        """Start a paragraph."""
        self.current_paragraph = self.doc.add_paragraph()

    def _end_paragraph(self) -> None:
        """End a paragraph."""
        self.current_paragraph = None

    def _start_heading(self, level: int) -> None:
        """Start a heading.

        Args:
            level: The level of the heading (1-9).
        """
        self.current_paragraph = self.doc.add_heading(level=level)

    def _end_heading(self, level: int) -> None:
        """End a heading.

        Args:
            level: The level of the heading (1-9).
        """
        self.current_paragraph = None

    @staticmethod
    def _add_formatted_run(paragraph: Paragraph, text: str,
                           formatting: Formatting) -> None:
        """Append text to a paragraph as a run with bold/italic applied.

        Args:
            paragraph: The paragraph to append the run to.
            text: The text of the run.
            formatting: The formatting (bold/italic) for the text.
        """
        run = paragraph.add_run(text)
        # Only switch the properties on; an unset property is left
        # untouched on the run.
        if formatting.bold:
            run.bold = True
        if formatting.italic:
            run.italic = True

    def _write_text(self, text: str, state: MultiFormatState,
                    formatting: Formatting) -> None:
        """Write text into current item (paragraph, bullet list item, etc.).

        Args:
            text: The text to write into the current item.
            state: The state of the current item.
            formatting: The formatting of the text.

        Raises:
            RuntimeError: If there is no current paragraph.
        """
        if self.current_paragraph is None:
            raise RuntimeError('No current paragraph to write text into')
        self._add_formatted_run(self.current_paragraph, text, formatting)

    def _write_url(self, url: str, text: Optional[str],
                   state: MultiFormatState,
                   formatting: Formatting) -> None:
        """Write a URL into current item (paragraph, bullet list item, etc.).

        Args:
            url: The URL to write into the current item.
            text: The text to display for the URL. The URL itself is
                  displayed when no text is given.
            state: The state of the current item.
            formatting: The formatting of the text.

        Raises:
            RuntimeError: If there is no current paragraph.
        """  # pylint: disable=too-many-arguments,too-many-positional-arguments
        if self.current_paragraph is None:
            raise RuntimeError('No current paragraph to write URL into')
        if not text:
            text = url
        self._add_hyperlink(self.current_paragraph, url, text, formatting)

    def _add_hyperlink(self, paragraph: Paragraph, url: str,
                       text: str, formatting: Formatting) -> None:
        """Add a clickable hyperlink to a paragraph.

        Args:
            paragraph: The paragraph to add the hyperlink to.
            url: The URL for the hyperlink.
            text: The display text for the hyperlink.
            formatting: The formatting (bold/italic) for the text.
        """
        # Create relationship for the hyperlink
        # The relationship type for external hyperlinks
        rel_type = ('http://schemas.openxmlformats.org/officeDocument/'
                    '2006/relationships/hyperlink')
        r_id = self.doc.part.relate_to(url, rel_type, is_external=True)
        # Create hyperlink element
        hyperlink = OxmlElement('w:hyperlink')
        hyperlink.set(qn('r:id'), r_id)
        # Create run inside hyperlink
        run_element = OxmlElement('w:r')
        run_props = OxmlElement('w:rPr')
        # Add blue color for link appearance
        color = OxmlElement('w:color')
        color.set(qn('w:val'), '0000FF')
        run_props.append(color)
        # Add underline for link appearance
        underline = OxmlElement('w:u')
        underline.set(qn('w:val'), 'single')
        run_props.append(underline)
        # Add bold if requested
        if formatting.bold:
            run_props.append(OxmlElement('w:b'))
        # Add italic if requested
        if formatting.italic:
            run_props.append(OxmlElement('w:i'))
        run_element.append(run_props)
        # Add text element
        text_element = OxmlElement('w:t')
        text_element.text = text
        if text != text.strip():
            # Without xml:space="preserve" leading and trailing
            # whitespace of the link text is not kept by consumers.
            text_element.set(qn('xml:space'), 'preserve')
        run_element.append(text_element)
        hyperlink.append(run_element)
        # Append hyperlink to paragraph
        # pylint: disable=protected-access
        paragraph._p.append(hyperlink)

    def _start_bullet_list(self, level: int) -> None:
        """Start a bullet list.

        Args:
            level: The level of the bullet list (1-9).
        """
        assert isinstance(level, int)
        # In python-docx, lists are created by setting paragraph styles
        # No explicit list start/end is needed

    def _end_bullet_list(self, level: int) -> None:
        """End a bullet list.

        Args:
            level: The level of the bullet list (1-9).
        """
        assert isinstance(level, int)
        # In python-docx, lists are created by setting paragraph styles
        # No explicit list start/end is needed

    def _start_bullet_item(self, level: int) -> None:
        """Start a bullet item.

        Args:
            level: The level of the bullet item (1-9).
        """
        assert isinstance(level, int)
        self.current_paragraph = self.doc.add_paragraph(style='List Bullet')
        # Set the indentation level for nested lists:
        # half an inch of extra left indent per level below the first.
        if level > 1:
            self.current_paragraph.paragraph_format.left_indent = \
                Inches(0.5 * (level - 1))

    def _end_bullet_item(self, level: int) -> None:
        """End a bullet item.

        Args:
            level: The level of the bullet item (1-9).
        """
        assert isinstance(level, int)
        self.current_paragraph = None

    def _start_numbered_list(self, level: int) -> None:
        """Start a numbered list.

        Args:
            level: The level of the numbered list (1-9).
        """
        assert isinstance(level, int)
        # In python-docx, lists are created by setting paragraph styles
        # No explicit list start/end is needed

    def _end_numbered_list(self, level: int) -> None:
        """End a numbered list.

        Args:
            level: The level of the numbered list (1-9).
        """
        assert isinstance(level, int)
        # In python-docx, lists are created by setting paragraph styles
        # No explicit list start/end is needed

    def _start_numbered_item(self, level: int, num: int,
                             full_number: str) -> None:
        """Start a numbered item.

        Args:
            level: The level of the numbered item (1-9).
            num: The number of the item.
            full_number: The full number of the item including all levels.
        """
        assert isinstance(level, int)
        assert isinstance(num, int)
        assert isinstance(full_number, str)
        # Use regular paragraph with manual numbering for hierarchical numbers
        # The 'List Number' style only supports simple sequential numbering
        self.current_paragraph = self.doc.add_paragraph()
        # Set up hanging indent with tab stop for proper text alignment
        # Using twips: 720 twips = 0.5 inches
        number_width_twips = 720  # Space reserved for the number
        base_indent_twips = 720 * (level - 1)  # Additional indent per level
        text_position_twips = base_indent_twips + number_width_twips
        self.current_paragraph.paragraph_format.left_indent = \
            Twips(text_position_twips)
        self.current_paragraph.paragraph_format.first_line_indent = \
            Twips(-number_width_twips)
        # Add tab stop at text position so text after number aligns with
        # wrapped lines (both start at the same position)
        self._add_tab_stop(self.current_paragraph, text_position_twips)
        # Add the hierarchical number followed by tab (not space)
        self.current_paragraph.add_run(full_number)
        self.current_paragraph.add_run('\t')

    @staticmethod
    def _add_tab_stop(paragraph: Paragraph, position_twips: int) -> None:
        """Add a left-aligned tab stop to a paragraph.

        Args:
            paragraph: The paragraph to add the tab stop to.
            position_twips: The position of the tab stop in twips.
        """
        # Go through the python-docx tab stop API rather than appending a
        # raw w:tabs element: w:pPr children have a fixed order (w:tabs
        # comes before w:ind), and the API also reuses an existing w:tabs
        # element instead of adding a second one. The default alignment
        # of add_tab_stop() is left.
        paragraph.paragraph_format.tab_stops.add_tab_stop(
            Twips(position_twips))

    def _end_numbered_item(self, level: int, num: int) -> None:
        """End a numbered item.

        Args:
            level: The level of the numbered item (1-9).
            num: The number of the item.
        """
        assert isinstance(level, int)
        assert isinstance(num, int)
        self.current_paragraph = None

    def _start_table(self, num_columns: int) -> None:
        """Start a table.

        Args:
            num_columns: The number of columns in the table.
        """
        assert isinstance(num_columns, int)
        # Add an empty paragraph before the table for spacing
        # This separates the table from previous content
        self.doc.add_paragraph()
        # Note: Actual table creation happens in _write_table_first_row

    def _end_table(self, num_columns: int, num_rows: int) -> None:
        """End a table.

        Args:
            num_columns: The number of columns in the table.
            num_rows: The number of rows in the table.
        """
        assert isinstance(num_columns, int)
        assert isinstance(num_rows, int)
        # Add an empty paragraph after the table for spacing
        self.doc.add_paragraph()

    def _write_table_first_row(self, first_row: list[str],
                               formatting: Formatting) -> None:
        """Write the first row of a table.

        This is where the table itself is created; it gets one column
        per entry of first_row.

        Args:
            first_row: The first row of the table.
            formatting: The formatting of the text in each cell.
        """
        assert isinstance(first_row, list)
        assert isinstance(formatting, Formatting)
        # Create the table with the first row
        table = self.doc.add_table(rows=1, cols=len(first_row))
        table.style = 'Table Grid'
        # Look the cells up once instead of once per column.
        cells = table.rows[0].cells
        # Fill in the first row
        for idx, cell_text in enumerate(first_row):
            self._add_formatted_run(cells[idx].paragraphs[0], cell_text,
                                    formatting)

    def _write_table_row(self, row: list[str], formatting: Formatting,
                         row_number: int) -> None:
        """Write a row of a table.

        The row is appended to the last table of the document.

        Args:
            row: The row to add to the table.
            formatting: The formatting of the text in each cell.
            row_number: The row number (0-based).
        """
        assert isinstance(row, list)
        assert isinstance(formatting, Formatting)
        assert isinstance(row_number, int)
        # Get the last table in the document
        table = self.doc.tables[-1]
        # Add a new row
        new_row = table.add_row()  # type: ignore[no-untyped-call]
        # Look the cells up once instead of once per column.
        cells = new_row.cells
        # Fill in the row
        for idx, cell_text in enumerate(row):
            self._add_formatted_run(cells[idx].paragraphs[0], cell_text,
                                    formatting)

    def _start_code_block(self, programming_language: Optional[str]) -> None:
        """Start a code block.

        Args:
            programming_language: The programming language of the code block.
        """
        assert programming_language is None or \
            isinstance(programming_language, str)
        # Create a paragraph with a code/verbatim style
        # python-docx doesn't have a built-in code style,
        # so we'll use 'No Spacing' style and monospace font
        # (the font is set per run in _write_code_block)
        self.current_paragraph = self.doc.add_paragraph()
        self.current_paragraph.style = 'No Spacing'

    def _end_code_block(self, programming_language: Optional[str]) -> None:
        """End a code block.

        Args:
            programming_language: The programming language of the code block.
        """
        assert programming_language is None or \
            isinstance(programming_language, str)
        self.current_paragraph = None

    def _write_code_block(self, text: str,
                          programming_language: Optional[str]) -> None:
        """Write a code block.

        Args:
            text: The text to add to the code block.
            programming_language: The programming language of the code block.

        Raises:
            RuntimeError: If there is no current paragraph.
        """
        assert isinstance(text, str)
        assert programming_language is None or \
            isinstance(programming_language, str)
        if self.current_paragraph is None:
            raise RuntimeError('No current paragraph to write code into')
        run = self.current_paragraph.add_run(text)
        # Set monospace font
        run.font.name = 'Courier New'

    def _encode_text(self, text: str) -> str:
        """Encode text (escape special characters)."""
        # No encoding needed for DOCX
        return text
|
|
@@ -0,0 +1,666 @@
|
|
|
1
|
+
#! /usr/local/bin/python3
|
|
2
|
+
"""Extension of the MultiFormat class for DOCX files."""
|
|
3
|
+
|
|
4
|
+
# Copyright (c) 2025 - 2026 Tom Björkholm
|
|
5
|
+
# MIT License
|
|
6
|
+
#
|
|
7
|
+
|
|
8
|
+
from typing import Optional, Callable, NamedTuple
|
|
9
|
+
from odfdo import Document, Paragraph, Header, Table, Row, Cell, \
|
|
10
|
+
Link, List, ListItem, Style, Span, Element
|
|
11
|
+
from mformat.mformat import FormatterDescriptor, MultiFormat
|
|
12
|
+
from mformat.mformat_state import MultiFormatState, Formatting
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class OdtStyles(NamedTuple):
    """Bundle of the style objects used when writing ODT files."""

    # Styles grouped by kind; each dict is keyed by a style name.
    text_styles: dict[str, Style]
    font_styles: dict[str, Style]
    paragraph_styles: dict[str, Style]

    # Direct references to individual styles.
    bold: Style
    italic: Style
    bold_italic: Style
    code: Style
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class MultiFormatOdt(MultiFormat):
|
|
28
|
+
"""Extension of the MultiFormat class for ODT files."""
|
|
29
|
+
|
|
30
|
+
def __init__(self, file_name: str, url_as_text: bool = False,
             file_exists_callback: Optional[Callable[[str], None]] = None,
             lang: str = 'en-UK'):
    """Set up an empty ODT text document and the formatter state.

    Args:
        file_name: The name of the file to write to.
        url_as_text: Format URLs as text not clickable URLs.
        file_exists_callback: Called if the file already exists.
                              Returning allows the file to be
                              overwritten, raising an exception
                              prevents it. (May for instance save the
                              existing file as backup.)
                              (Default is to raise an exception.)
        lang: The language of the document.
    """
    # NOTE(review): the default 'en-UK' is not a registered language
    # tag (the region subtag for the United Kingdom is 'GB'); kept
    # unchanged here because the default is part of the interface.
    self.doc: Document = Document('text')
    self.doc.set_language(lang)

    # The styles are created first and then registered in the document.
    self.odt_styles: OdtStyles = self._create_odt_styles()
    self._insert_odt_styles()

    # Bookkeeping for the element currently being written.
    self.current_paragraph: Optional[Paragraph] = None
    self.odt_listitem: Optional[ListItem] = None
    self.odt_list: list[List] = []
    self.odt_table: Optional[Table] = None
    self.odt_tablenumber: int = 1

    super().__init__(file_name=file_name, url_as_text=url_as_text,
                     file_exists_callback=file_exists_callback)
|
|
58
|
+
|
|
59
|
+
def _insert_odt_styles(self) -> None:
|
|
60
|
+
"""Insert the ODT styles into the document."""
|
|
61
|
+
for style in self.odt_styles.text_styles.values():
|
|
62
|
+
self.doc.insert_style(style)
|
|
63
|
+
for style in self.odt_styles.font_styles.values():
|
|
64
|
+
self.doc.insert_style(style)
|
|
65
|
+
for style in self.odt_styles.paragraph_styles.values():
|
|
66
|
+
self.doc.insert_style(style)
|
|
67
|
+
# Insert list styles (Element type for list styles, not StyleBase)
|
|
68
|
+
self.doc.insert_style(
|
|
69
|
+
self._create_numbered_list_style()) # type: ignore[arg-type]
|
|
70
|
+
self.doc.insert_style(
|
|
71
|
+
self._create_bullet_list_style()) # type: ignore[arg-type]
|
|
72
|
+
|
|
73
|
+
@staticmethod
def _create_list_level_properties(level_num: int) -> Element:
    """Build the indentation properties for one list level.

    Args:
        level_num: The list level number (1-9).

    Returns:
        A style:list-level-properties Element containing a
        style:list-level-label-alignment child.
    """
    # Measurements in cm.
    hanging_indent_cm = 0.635  # written as a negative text indent
    margin_per_level_cm = 1.27  # left margin grows by this per level
    left_margin = f'{margin_per_level_cm * level_num:.2f}cm'

    # Label alignment: the tab stop and the left margin share a position.
    alignment = Element.from_tag('style:list-level-label-alignment')
    for attr_name, attr_value in (
            ('text:label-followed-by', 'listtab'),
            ('text:list-tab-stop-position', left_margin),
            ('fo:text-indent', f'-{hanging_indent_cm:.3f}cm'),
            ('fo:margin-left', left_margin)):
        alignment.set_attribute(attr_name, attr_value)

    level_props = Element.from_tag('style:list-level-properties')
    level_props.set_attribute('text:list-level-position-and-space-mode',
                              'label-alignment')
    level_props.append(alignment)
    return level_props
|
|
102
|
+
|
|
103
|
+
@staticmethod
def _create_numbered_list_style() -> Element:
    """Build the 'numbered-list' list style with nine levels.

    Returns:
        A text:list-style Element for numbered lists.
    """
    numbered = Element.from_tag('text:list-style')
    numbered.set_attribute('style:name', 'numbered-list')
    # One text:list-level-style-number child per nesting level 1-9.
    for depth in range(1, 10):
        level_style = Element.from_tag('text:list-level-style-number')
        for attr_name, attr_value in (('text:level', str(depth)),
                                      ('style:num-suffix', '.'),
                                      ('style:num-format', '1')):
            level_style.set_attribute(attr_name, attr_value)
        # Indentation for this level.
        indentation = MultiFormatOdt._create_list_level_properties(depth)
        level_style.append(indentation)
        numbered.append(level_style)
    return numbered
|
|
123
|
+
|
|
124
|
+
@staticmethod
def _create_bullet_list_style() -> Element:
    """Build the 'bullet-list' list style with nine levels.

    Returns:
        A text:list-style Element for bullet lists.
    """
    bulleted = Element.from_tag('text:list-style')
    bulleted.set_attribute('style:name', 'bullet-list')
    # Bullet, white bullet, small square; the cycle repeats three times
    # to cover the levels 1-9.
    bullet_cycle = ['\u2022', '\u25e6', '\u25aa'] * 3
    for depth, bullet_char in enumerate(bullet_cycle, start=1):
        level_style = Element.from_tag('text:list-level-style-bullet')
        level_style.set_attribute('text:level', str(depth))
        level_style.set_attribute('text:bullet-char', bullet_char)
        # Indentation for this level.
        indentation = MultiFormatOdt._create_list_level_properties(depth)
        level_style.append(indentation)
        bulleted.append(level_style)
    return bulleted
|
|
147
|
+
|
|
148
|
+
@staticmethod
def _create_code_paragraph_style() -> Style:
    """Build the paragraph style 'code' used for code blocks.

    Returns:
        A Style with 'Liberation Mono' text properties and a light
        gray, padded paragraph background.
    """
    code_style = Style(name='code', family='paragraph',
                       display_name='code')

    # Monospace font; the last two entries repeat the font name for
    # Asian and Complex text.
    font_props = Element.from_tag('style:text-properties')
    for attr_name, attr_value in (
            ('style:font-name', 'Liberation Mono'),
            ('fo:font-family', "'Liberation Mono'"),
            ('style:font-family-generic', 'modern'),
            ('style:font-pitch', 'fixed'),
            ('style:font-name-asian', 'Liberation Mono'),
            ('style:font-name-complex', 'Liberation Mono')):
        font_props.set_attribute(attr_name, attr_value)
    code_style.append(font_props)

    # Light gray background with a little padding.
    block_props = Element.from_tag('style:paragraph-properties')
    for attr_name, attr_value in (('fo:background-color', '#f0f0f0'),
                                  ('fo:padding', '0.1cm')):
        block_props.set_attribute(attr_name, attr_value)
    code_style.append(block_props)
    return code_style
|
|
176
|
+
|
|
177
|
+
@staticmethod
def _create_link_style(name: str, bold: bool = False,
                       italic: bool = False) -> Style:
    """Create a link style with blue color and underline.

    Args:
        name: The name of the style.
        bold: Whether the link text should be bold.
        italic: Whether the link text should be italic.

    Returns:
        A Style object for links with the specified formatting.
    """
    link_color = '#0000ff'  # Blue color for links
    style = Style(
        name=name,
        family='text',
        display_name=name,
        area='text',
        color=link_color,
        bold=bold,
        italic=italic
    )
    # Add underline to text-properties
    text_props = style.get_element('style:text-properties')
    # Compare against None explicitly: the intent is "was the element
    # found", which must not depend on how an element object evaluates
    # in a boolean context.
    if text_props is not None:
        text_props.set_attribute('style:text-underline-style', 'solid')
        text_props.set_attribute('style:text-underline-width', 'auto')
        text_props.set_attribute('style:text-underline-color', link_color)
    return style
|
|
207
|
+
|
|
208
|
+
def _create_odt_styles(self) -> OdtStyles:
    """Create every style object the ODT writer uses.

    Returns:
        An OdtStyles tuple with the text styles (bold, italic,
        bold-italic and four link variants), an empty font style
        dict, and the 'code' paragraph style.
    """
    def emphasis(style_name: str, *, bold: bool, italic: bool) -> Style:
        # Text-family style whose display name equals its name.
        return Style(name=style_name, family='text',
                     display_name=style_name, area='text',
                     bold=bold, italic=italic)

    plain_bold = emphasis('bold', bold=True, italic=False)
    plain_italic = emphasis('italic', bold=False, italic=True)
    both = emphasis('bold-italic', bold=True, italic=True)
    text_styles: dict[str, Style] = {
        'bold': plain_bold,
        'italic': plain_italic,
        'bold-italic': both,
    }

    # Code paragraph style with monospace font and background.
    code_paragraph = self._create_code_paragraph_style()
    paragraph_styles: dict[str, Style] = {'code': code_paragraph}

    # Link styles (blue and underlined), one per bold/italic combination.
    for suffix, is_bold, is_italic in (('', False, False),
                                       ('-bold', True, False),
                                       ('-italic', False, True),
                                       ('-bold-italic', True, True)):
        link_name = 'link' + suffix
        text_styles[link_name] = self._create_link_style(
            link_name, bold=is_bold, italic=is_italic)

    font_styles: dict[str, Style] = {}
    return OdtStyles(text_styles=text_styles, font_styles=font_styles,
                     paragraph_styles=paragraph_styles, bold=plain_bold,
                     italic=plain_italic, bold_italic=both,
                     code=code_paragraph)
|
|
240
|
+
|
|
241
|
+
@staticmethod
def _style_name_from_formatting(formatting: Formatting) -> str:
    """Map bold/italic formatting to a text style name.

    Returns:
        'bold', 'italic', 'bold-italic', or '' when neither is set.
    """
    assert isinstance(formatting, Formatting)
    flags = (('bold', formatting.bold), ('italic', formatting.italic))
    return '-'.join(part for part, enabled in flags if enabled)
|
|
253
|
+
|
|
254
|
+
@staticmethod
def _link_style_name_from_formatting(formatting: Formatting) -> str:
    """Map bold/italic formatting to a link style name.

    Link styles include blue color and underline to be visible as links.

    Returns:
        'link', optionally followed by '-bold' and/or '-italic'.
    """
    assert isinstance(formatting, Formatting)
    name_parts = ['link']
    if formatting.bold:
        name_parts.append('bold')
    if formatting.italic:
        name_parts.append('italic')
    return '-'.join(name_parts)
|
|
267
|
+
|
|
268
|
+
@classmethod
def file_name_extension(cls) -> str:
    """Return the file name extension of this formatter ('.odt')."""
    extension = '.odt'
    return extension
|
|
272
|
+
|
|
273
|
+
@classmethod
def get_arg_desciption(cls) -> FormatterDescriptor:
    """Describe the arguments of this formatter.

    Returns:
        A FormatterDescriptor named 'odt' with no mandatory arguments
        and 'lang' as the only optional argument.
    """
    descriptor = FormatterDescriptor(name='odt', mandatory_args=[],
                                     optional_args=['lang'])
    return descriptor
|
|
278
|
+
|
|
279
|
+
def open(self) -> None:
    """Open the file (nothing to do for ODT).

    Avoid using this method directly.
    Use as a context manager instead, using a with statement.
    """
    # The file is created when the document is saved, so there is
    # nothing to open here.
    return None
|
|
286
|
+
|
|
287
|
+
def _close(self) -> None:
    """Close the file by saving the document.

    Nothing is saved while the state is still MultiFormatState.EMPTY.

    Avoid using this method directly.
    Use as a context manager instead, using a with statement.
    """
    if self.state != MultiFormatState.EMPTY:
        self.doc.save(self.file_name, pretty=True)
|
|
296
|
+
|
|
297
|
+
def _write_file_prefix(self) -> None:
|
|
298
|
+
"""Write the file prefix.
|
|
299
|
+
|
|
300
|
+
For ODT files, this is a no-op since the document
|
|
301
|
+
structure is handled by odfdo.
|
|
302
|
+
"""
|
|
303
|
+
|
|
304
|
+
def _write_file_suffix(self) -> None:
|
|
305
|
+
"""Write the file suffix.
|
|
306
|
+
|
|
307
|
+
For ODT files, this is a no-op since the document
|
|
308
|
+
structure is handled by odfdo.
|
|
309
|
+
"""
|
|
310
|
+
|
|
311
|
+
def _start_paragraph(self) -> None:
    """Begin a new, empty paragraph and make it the current one."""
    new_paragraph = Paragraph()
    self.current_paragraph = new_paragraph
|
|
314
|
+
|
|
315
|
+
def _end_paragraph(self) -> None:
|
|
316
|
+
"""End a paragraph."""
|
|
317
|
+
if self.current_paragraph is not None:
|
|
318
|
+
self.doc.body.append(self.current_paragraph)
|
|
319
|
+
self.current_paragraph = None
|
|
320
|
+
|
|
321
|
+
def _start_heading(self, level: int) -> None:
    """Begin a new heading and make it the current paragraph.

    Args:
        level: The level of the heading (1-9).
    """
    heading = Header(level=level)
    self.current_paragraph = heading
|
|
328
|
+
|
|
329
|
+
def _end_heading(self, level: int) -> None:
|
|
330
|
+
"""End a heading.
|
|
331
|
+
|
|
332
|
+
Args:
|
|
333
|
+
level: The level of the heading (1-9).
|
|
334
|
+
"""
|
|
335
|
+
if self.current_paragraph is not None:
|
|
336
|
+
self.doc.body.append(self.current_paragraph)
|
|
337
|
+
self.current_paragraph = None
|
|
338
|
+
|
|
339
|
+
def _formatted_write(self, paragraph: Paragraph,
                     formatting: Formatting, text: str) -> None:
    """Add text to a paragraph, in a styled span when formatting applies.

    In ODF/XML, paragraph.text holds the text before any child element
    and each child's tail holds the text after that child. To keep the
    text in the order it was written:
    - formatted text is wrapped in a Span and appended as a child;
    - unformatted text goes to paragraph.text while the paragraph has no
      children, and to the tail of the last child otherwise.

    Args:
        paragraph: The paragraph to add the text to.
        formatting: The formatting of the text.
        text: The text to add.
    """
    assert paragraph is not None
    assert isinstance(paragraph, Paragraph)
    style = self._style_name_from_formatting(formatting)
    if style:
        # Formatted text: a span carrying the style, appended as a child.
        paragraph.append(Span(text=text, style=style))
        return
    children = paragraph.children
    if not children:
        # No child elements yet: extend the paragraph's own leading text.
        paragraph.text = (paragraph.text or '') + text
        return
    # Child elements exist: the text belongs after the last one.
    last_child = children[-1]
    last_child.tail = (last_child.tail or '') + text
|
|
372
|
+
|
|
373
|
+
def _write_text(self, text: str, state: MultiFormatState,
                formatting: Formatting) -> None:
    """Write text into the current item (paragraph, list item, etc.).

    The target is chosen from self.state; the state argument itself is
    not consulted here.

    Args:
        text: The text to write into the current item.
        state: The state of the current item.
        formatting: The formatting of the text.

    Raises:
        RuntimeError: If self.state is not a paragraph, heading, bullet
            list item or numbered list item state.
    """
    current = self.state
    if current in (MultiFormatState.PARAGRAPH, MultiFormatState.HEADING):
        assert self.current_paragraph is not None
        target = self.current_paragraph
    elif current in (MultiFormatState.BULLET_LIST_ITEM,
                     MultiFormatState.NUMBERED_LIST_ITEM):
        assert self.odt_listitem is not None
        # Text goes into the last child of the list item, which must be
        # a paragraph.
        target = self.odt_listitem.children[-1]
        assert isinstance(target, Paragraph)
    else:
        raise RuntimeError(f'Unexpected state: {self.state.name} for '
                           f'writing text: {text}')
    self._formatted_write(target, formatting, text)
|
|
395
|
+
|
|
396
|
+
def _impl_write_url(self, paragraph: Paragraph,
                    url: str, text: Optional[str],
                    formatting: Formatting) -> None:
    """Append a link to a paragraph.

    Args:
        paragraph: The paragraph to append the link to.
        url: The target of the link.
        text: The text to display; the URL itself is shown when this is
            empty or None.
        formatting: The formatting used to select the link style.
    """
    assert paragraph is not None
    assert isinstance(paragraph, Paragraph)
    assert isinstance(formatting, Formatting)
    label = text if text else url
    link = Link(url=url, text=label)
    # Link styles include blue color and underline.
    link.style = self._link_style_name_from_formatting(formatting)
    paragraph.append(link)
|
|
409
|
+
|
|
410
|
+
def _write_url(self, url: str, text: Optional[str],
               state: MultiFormatState,
               formatting: Formatting) -> None:
    """Write a URL into the current item (paragraph, list item, etc.).

    Args:
        url: The URL to write into the current item.
        text: The text to display for the URL.
        state: The state of the current item; must equal self.state.
        formatting: The formatting of the text.

    Raises:
        RuntimeError: If self.state is not a paragraph, heading, bullet
            list item or numbered list item state.
    """
    assert state == self.state
    current = self.state
    if current in (MultiFormatState.PARAGRAPH, MultiFormatState.HEADING):
        assert self.current_paragraph is not None
        target = self.current_paragraph
    elif current in (MultiFormatState.BULLET_LIST_ITEM,
                     MultiFormatState.NUMBERED_LIST_ITEM):
        assert self.odt_listitem is not None
        # The link goes into the last child of the list item, which must
        # be a paragraph.
        target = self.odt_listitem.children[-1]
        assert isinstance(target, Paragraph)
    else:
        raise RuntimeError(f'Unexpected state: {self.state.name} for '
                           f'writing url: {url} text: {text}')
    self._impl_write_url(target, url, text, formatting)
|
|
435
|
+
|
|
436
|
+
def _start_bullet_list(self, level: int) -> None:
    """Open a new bullet list on top of the stack of open lists.

    Args:
        level: The level of the bullet list (1-9).
    """
    assert isinstance(level, int)
    bullet_list = List(style='bullet-list')
    self.odt_list.append(bullet_list)
    self.current_paragraph = None
|
|
445
|
+
|
|
446
|
+
def _end_bullet_list(self, level: int) -> None:
|
|
447
|
+
"""End a bullet list.
|
|
448
|
+
|
|
449
|
+
Args:
|
|
450
|
+
level: The level of the bullet list (1-9).
|
|
451
|
+
"""
|
|
452
|
+
assert isinstance(level, int)
|
|
453
|
+
if not self.odt_list or len(self.odt_list) != level:
|
|
454
|
+
print(f'len(odt_list) = {len(self.odt_list)} for bullet list '
|
|
455
|
+
f'level = {level} state: {self.state.name}')
|
|
456
|
+
assert self.odt_list and len(self.odt_list) == level
|
|
457
|
+
if len(self.odt_list) > 1:
|
|
458
|
+
# Nested list: append to the last list item of the parent list
|
|
459
|
+
# In ODF, nested lists must be inside the parent's list item
|
|
460
|
+
parent_list = self.odt_list[-2]
|
|
461
|
+
parent_list_item = parent_list.children[-1]
|
|
462
|
+
parent_list_item.append(self.odt_list[-1])
|
|
463
|
+
else:
|
|
464
|
+
self.doc.body.append(self.odt_list[-1])
|
|
465
|
+
self.odt_list.pop()
|
|
466
|
+
|
|
467
|
+
def _start_bullet_item(self, level: int) -> None:
    """Begin a new bullet list item and make it the current list item.

    Args:
        level: The level of the bullet item (1-9).
    """
    assert isinstance(level, int)
    self.odt_listitem = ListItem()
    # NOTE(review): text_content is given a Paragraph that wraps another
    # Paragraph, unlike _start_numbered_item, which passes a Paragraph
    # straight to the ListItem constructor. Confirm this difference is
    # intended.
    initial_content = Paragraph(text_or_element=Paragraph())
    self.odt_listitem.text_content = initial_content
    assert self.odt_listitem is not None
    self.current_paragraph = None
|
|
478
|
+
|
|
479
|
+
def _end_bullet_item(self, level: int) -> None:
|
|
480
|
+
"""End a bullet item.
|
|
481
|
+
|
|
482
|
+
Args:
|
|
483
|
+
level: The level of the bullet item (1-9).
|
|
484
|
+
"""
|
|
485
|
+
assert isinstance(level, int)
|
|
486
|
+
assert self.odt_listitem is not None
|
|
487
|
+
self.odt_list[-1].append(self.odt_listitem)
|
|
488
|
+
self.odt_listitem = None
|
|
489
|
+
self.current_paragraph = None
|
|
490
|
+
|
|
491
|
+
def _start_numbered_list(self, level: int) -> None:
    """Open a new numbered list on top of the stack of open lists.

    Args:
        level: The level of the numbered list (1-9).
    """
    assert isinstance(level, int)
    numbered_list = List(style='numbered-list')
    self.odt_list.append(numbered_list)
    self.current_paragraph = None
|
|
500
|
+
|
|
501
|
+
def _end_numbered_list(self, level: int) -> None:
|
|
502
|
+
"""End a numbered list.
|
|
503
|
+
|
|
504
|
+
Args:
|
|
505
|
+
level: The level of the numbered list (1-9).
|
|
506
|
+
"""
|
|
507
|
+
assert isinstance(level, int)
|
|
508
|
+
if not self.odt_list or len(self.odt_list) != level:
|
|
509
|
+
print(f'len(odt_list) = {len(self.odt_list)} for numbered list '
|
|
510
|
+
f'level = {level} state: {self.state.name}')
|
|
511
|
+
assert self.odt_list and len(self.odt_list) == level
|
|
512
|
+
if len(self.odt_list) > 1:
|
|
513
|
+
# Nested list: append to the last list item of the parent list
|
|
514
|
+
# In ODF, nested lists must be inside the parent's list item
|
|
515
|
+
parent_list = self.odt_list[-2]
|
|
516
|
+
parent_list_item = parent_list.children[-1]
|
|
517
|
+
parent_list_item.append(self.odt_list[-1])
|
|
518
|
+
else:
|
|
519
|
+
self.doc.body.append(self.odt_list[-1])
|
|
520
|
+
self.odt_list.pop()
|
|
521
|
+
|
|
522
|
+
def _start_numbered_item(self, level: int, num: int,
                         full_number: str) -> None:
    """Begin a new numbered list item and make it the current list item.

    The arguments are only type-checked here; they do not affect the
    list item that is created.

    Args:
        level: The level of the numbered item (1-9).
        num: The number of the item.
        full_number: The full number of the item including all levels.
    """
    assert isinstance(level, int)
    assert isinstance(num, int)
    assert isinstance(full_number, str)
    item_paragraph = Paragraph()
    self.odt_listitem = ListItem(text_or_element=item_paragraph)
    self.current_paragraph = None
|
|
536
|
+
|
|
537
|
+
def _end_numbered_item(self, level: int, num: int) -> None:
|
|
538
|
+
"""End a numbered item.
|
|
539
|
+
|
|
540
|
+
Args:
|
|
541
|
+
level: The level of the numbered item (1-9).
|
|
542
|
+
num: The number of the item.
|
|
543
|
+
"""
|
|
544
|
+
assert isinstance(level, int)
|
|
545
|
+
assert isinstance(num, int)
|
|
546
|
+
assert self.odt_listitem is not None
|
|
547
|
+
if not self.odt_list or len(self.odt_list) != level:
|
|
548
|
+
print(f'len(odt_list) = {len(self.odt_list)} for numbered item '
|
|
549
|
+
f'level = {level} {num} state: {self.state.name}')
|
|
550
|
+
assert self.odt_list and len(self.odt_list) == level
|
|
551
|
+
self.odt_list[-1].append(self.odt_listitem)
|
|
552
|
+
self.odt_listitem = None
|
|
553
|
+
self.current_paragraph = None
|
|
554
|
+
|
|
555
|
+
def _start_table(self, num_columns: int) -> None:
    """Begin a new table with a unique, numbered name.

    Args:
        num_columns: The number of columns in the table (must be > 0).
    """
    assert isinstance(num_columns, int)
    assert num_columns > 0
    # An empty paragraph before the table gives some spacing.
    spacer = Paragraph()
    self.doc.body.append(spacer)
    table_name = f'Table{self.odt_tablenumber}'
    self.odt_table = Table(name=table_name, width=num_columns)
    self.odt_tablenumber += 1
|
|
568
|
+
|
|
569
|
+
def _end_table(self, num_columns: int, num_rows: int) -> None:
    """Finish the current table by adding it to the document body.

    Args:
        num_columns: The number of columns in the table.
        num_rows: The number of rows in the table.
    """
    assert isinstance(num_columns, int)
    assert isinstance(num_rows, int)
    finished_table = self.odt_table
    assert finished_table is not None
    document_body = self.doc.body
    document_body.append(finished_table)
    # An empty paragraph after the table gives some spacing.
    document_body.append(Paragraph())
    self.odt_table = None
|
|
583
|
+
|
|
584
|
+
def _write_table_first_row(self, first_row: list[str],
                           formatting: Formatting) -> None:
    """Write the first row of the current table.

    Args:
        first_row: The cell texts of the first row of the table.
        formatting: The formatting of the text in each cell.
    """
    assert isinstance(first_row, list)
    assert isinstance(formatting, Formatting)
    table = self.odt_table
    assert table is not None
    self._write_table_row(first_row, formatting, 0)
    # NOTE(review): presumably this removes an empty placeholder row left
    # at index 0 when the table was created with a width. Confirm against
    # the odfdo Table documentation.
    table.delete_row(0)
|
|
597
|
+
|
|
598
|
+
def _write_table_row(self, row: list[str], formatting: Formatting,
                     row_number: int) -> None:
    """Append one row of cells to the current table.

    Each cell holds a single paragraph with the cell text, wrapped in a
    styled span when the formatting maps to a style name.

    Args:
        row: The cell texts of the row to add to the table.
        formatting: The formatting of the text in each cell.
        row_number: The row number (0-based). Only type-checked here.
    """
    assert isinstance(row, list)
    assert isinstance(formatting, Formatting)
    assert isinstance(row_number, int)
    assert self.odt_table is not None
    new_row = Row()
    for cell_text in row:
        cell_paragraph = Paragraph()
        style = self._style_name_from_formatting(formatting)
        if not style:
            # No formatting: the text goes straight into the paragraph.
            cell_paragraph.text = cell_text
        else:
            # Formatted text: a span carrying the style.
            cell_paragraph.append(Span(text=cell_text, style=style))
        cell = Cell()
        cell.append(cell_paragraph)
        new_row.append(cell)
    self.odt_table.append(new_row)
|
|
627
|
+
|
|
628
|
+
def _start_code_block(self, programming_language: Optional[str]) -> None:
|
|
629
|
+
"""Start a code block.
|
|
630
|
+
|
|
631
|
+
Args:
|
|
632
|
+
programming_language: The programming language of the code block.
|
|
633
|
+
"""
|
|
634
|
+
assert programming_language is None or \
|
|
635
|
+
isinstance(programming_language, str)
|
|
636
|
+
# No-op. Everything is done in _write_code_block.
|
|
637
|
+
|
|
638
|
+
def _end_code_block(self, programming_language: Optional[str]) -> None:
|
|
639
|
+
"""End a code block.
|
|
640
|
+
|
|
641
|
+
Args:
|
|
642
|
+
programming_language: The programming language of the code block.
|
|
643
|
+
"""
|
|
644
|
+
assert programming_language is None or \
|
|
645
|
+
isinstance(programming_language, str)
|
|
646
|
+
# No-op. Everyhing is done in _write_code_block.
|
|
647
|
+
|
|
648
|
+
def _write_code_block(self, text: str,
                      programming_language: Optional[str]) -> None:
    """Write a code block as one 'code'-styled paragraph per line.

    Args:
        text: The text of the code block; it is split at newlines.
        programming_language: The programming language of the code block.
            Only type-checked here.
    """
    assert isinstance(text, str)
    language_ok = (programming_language is None
                   or isinstance(programming_language, str))
    assert language_ok
    document_body = self.doc.body
    for code_line in text.split('\n'):
        document_body.append(
            Paragraph(text_or_element=code_line, style='code'))
|
|
662
|
+
|
|
663
|
+
def _encode_text(self, text: str) -> str:
|
|
664
|
+
"""Encode text (escape special characters)."""
|
|
665
|
+
# No encoding needed for DOCX
|
|
666
|
+
return text
|
mformat_ext/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#! /usr/local/bin/python3
|
|
2
|
+
"""Register the formats defined in the ext package with the factory."""
|
|
3
|
+
|
|
4
|
+
# Copyright (c) 2025 - 2026 Tom Björkholm
|
|
5
|
+
# MIT License
|
|
6
|
+
#
|
|
7
|
+
|
|
8
|
+
from mformat_ext.mformat_docx import MultiFormatDocx
|
|
9
|
+
from mformat_ext.mformat_odt import MultiFormatOdt
|
|
10
|
+
from mformat.mformat import MultiFormat
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def register_formats_in_ext_pkg() -> list[type[MultiFormat]]:
    """Return the formatter classes this package offers to the factory.

    Returns:
        A new list with the DOCX and ODT formatter classes, in that order.
    """
    return [MultiFormatDocx, MultiFormatOdt]
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mformat_ext
|
|
3
|
+
Version: 0.2
|
|
4
|
+
Summary: Uniform way to write simple text extended with DOCX and ODT files
|
|
5
|
+
Author: Tom Björkholm
|
|
6
|
+
Author-email: Tom Björkholm <klausuler_linnet0q@icloud.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Project-URL: Source code, https://bitbucket.org/tom-bjorkholm/mformat
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.12
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: mformat>=0.0.1
|
|
14
|
+
Requires-Dist: python-docx>=1.2.0
|
|
15
|
+
Requires-Dist: odfpy>=1.4.1
|
|
16
|
+
Requires-Dist: msl-odt>=1.0
|
|
17
|
+
Requires-Dist: pip>=25.3
|
|
18
|
+
Requires-Dist: setuptools>=80.9.0
|
|
19
|
+
Requires-Dist: build>=1.3.0
|
|
20
|
+
Requires-Dist: wheel>=0.45.1
|
|
21
|
+
Dynamic: author
|
|
22
|
+
Dynamic: requires-dist
|
|
23
|
+
Dynamic: requires-python
|
|
24
|
+
|
|
25
|
+
# mformat
|
|
26
|
+
|
|
27
|
+
The mformat package contains a number of classes providing a uniform way for a python program to write to a number of different file formats.
|
|
28
|
+
|
|
29
|
+
The primary intended use is for text output from a python program, where the programmer would like the user to be able to select the output file formats. Some users may want the text as a Microsoft Word file, others as a LibreOffice Open Document Text file, while still others might want it as Markdown. By using the uniform way of writing provided by mformat the same python code can produce output in a number of different formats.
|
|
30
|
+
|
|
31
|
+
This is intended to provide an easy and uniform way to produce information in different formats. The emphasis is on getting the same information into the different formats. This will allow you to get a correct (but perhaps rudimentary) document in several formats. If you want to produce the most aesthetically pleasing document in a particular format, this is not the correct library to use.
|
|
32
|
+
|
|
33
|
+
## Installing mformat (base package)
|
|
34
|
+
|
|
35
|
+
The base package contains support for the output formats that are supported with a minimum of dependencies. Use this if you for some reason want to avoid extra dependencies.
|
|
36
|
+
|
|
37
|
+
If you want to use it, install it using pip from [https://pypi.org/project/mformat](https://pypi.org/project/mformat). There is no need to download anything from Bitbucket to write Python programs that use the library.
|
|
38
|
+
|
|
39
|
+
### Installing base mformat on mac and Linux
|
|
40
|
+
|
|
41
|
+
````sh
|
|
42
|
+
pip3 install --upgrade mformat
|
|
43
|
+
````
|
|
44
|
+
|
|
45
|
+
### Installing base mformat on Microsoft Windows
|
|
46
|
+
|
|
47
|
+
````sh
|
|
48
|
+
pip install --upgrade mformat
|
|
49
|
+
````
|
|
50
|
+
|
|
51
|
+
## Installing mformat-ext (extended package, this package)
|
|
52
|
+
|
|
53
|
+
The extended package contains support also for output formats that require some additional dependencies. Use this if you want the full selection of output formats.
|
|
54
|
+
|
|
55
|
+
If you want to use it, install it using pip from [https://pypi.org/project/mformat-ext](https://pypi.org/project/mformat-ext). There is no need to download anything from Bitbucket to write Python programs that use the library.
|
|
56
|
+
|
|
57
|
+
### Installing extended mformat on mac and Linux
|
|
58
|
+
|
|
59
|
+
````sh
|
|
60
|
+
pip3 install --upgrade mformat-ext
|
|
61
|
+
````
|
|
62
|
+
|
|
63
|
+
### Installing extended mformat on Microsoft Windows
|
|
64
|
+
|
|
65
|
+
````sh
|
|
66
|
+
pip install --upgrade mformat-ext
|
|
67
|
+
````
|
|
68
|
+
|
|
69
|
+
## What it does
|
|
70
|
+
|
|
71
|
+
The main features supported in a uniform way for all supported output file formats are:
|
|
72
|
+
|
|
73
|
+
* Factory function that takes file format and output file name as arguments
|
|
74
|
+
* It opens and closes a file in the selected format, with protection against accidentally overwriting an existing file
|
|
75
|
+
* The recommended way to use it is as a context manager in a with-clause, opening and closing the file
|
|
76
|
+
* Headings (several levels)
|
|
77
|
+
* Paragraphs
|
|
78
|
+
* Nested bullet point lists
|
|
79
|
+
* Nested numbered point lists
|
|
80
|
+
* Mixed nested numbered point and bullet point lists
|
|
81
|
+
* Tables
|
|
82
|
+
* URLs in paragraphs, headings, numbered point list items and in bullet point list items
|
|
83
|
+
|
|
84
|
+
## Design of program that uses mformat
|
|
85
|
+
|
|
86
|
+
It is recommended that the output function(s) of a Python program using mformat should have a with-clause getting the formatting object from the factory (easiest with `with create_mf(file_format=fmt, file_name=output_file_name) as`).
|
|
87
|
+
In the context of the with-clause the programmer just calls a minimum of member functions:
|
|
88
|
+
|
|
89
|
+
* `start_paragraph` to start a new paragraph with some provided text content.
|
|
90
|
+
* `start_heading` to start a new heading with some provided text content.
|
|
91
|
+
* `start_bullet_item` to start a new bullet point list item with some provided text content, and if needed to start the bullet point list with the bullet point item.
|
|
92
|
+
* `start_numbered_point_item` to start a new numbered point list item with some provided text content, and if needed to start the numbered point list with the numbered point list item.
|
|
93
|
+
* `add_text` to add more text to an already started paragraph, heading, bullet point list item or numbered point list item.
|
|
94
|
+
* `add_url` to add a URL (link) to an already started paragraph, heading, bullet point list item or numbered point list item.
|
|
95
|
+
* `start_table` to start a new table with the provided first row.
|
|
96
|
+
* `add_table_row` to add another row to an already started table.
|
|
97
|
+
* `write_complete_table` to write a table all at once.
|
|
98
|
+
* `write_code_block` to write some preformatted text as a code block
|
|
99
|
+
|
|
100
|
+
There are no member functions to end or close any document item. Each document item is automatically closed as another document item is started (or when closing the file at the end of the context manager scope). `start_bullet_item` and `start_numbered_point_item` take an optional level argument, that is used to change to another nesting level.
|
|
101
|
+
|
|
102
|
+
## Example programs
|
|
103
|
+
|
|
104
|
+
A number of minimal but complete example programs are provided to help the programmer new to mformat. See [list of examples](https://bitbucket.org/tom-bjorkholm/mformat/src/master/example/README.md).
|
|
105
|
+
|
|
106
|
+
## API documentation
|
|
107
|
+
|
|
108
|
+
API documentation automatically extracted from the Python code and docstrings is available [here for the public API](https://bitbucket.org/tom-bjorkholm/mformat/src/master/doc/api.md) for programmers using the API and [here for the protected API](https://bitbucket.org/tom-bjorkholm/mformat/src/master/doc/protected_api.md) for programmers that want to extend the API by adding their own derived classes that provide some other output format.
|
|
109
|
+
|
|
110
|
+
Even though some may like reading API documentation, the [example programs](https://bitbucket.org/tom-bjorkholm/mformat/src/master/example/README.md) probably provide a better introduction.
|
|
111
|
+
|
|
112
|
+
## Version history
|
|
113
|
+
|
|
114
|
+
| Version | Date | Python version | Description |
|
|
115
|
+
|---------|-------------|-----------------|-------------------------------------|
|
|
116
|
+
| 0.2 | 30 Jan 2026 | 3.12 or newer | First released version |
|
|
117
|
+
|
|
118
|
+
## Output file formats
|
|
119
|
+
|
|
120
|
+
The following table provides information about in which version support for a format was introduced.
|
|
121
|
+
|
|
122
|
+
| Format | Full name of format | Which package | Starting at version |
|
|
123
|
+
|--------|---------------------|---------------|---------------------|
|
|
124
|
+
| docx | Microsoft Word | mformat-ext | 0.2 |
|
|
125
|
+
| html | Web page | mformat | 0.2 |
|
|
126
|
+
| md | Markdown | mformat | 0.2 |
|
|
127
|
+
| odt | Open Document Text | mformat-ext | 0.2 |
|
|
128
|
+
|
|
129
|
+
## Test summary
|
|
130
|
+
|
|
131
|
+
* Test result: 916 passed in 10s
|
|
132
|
+
* No Flake8 warnings.
|
|
133
|
+
* No mypy errors found.
|
|
134
|
+
* 0.2 built and tested using python version: Python 3.14.2
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
mformat_ext/__init__.py,sha256=p3sK092CiwBQdLl5533lOh37j8luVu-wNiGFKBWgfjg,128
|
|
2
|
+
mformat_ext/mformat_docx.py,sha256=_tDyMM2m5-zrn20KusQT6gowsqb3QEMDAAjtlpx4XUQ,16032
|
|
3
|
+
mformat_ext/mformat_odt.py,sha256=jrah1JH4juXAY6HE9XyqVH3P8b8-OxMQzBvBVxxnXw4,26706
|
|
4
|
+
mformat_ext/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
mformat_ext/reg_extpkg_formats.py,sha256=wJXv5rsWgTYSHFo9BYBZn_8tV4pun3aXDUpZFUxI9os,530
|
|
6
|
+
mformat_ext-0.2.dist-info/METADATA,sha256=J1AOdeHm9vbnCeuEA4R9ivHodVbKVaHpYzDzeSNJV-Y,7247
|
|
7
|
+
mformat_ext-0.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
8
|
+
mformat_ext-0.2.dist-info/top_level.txt,sha256=Qvx4UfVay6UsRei0fsuXp1CH3aLvozKFBJWrw-O5dLU,12
|
|
9
|
+
mformat_ext-0.2.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mformat_ext
|