pydocmaker 2.3.2__tar.gz → 2.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. {pydocmaker-2.3.2/src/pydocmaker.egg-info → pydocmaker-2.3.4}/PKG-INFO +58 -1
  2. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/README.md +55 -0
  3. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/setup.cfg +2 -0
  4. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/__init__.py +2 -2
  5. pydocmaker-2.3.4/src/pydocmaker/backend/ex_docx.py +469 -0
  6. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/backend/pandoc_api.py +21 -1
  7. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/core.py +5 -2
  8. {pydocmaker-2.3.2 → pydocmaker-2.3.4/src/pydocmaker.egg-info}/PKG-INFO +58 -1
  9. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker.egg-info/requires.txt +2 -0
  10. pydocmaker-2.3.2/src/pydocmaker/backend/ex_docx.py +0 -174
  11. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/LICENSE +0 -0
  12. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/setup.py +0 -0
  13. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/backend/__init__.py +0 -0
  14. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/backend/baseformatter.py +0 -0
  15. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/backend/ex_html.py +0 -0
  16. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/backend/ex_ipynb.py +0 -0
  17. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/backend/ex_markdown.py +0 -0
  18. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/backend/ex_redmine.py +0 -0
  19. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/backend/ex_tex.py +0 -0
  20. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/backend/mdx_latex.py +0 -0
  21. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/backend/pdf_maker.py +0 -0
  22. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/templating.py +0 -0
  23. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker/util.py +0 -0
  24. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker.egg-info/SOURCES.txt +0 -0
  25. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker.egg-info/dependency_links.txt +0 -0
  26. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/src/pydocmaker.egg-info/top_level.txt +0 -0
  27. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/tests/test_backend_pandoc.py +0 -0
  28. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/tests/test_convert_all.py +0 -0
  29. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/tests/test_core.py +0 -0
  30. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/tests/test_ex_html.py +0 -0
  31. {pydocmaker-2.3.2 → pydocmaker-2.3.4}/tests/test_util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pydocmaker
3
- Version: 2.3.2
3
+ Version: 2.3.4
4
4
  Summary: a minimal document maker to make docx, markdown, html, textile, redmine, and tex documents from python. Written in pure python.
5
5
  Home-page: https://github.com/TobiasGlaubach/pydocmaker
6
6
  Author: Tobias Glaubach
@@ -17,6 +17,8 @@ Requires-Dist: python-docx
17
17
  Requires-Dist: requests
18
18
  Requires-Dist: latex
19
19
  Requires-Dist: jinja2
20
+ Requires-Dist: docxcompose
21
+ Requires-Dist: docx-mailmerge
20
22
  Dynamic: license-file
21
23
 
22
24
  # pydocmaker
@@ -125,6 +127,61 @@ doc.to_ipynb('path/to/my_file.ipynb') # will write a ipynb file
125
127
 
126
128
  doc.to_json('path/to/doc.json') # saves the document
127
129
  ```
130
+ ### Writing Word docx Documents with templates and fields
131
+
132
+ Below is an example of how to use pydocmaker to write Word docx documents from format templates
133
+ and also automatically "replace" fields (MergeFields in Word or plain text) to be filled out in
134
+ the docx document with text from python.
135
+
136
+ (NOTE: some of the code below uses the win32com API and only works on Windows)
137
+
138
+ ```python
139
+ import pydocmaker as pyd
140
+
141
+ # these are some fields in my template which I want to populate automatically
142
+ metadata = {
143
+ 'repno': "1234",
144
+ "summary": "This is a nice workflow for automatically creating docx documents",
145
+ "date": "2025-12-13",
146
+ "comment": f"this works!",
147
+ "author": "Me"
148
+ }
149
+
150
+ templatepath = 'my/path/template.docx'
151
+ outpath = 'my/path/outfile.docx'
152
+
153
+ # get a pyd example document to combine with my docx template
154
+ docx_bts = pyd.get_example().to_docx()
155
+
156
+ print('replacing keywords...')
157
+ # HOWTO:
158
+ # Adding MergeFields In Word to replace them later:
159
+ # Go to Insert -> Quick Parts -> Field -> MergeField.
160
+ template = pyd.docx_replace_fields(templatepath, metadata)
161
+
162
+ # Alternative calls working with plain strings as keywords to replace
163
+ # -- see the function documentation for info
164
+ # template = pyd.docx_replace_keywords_raw(template, metadata)
165
+ # template = pyd.docx_replace_keywords(template, metadata)
166
+
167
+ # merge two or more documents by appending them
168
+ docx_bts = pyd.docx_merge(template, docx_bts)
169
+
170
+ # save out file
171
+ with open(outpath, "wb") as fp:
172
+ fp.write(docx_bts)
173
+
174
+ # use the win32com.client api with word to update all fields
175
+ # like the table of contents in the document after we changed it
176
+
177
+ # WARNING! only works in windows and with word installed
178
+ pyd.docx_update_w32(outpath)
179
+
180
+
181
+ print(outpath)
182
+
183
+ ```
184
+
128
185
 
129
186
  ### Uploading to Redmine
130
187
 
@@ -104,6 +104,61 @@ doc.to_ipynb('path/to/my_file.ipynb') # will write a ipynb file
104
104
 
105
105
  doc.to_json('path/to/doc.json') # saves the document
106
106
  ```
107
+ ### Writing Word docx Documents with templates and fields
108
+
109
+ Below is an example of how to use pydocmaker to write Word docx documents from format templates
110
+ and also automatically "replace" fields (MergeFields in Word or plain text) to be filled out in
111
+ the docx document with text from python.
112
+
113
+ (NOTE: some of the code below uses the win32com API and only works on Windows)
114
+
115
+ ```python
116
+ import pydocmaker as pyd
117
+
118
+ # these are some fields in my template which I want to populate automatically
119
+ metadata = {
120
+ 'repno': "1234",
121
+ "summary": "This is a nice workflow for automatically creating docx documents",
122
+ "date": "2025-12-13",
123
+ "comment": f"this works!",
124
+ "author": "Me"
125
+ }
126
+
127
+ templatepath = 'my/path/template.docx'
128
+ outpath = 'my/path/outfile.docx'
129
+
130
+ # get a pyd example document to combine with my docx template
131
+ docx_bts = pyd.get_example().to_docx()
132
+
133
+ print('replacing keywords...')
134
+ # HOWTO:
135
+ # Adding MergeFields In Word to replace them later:
136
+ # Go to Insert -> Quick Parts -> Field -> MergeField.
137
+ template = pyd.docx_replace_fields(templatepath, metadata)
138
+
139
+ # Alternative calls working with plain strings as keywords to replace
140
+ # -- see the function documentation for info
141
+ # template = pyd.docx_replace_keywords_raw(template, metadata)
142
+ # template = pyd.docx_replace_keywords(template, metadata)
143
+
144
+ # merge two or more documents by appending them
145
+ docx_bts = pyd.docx_merge(template, docx_bts)
146
+
147
+ # save out file
148
+ with open(outpath, "wb") as fp:
149
+ fp.write(docx_bts)
150
+
151
+ # use the win32com.client api with word to update all fields
152
+ # like the table of contents in the document after we changed it
153
+
154
+ # WARNING! only works in windows and with word installed
155
+ pyd.docx_update_w32(outpath)
156
+
157
+
158
+ print(outpath)
159
+
160
+ ```
161
+
107
162
 
108
163
  ### Uploading to Redmine
109
164
 
@@ -27,6 +27,8 @@ install_requires =
27
27
  requests
28
28
  latex
29
29
  jinja2
30
+ docxcompose
31
+ docx-mailmerge
30
32
 
31
33
  [options.packages.find]
32
34
  where = src
@@ -1,9 +1,9 @@
1
- __version__ = '2.3.2'
1
+ __version__ = '2.3.4'
2
2
 
3
3
  from pydocmaker.core import DocBuilder, construct, constr, buildingblocks, print_to_pdf, get_latex_compiler, set_latex_compiler, make_pdf_from_tex, show_pdf
4
4
  from pydocmaker.util import upload_report_to_redmine, bcolors, txtcolor, colors_dc
5
5
 
6
-
6
+ from pydocmaker.backend.ex_docx import docx_replace_keywords_raw, docx_replace_keywords, docx_replace_fields, docx_merge, docx_update_w32
7
7
  from pydocmaker.backend.ex_tex import can_run_pandoc
8
8
  from pydocmaker.backend.pdf_maker import get_all_installed_latex_compilers, get_latex_compiler
9
9
  from pydocmaker.backend.pandoc_api import pandoc_convert_file, pandoc_set_allowed
@@ -0,0 +1,469 @@
1
+ import traceback
2
+ import io
3
+
4
+ import base64
5
+ from typing import List
6
+
7
+ import docx
8
+ from docx.shared import Inches, Pt, RGBColor
9
+
10
+ import tempfile
11
+ import os
12
+
13
+ from pathlib import Path
14
+ import zipfile, os, sys
15
+ from io import BytesIO
16
+
17
+ import markdown
18
+
19
+ try:
20
+ from pydocmaker.backend.baseformatter import BaseFormatter
21
+ except Exception as err:
22
+ from .baseformatter import BaseFormatter
23
+
24
+ can_run_pandoc = lambda : False
25
+
26
+
27
+ try:
28
+ from pydocmaker.backend.pandoc_api import can_run_pandoc, pandoc_convert, pandoc_convert_file
29
+ except Exception as err:
30
+ from .pandoc_api import can_run_pandoc, pandoc_convert, pandoc_convert_file
31
+
32
+ try:
33
+ from pydocmaker.backend.ex_html import convert as convert_html
34
+ except Exception as err:
35
+ from .ex_html import convert as convert_html
36
+
37
+
38
+
39
+ from docx import Document
40
+
41
def _make_output(bts, output_file_or_buffer):
    """Deliver document bytes to a file path, a writable stream, or the caller.

    Args:
        bts (bytes): The document content to deliver.
        output_file_or_buffer (str, Path, filelike, or None): Destination.
            A ``str``/``Path`` is treated as a file path (missing parent
            directories are created), any object with a ``write`` attribute
            is treated as a stream, anything else means "just return it".

    Returns:
        bool: For a path, whether the file exists after writing.
        object: For a stream, whatever its ``write(bts)`` call returns.
        bytes: ``bts`` unchanged for any other destination (e.g. ``None``).
    """
    if isinstance(output_file_or_buffer, (str, Path)):
        # A bare file name has an empty dirname and os.makedirs('') raises
        # FileNotFoundError, so only create the parent when there is one.
        parent_dir = os.path.dirname(output_file_or_buffer)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(output_file_or_buffer, 'wb') as f:
            f.write(bts)
        return os.path.exists(output_file_or_buffer)

    if hasattr(output_file_or_buffer, 'write'):
        return output_file_or_buffer.write(bts)

    return bts
52
+
53
+
54
def _get_bytes_file_or_buffer(file_path_or_buffer):
    """Return the raw content of a document given as a stream, buffer, or path.

    Args:
        file_path_or_buffer: An object with a ``read`` method (its ``read()``
            result is returned as is), an in-memory buffer (``bytes``,
            ``bytearray`` or ``memoryview``), or anything ``open()`` accepts
            as a path.

    Returns:
        The content read from the stream, the buffer as ``bytes``, or the
        bytes read from the file at the given path.
    """
    if hasattr(file_path_or_buffer, "read"):
        return file_path_or_buffer.read()

    if isinstance(file_path_or_buffer, (bytes, bytearray, memoryview)):
        # Mutable/other buffer types used to fall through to open() and fail
        # with a TypeError; normalise all of them to immutable bytes instead.
        return bytes(file_path_or_buffer)

    # Anything else is treated as a path to the file on disk.
    with open(file_path_or_buffer, 'rb') as f:
        return f.read()
64
+
65
def docx_update_w32(inpath, outpath=None):
    """
    Update all fields in a Word document using COM automation.

    This function opens a `.docx` file via the Word COM interface,
    updates all fields in the main body and in all story ranges
    (including linked ranges reached through `NextStoryRange`), and
    then saves the document. If `outpath` is provided, the updated
    document is saved to that path; otherwise, the original file is
    overwritten. The document is closed and Word is quit even when an
    error occurs part-way through.

    Only works on Windows with Word and the `win32com` package installed.

    Args:
        inpath (str): Path to the input Word document (.docx).
        outpath (str, optional): Path to save the updated document.
            If None, the input file is overwritten. Defaults to None.

    Returns:
        None

    Raises:
        pywintypes.com_error: If Word cannot open the document or
            if COM automation fails.

    Example:
        >>> docx_update_w32("C:/docs/report.docx")
        # Updates fields in report.docx and overwrites the file

        >>> docx_update_w32("C:/docs/report.docx", "C:/docs/report_updated.docx")
        # Updates fields and saves to a new file
    """

    import win32com.client

    # Word runs as a separate process, so a relative path would not be
    # resolved against this script's working directory; hand it absolute
    # paths (this also turns Path objects into plain strings for COM).
    inpath = os.path.abspath(inpath)
    if outpath is not None:
        outpath = os.path.abspath(outpath)

    word = win32com.client.Dispatch("Word.Application")
    try:
        doc = word.Documents.Open(inpath)
        try:
            doc.Fields.Update()

            for story_range in doc.StoryRanges:
                story_range.Fields.Update()
                # Some story ranges have linked ranges (e.g., next header/footer)
                while story_range.NextStoryRange is not None:
                    story_range = story_range.NextStoryRange
                    story_range.Fields.Update()

            if outpath is None:
                doc.Save()
            else:
                doc.SaveAs(outpath)
        finally:
            # Saving already happened above (or failed); close without the
            # "save changes?" prompt so an error cannot leave Word waiting.
            doc.Close(False)
    finally:
        # Always shut the automation instance down so no orphaned Word
        # process is left behind after a failure.
        word.Quit()
114
+
115
+
116
def docx_merge(*files, verb=0) -> bytes:
    """
    Merge multiple Word (.docx) documents into a single document.

    This function uses the `docxcompose.Composer` class to combine
    multiple `.docx` files into one. The first file becomes the base
    document and every further file is appended to it. The merged
    document is returned as raw bytes, suitable for saving to disk or
    further processing. Input files can be provided as paths, file-like
    objects, or raw byte buffers. If the first argument is a list or
    tuple, it is used as the set of files to merge (any further
    positional arguments are then ignored).

    Args:
        *files (Union[str, bytes, io.BytesIO, list, tuple]):
            One or more `.docx` files to merge. Each file can be a
            filesystem path, a bytes buffer, or a file-like object.
            If a single list or tuple is passed, it will be expanded.
        verb (int, optional): Verbosity level. If nonzero, progress
            messages are printed during merging. Defaults to 0.

    Returns:
        bytes: The merged `.docx` document as a byte string.

    Raises:
        AssertionError: If no files are provided (also for an empty list).
        ImportError: If `docxcompose` is not installed.
        Exception: Any error raised during document loading or merging.

    Example:
        >>> merged_bytes = docx_merge("intro.docx", "chapter1.docx", "chapter2.docx")
        >>> with open("book.docx", "wb") as f:
        ...     f.write(merged_bytes)

        >>> files = ["report_part1.docx", "report_part2.docx"]
        >>> merged_bytes = docx_merge(files, verb=1)
        >>> open("report_full.docx", "wb").write(merged_bytes)
    """

    # A single list/tuple argument is the collection of files itself.
    if files and isinstance(files[0], (tuple, list)):
        files = files[0]

    # Validate after unpacking so an empty list is rejected as well, and raise
    # explicitly (not via `assert`) so the check survives `python -O`.
    if not files:
        raise AssertionError(f'Must supply files to merge, but given was {files=}!')

    # Imported lazily so the rest of the module works without docxcompose.
    if verb:
        print('importing Composer from docxcompose...')
    from docxcompose.composer import Composer

    composer = None
    total = len(files)
    for i, file in enumerate(files):

        bts = _get_bytes_file_or_buffer(file)
        if composer is None:
            # The first document is the base all others are appended to.
            if verb:
                print(f'loading template {i}/{total}...')
            composer = Composer(Document(io.BytesIO(bts)))
            if verb:
                print('compose...')
        else:
            if verb:
                print(f'adding doc {i}/{total}')
                print("load report...")
                print(f'doc={file}')
            doc_b = Document(io.BytesIO(bts))
            if verb:
                print("adding report to template...")
            composer.append(doc_b)

    if verb:
        print("saving merged as bytes...")

    with io.BytesIO() as fp:
        composer.save(fp)
        merged = fp.getvalue()

    if verb:
        print("returning...")

    return merged
193
+
194
+
195
+ # def docx_replace_color(file_path_or_buffer, output_file_or_buffer=None, color_to_replace=(0, 0, 0), new_color=(0, 112, 192), do_replace_default_color=True):
196
+
197
+ # docx_data = _get_bytes_file_or_buffer(file_path_or_buffer)
198
+
199
+ # # Create a temporary zip file from the DOCX data
200
+ # doc = Document(BytesIO(docx_data))
201
+
202
+ # if isinstance(color_to_replace, (tuple, list)):
203
+ # color_to_replace = RGBColor(*color_to_replace)
204
+
205
+ # if isinstance(new_color, (tuple, list)):
206
+ # new_color = RGBColor(*new_color)
207
+
208
+
209
+ # # Iterate through all paragraphs and runs
210
+ # for para in doc.paragraphs:
211
+ # for run in para.runs:
212
+ # # Check if the run has a font color set to black
213
+ # if (run.font.color is None and do_replace_default_color) or run.font.color.rgb == color_to_replace:
214
+ # run.font.color.rgb = new_color # set to navyblue
215
+
216
+ # with BytesIO() as fp:
217
+ # doc.save(fp)
218
+ # bts = fp.getvalue()
219
+
220
+ # return _make_output(bts, output_file_or_buffer)
221
+
222
+
223
+
224
def docx_replace_fields(file_path_or_buffer, replace_dict, output_file_or_buffer=None):
    """
    Replace all MergeFields of a DOCX file with given values in form of a dict.

    Adding MergeFields In Word:
      - Go to Insert → Quick Parts → Field → MergeField.
      - Give it a name like FirstName, Date, etc.

    This function reads a DOCX file, merges the given values into its
    MergeFields using `mailmerge.MailMerge`, and writes the modified content
    to the given destination or returns it as bytes.

    Args:
        file_path_or_buffer (str, bytes, or filelike): Path to the input DOCX file, its raw bytes, or a readable stream.
        replace_dict (dict): Dictionary mapping MergeField names (keys) to their replacement values (values). It is passed to `MailMerge.merge` as keyword arguments, so the keys must be strings.
        output_file_or_buffer (str, Path, or filelike object, optional): Path to save the modified DOCX file, or a buffer object to write to. If None, returns the modified DOCX content as bytes.

    Returns:
        bool or bytes: If output_file_or_buffer is a path, True when the file exists after writing. If it is a stream, the result of its `write` call. If it is None, the modified DOCX content as bytes.

    Raises:
        ImportError: If `docx-mailmerge` is not installed.
    """

    # Imported lazily so the rest of the module works without docx-mailmerge.
    from mailmerge import MailMerge

    bts = _get_bytes_file_or_buffer(file_path_or_buffer)
    outbuf = BytesIO()

    with MailMerge(BytesIO(bts)) as document:
        document.merge(**replace_dict)
        document.write(outbuf)

    return _make_output(outbuf.getvalue(), output_file_or_buffer)
255
+
256
+
257
def docx_replace_keywords(file_path_or_buffer, replace_dict, output_file_or_buffer=None):
    """
    Replace plain-text keywords in the body paragraphs of a DOCX file using python-docx.

    This function loads the DOCX file with python-docx, goes through the
    paragraphs exposed by `Document.paragraphs`, replaces every occurrence of
    each key with its value, and writes the modified document to the given
    destination or returns it as bytes.

    Only paragraphs that actually contain a keyword are rewritten. Rewriting
    a paragraph assigns its `text`, which replaces the paragraph content with
    the new plain text, so run-level formatting inside a rewritten paragraph
    is not kept.

    Args:
        file_path_or_buffer (str, bytes, or filelike): Path to the input DOCX file, its raw bytes, or a readable stream.
        replace_dict (dict): Dictionary mapping strings to be replaced (keys) to their replacement values (values). Both are converted with `str()`.
        output_file_or_buffer (str, Path, or filelike object, optional): Path to save the modified DOCX file, or a buffer object to write to. If None, returns the modified DOCX content as bytes.

    Returns:
        bool or bytes: If output_file_or_buffer is a path, True when the file exists after writing. If it is a stream, the result of its `write` call. If it is None, the modified DOCX content as bytes.
    """

    docx_data = _get_bytes_file_or_buffer(file_path_or_buffer)
    doc = Document(BytesIO(docx_data))

    # Convert once instead of once per paragraph.
    replacements = [(str(key), str(value)) for key, value in replace_dict.items()]

    for p in doc.paragraphs:
        old_text = p.text
        new_text = old_text
        for key_to_replace, new_value in replacements:
            new_text = new_text.replace(key_to_replace, new_value)

        # Assigning p.text rebuilds the paragraph content, so leave paragraphs
        # without any keyword untouched to keep their formatting intact.
        if new_text != old_text:
            p.text = new_text

    with BytesIO() as fp:
        doc.save(fp)
        bts = fp.getvalue()

    return _make_output(bts, output_file_or_buffer)
286
+
287
+
288
+
289
def docx_replace_keywords_raw(file_path_or_buffer, replace_dict, output_file_or_buffer=None):
    """
    Edit raw XML content of a DOCX file by replacing specified strings in all XML files within the document.

    This function reads a DOCX file (which is a ZIP archive), replaces the
    given strings in every XML part of the archive (`*.xml` and `*.rels`),
    copies all other parts (e.g. embedded images) unchanged, and writes the
    result to the given destination or returns it as bytes.

    The replacement works on the raw UTF-8 bytes: values are inserted as
    given and are NOT XML-escaped, and a keyword that Word has split across
    several runs in the XML will not be found.

    Args:
        file_path_or_buffer (str, bytes, or filelike): Path to the input DOCX file, its raw bytes, or a readable stream.
        replace_dict (dict): Dictionary mapping strings to be replaced (keys) to their replacement values (values). Both are converted with `str()`.
        output_file_or_buffer (str, Path, or filelike object, optional): Path to save the modified DOCX file, or a buffer object to write to. If None, returns the modified DOCX content as bytes.

    Returns:
        bool or bytes: If output_file_or_buffer is a path, True when the file exists after writing. If it is a stream, the result of its `write` call. If it is None, the modified DOCX content as bytes.
    """

    docx_data = _get_bytes_file_or_buffer(file_path_or_buffer)

    # Encode once; str() mirrors docx_replace_keywords so non-string keys or
    # values (e.g. numbers) work here as well.
    replacements = [
        (str(key).encode('utf-8'), str(value).encode('utf-8'))
        for key, value in replace_dict.items()
    ]

    output_buffer = BytesIO()
    with zipfile.ZipFile(BytesIO(docx_data), 'r') as zip_file:
        with zipfile.ZipFile(output_buffer, 'w', zipfile.ZIP_DEFLATED) as new_zip:
            for filename in zip_file.namelist():
                content = zip_file.read(filename)

                # Only touch XML parts; a byte-level replace inside binary
                # parts such as images could corrupt them.
                if filename.lower().endswith(('.xml', '.rels')):
                    for key_to_replace, new_value in replacements:
                        content = content.replace(key_to_replace, new_value)

                new_zip.writestr(filename, content)

    # Read the buffer only after the archive is closed, i.e. fully written.
    bts = output_buffer.getvalue()

    return _make_output(bts, output_file_or_buffer)
328
+
329
+
330
def blue(run):
    """Set the font colour of the given run to pure blue (RGB 0, 0, 255)."""
    pure_blue = docx.shared.RGBColor(0, 0, 255)
    run.font.color.rgb = pure_blue
332
+
333
def red(run):
    """Set the font colour of the given run to pure red (RGB 255, 0, 0)."""
    pure_red = docx.shared.RGBColor(255, 0, 0)
    run.font.color.rgb = pure_red
335
+
336
def convert_pandoc(doc:List[dict]) -> bytes:
    """Render the document to DOCX by way of an intermediate HTML file and pandoc.

    The document is exported to HTML with `convert_html`, written to a
    temporary directory, converted with `pandoc_convert_file`, and the
    resulting DOCX file is read back.

    Args:
        doc (List[dict]): The document elements to render.

    Returns:
        bytes: The content of the DOCX file produced by pandoc.
    """
    with tempfile.TemporaryDirectory() as workdir:
        source_html = os.path.join(workdir, 'temp.html')
        target_docx = os.path.join(workdir, 'temp.docx')

        with open(source_html, 'w', encoding='utf-8') as html_out:
            html_out.write(convert_html(doc))

        pandoc_convert_file(source_html, target_docx)

        # Read the result before the temporary directory is removed.
        with open(target_docx, 'rb') as docx_in:
            docx_bytes = docx_in.read()
        return docx_bytes
348
+
349
def convert(doc:List[dict], template_path:str=None, make_blue=False) -> bytes:
    """Convert the document elements to DOCX bytes.

    pandoc is used when `can_run_pandoc()` reports it is usable and no
    template is requested; in that case `make_blue` is not applied.
    Otherwise the document is rendered with `docx_renderer`.

    Args:
        doc (List[dict]): The document elements to render.
        template_path (str, optional): DOCX template handed to `docx_renderer`.
        make_blue (bool): Handed to `docx_renderer`.

    Returns:
        bytes: The rendered DOCX document.
    """
    use_pandoc = can_run_pandoc() and not template_path
    if use_pandoc:
        return convert_pandoc(doc)

    docx_out = docx_renderer(template_path, make_blue=make_blue)
    docx_out.digest(doc)
    return docx_out.doc_to_bytes()
357
+
358
class docx_renderer(BaseFormatter):
    """Formatter that renders pydocmaker elements into a python-docx document.

    The document is held in `self.d`. When a template path is given the
    template is opened and all rendered content is added to the end of it.
    """

    def __init__(self, template_path:str=None, make_blue=False) -> None:
        """Open the (template) document.

        Args:
            template_path (str, optional): DOCX file to start from; passed to
                `docx.Document`, where None gives python-docx's default document.
            make_blue (bool): If True, every run added through
                `add_paragraph` / `add_run` is coloured blue.
        """
        self.d = docx.Document(template_path)
        self.make_blue = make_blue

    @staticmethod
    def _style_monospace(run):
        """Give a run the small monospace look used for code-like content."""
        run.font.name = 'Courier New'  # Or any other monospace font
        run.font.size = docx.shared.Pt(8)  # Adjust font size as needed
        return run

    def add_paragraph(self, newtext, *args, **kwargs):
        """Append a paragraph to the document and return it."""
        new_paragraph = self.d.add_paragraph(newtext, *args, **kwargs)
        if self.make_blue:
            for r in new_paragraph.runs:
                blue(r)
        return new_paragraph

    def add_run(self, text, *args, **kwargs):
        """Add text to the last paragraph and return the run that holds it.

        The text is appended to the paragraph's last run when one exists,
        otherwise a new run is created. An empty paragraph is created first
        if the document has none.
        """
        if not self.d.paragraphs:
            self.add_paragraph('')

        last_paragraph = self.d.paragraphs[-1]

        if not last_paragraph.runs:
            last_run = last_paragraph.add_run(text)
        else:
            last_run = last_paragraph.runs[-1]
            last_run.add_text(text)

        if self.make_blue:
            blue(last_run)
        return last_run

    def digest_text(self, children, *args, **kwargs):
        """Render a text element as a new paragraph."""
        return self.add_paragraph(children)

    def digest_str(self, children, *args, **kwargs):
        """Render a plain string by adding it to the last paragraph."""
        return self.add_run(children)

    def digest_line(self, children, *args, **kwargs):
        """Render a line: the text followed by a newline, added to the last paragraph."""
        return self.add_run(children + '\n')

    def digest_markdown(self, children, *args, **kwargs):
        """Render markdown source as a plain paragraph in the 'Normal' style."""
        return self.add_paragraph(children, style='Normal')

    def digest_verbatim(self, children, *args, **kwargs):
        """Render verbatim text in the monospace style."""
        return self._style_monospace(self.add_run(children))

    def digest_latex(self, children, *args, **kwargs):
        """Render LaTeX source as-is in the monospace style."""
        return self._style_monospace(self.add_run(children))

    def handle_error(self, err, el=None):
        """Write an error message into the document in red monospace text.

        Args:
            err: An exception (its traceback, limited to 5 frames, is printed
                and written to the document) or a ready-made message string.
            el: Unused.

        Returns:
            The run holding the error text.
        """
        if isinstance(err, BaseException):
            traceback.print_exc(limit=5)
            # format_exception() lines already end with a newline, so join
            # them directly instead of inserting blank lines in between.
            err = ''.join(traceback.format_exception(type(err), value=err, tb=err.__traceback__, limit=5))

        new_run = self._style_monospace(self.add_run(err))
        red(new_run)
        return new_run

    def digest_iterator(self, children, *args, **kwargs):
        """Render every child element and return the list of results."""
        if children:
            return [self.digest(val, *args, **kwargs) for val in children]
        return []

    def digest_table(self, children=None, **kwargs):
        """Tables are not supported; write an error note and return its run."""
        return self.handle_error(NotImplementedError(f'exporter of type {type(self)} can not handle tables'))

    def digest_image(self, children, *args, **kwargs):
        """Render a base64 encoded image, centred, followed by a caption paragraph.

        Keyword Args:
            width (float): Scale factor; the image width is `width * 5`
                inches, but at least 1 inch. Defaults to 0.8.
            caption (str): Caption text. Defaults to ''.
            imageblob (str): Base64 image data; anything up to the last
                comma (e.g. a data-URI prefix) is ignored.

        Returns:
            The caption paragraph.

        Raises:
            AssertionError: If no image data is given.
        """
        # Local import: only this method needs the alignment enum.
        from docx.enum.text import WD_ALIGN_PARAGRAPH

        image_width = Inches(max(1, kwargs.get('width', 0.8)*5))
        image_caption = kwargs.get('caption', '')
        image_blob = kwargs.get('imageblob', '')

        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        if not image_blob:
            raise AssertionError('no image data given!')

        btsb64 = image_blob.split(',')[-1]

        # Decode the base64 image
        img_bytes = base64.b64decode(btsb64)

        # Create an image stream from the bytes
        image_stream = io.BytesIO(img_bytes)

        self.d.add_picture(image_stream, width=image_width)
        # add_picture() places the image in its own paragraph at the end of
        # the document; alignment is a property of that paragraph, not of
        # the inline picture, so centre the paragraph.
        self.d.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.CENTER

        caption_paragraph = self.add_paragraph(image_caption)

        return caption_paragraph

    def format(self, *args, **kwargs):
        """Not supported for DOCX output; always raises NotImplementedError."""
        raise NotImplementedError('Can not format a docx document directly')

    def doc_to_bytes(self):
        """Return the document serialised as DOCX bytes."""
        with io.BytesIO() as fp:
            self.d.save(fp)
            return fp.getvalue()

    def save(self, filepath):
        """Save the document to the given path (or stream) via python-docx."""
        self.d.save(filepath)
@@ -71,6 +71,26 @@ def pandoc_set_allowed(is_allowed):
71
71
  allow_pandoc = True if is_allowed else False
72
72
  return allow_pandoc
73
73
 
74
def pandoc_merge_files(inp_files, out_file):
    """
    Merge several input files into one output file using pandoc.

    All input files are handed to a single `pandoc ... -o out_file` call.

    Parameters:
    inp_files (List[str]): The paths to the input files, in the order they should be merged. A single path is accepted as well.
    out_file (str or Path): The path to the output file.

    Returns:
    subprocess.CompletedProcess: The result of the pandoc command. The return code is not checked here.

    Raises:
    AssertionError: If no input file is given, an input file does not exist, or no output file is provided.
    """
    # A lone path would otherwise be iterated character by character.
    if isinstance(inp_files, (str, os.PathLike)):
        inp_files = [inp_files]
    inp_files = list(inp_files)

    # Checks raise explicitly (not via `assert`) so they survive `python -O`.
    # Without any input file pandoc would read from stdin and block.
    if not inp_files:
        raise AssertionError("Need to give an input file name!")

    for inp_file in inp_files:
        if not inp_file:
            raise AssertionError("Need to give an input file name!")
        if not os.path.exists(inp_file):
            raise AssertionError(f"input file {inp_file=} does not exist!")

    if not out_file:
        raise AssertionError("Need to give an output file name!")

    return subprocess.run(['pandoc', *inp_files, '-o', out_file])
74
94
 
75
95
  def pandoc_convert_file(inp_file, out_file_or_format):
76
96
  """
@@ -89,7 +109,7 @@ def pandoc_convert_file(inp_file, out_file_or_format):
89
109
  AssertionError: If the input file does not exist or if no output file or format is provided.
90
110
  """
91
111
 
92
- assert inp_file, "Need to give an inout file name!"
112
+ assert inp_file, "Need to give an input file name!"
93
113
  assert os.path.exists(inp_file), f"input file {inp_file=} does not exist!"
94
114
 
95
115
  out_file = out_file_or_format
@@ -986,17 +986,20 @@ class DocBuilder(UserList):
986
986
  """
987
987
  return self._ret(to_markdown(self.dump(), embed_images=embed_images), path_or_stream)
988
988
 
989
- def to_docx(self, path_or_stream=None) -> bytes:
989
+ def to_docx(self, path_or_stream=None, template_path:str=None, make_blue=False) -> bytes:
990
990
  """
991
991
  Converts the current object to a DOCX file.
992
992
 
993
993
  Args:
994
994
  path_or_stream (str or io.IOBase, optional): The path to save the file to, or a file-like object to write the data to. If not provided, the data will be returned as bytes.
995
+ template_path (str, optional): Path to a template DOCX file to use for formatting the output (the generated content is appended to the end of the template document).
996
+ make_blue (bool): If True, applies blue color styling to the generated document.
997
+
995
998
 
996
999
  Returns:
997
1000
  bytes: The data as bytes, or True if the data was saved successfully to a file or stream.
998
1001
  """
999
- return self._ret(to_docx(self.dump()), path_or_stream)
1002
+ return self._ret(to_docx(self.dump(), template_path=template_path, make_blue=make_blue), path_or_stream)
1000
1003
 
1001
1004
  def to_ipynb(self, path_or_stream=None) -> str:
1002
1005
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pydocmaker
3
- Version: 2.3.2
3
+ Version: 2.3.4
4
4
  Summary: a minimal document maker to make docx, markdown, html, textile, redmine, and tex documents from python. Written in pure python.
5
5
  Home-page: https://github.com/TobiasGlaubach/pydocmaker
6
6
  Author: Tobias Glaubach
@@ -17,6 +17,8 @@ Requires-Dist: python-docx
17
17
  Requires-Dist: requests
18
18
  Requires-Dist: latex
19
19
  Requires-Dist: jinja2
20
+ Requires-Dist: docxcompose
21
+ Requires-Dist: docx-mailmerge
20
22
  Dynamic: license-file
21
23
 
22
24
  # pydocmaker
@@ -125,6 +127,61 @@ doc.to_ipynb('path/to/my_file.ipynb') # will write a ipynb file
125
127
 
126
128
  doc.to_json('path/to/doc.json') # saves the document
127
129
  ```
130
+ ### Writing Word docx Documents with templates and fields
131
+
132
+ Below is an example of how to use pydocmaker to write Word docx documents from format templates
133
+ and also automatically "replace" fields (MergeFields in Word or plain text) to be filled out in
134
+ the docx document with text from python.
135
+
136
+ (NOTE: some of the code below uses the win32com API and only works on Windows)
137
+
138
+ ```python
139
+ import pydocmaker as pyd
140
+
141
+ # these are some fields in my template which I want to populate automatically
142
+ metadata = {
143
+ 'repno': "1234",
144
+ "summary": "This is a nice workflow for automatically creating docx documents",
145
+ "date": "2025-12-13",
146
+ "comment": f"this works!",
147
+ "author": "Me"
148
+ }
149
+
150
+ templatepath = 'my/path/template.docx'
151
+ outpath = 'my/path/outfile.docx'
152
+
153
+ # get a pyd example document to combine with my docx template
154
+ docx_bts = pyd.get_example().to_docx()
155
+
156
+ print('replacing keywords...')
157
+ # HOWTO:
158
+ # Adding MergeFields In Word to replace them later:
159
+ # Go to Insert -> Quick Parts -> Field -> MergeField.
160
+ template = pyd.docx_replace_fields(templatepath, metadata)
161
+
162
+ # Alternative calls working with plain strings as keywords to replace
163
+ # -- see the function documentation for info
164
+ # template = pyd.docx_replace_keywords_raw(template, metadata)
165
+ # template = pyd.docx_replace_keywords(template, metadata)
166
+
167
+ # merge two or more documents by appending them
168
+ docx_bts = pyd.docx_merge(template, docx_bts)
169
+
170
+ # save out file
171
+ with open(outpath, "wb") as fp:
172
+ fp.write(docx_bts)
173
+
174
+ # use the win32com.client api with word to update all fields
175
+ # like the table of contents in the document after we changed it
176
+
177
+ # WARNING! only works in windows and with word installed
178
+ pyd.docx_update_w32(outpath)
179
+
180
+
181
+ print(outpath)
182
+
183
+ ```
184
+
128
185
 
129
186
  ### Uploading to Redmine
130
187
 
@@ -3,3 +3,5 @@ python-docx
3
3
  requests
4
4
  latex
5
5
  jinja2
6
+ docxcompose
7
+ docx-mailmerge
@@ -1,174 +0,0 @@
1
- import traceback
2
- import io
3
-
4
- import base64
5
- from typing import List
6
-
7
- import docx
8
- from docx.shared import Inches, Pt
9
-
10
- import tempfile
11
- import os
12
-
13
- import markdown
14
-
15
- try:
16
- from pydocmaker.backend.baseformatter import BaseFormatter
17
- except Exception as err:
18
- from .baseformatter import BaseFormatter
19
-
20
- can_run_pandoc = lambda : False
21
-
22
-
23
- try:
24
- from pydocmaker.backend.pandoc_api import can_run_pandoc, pandoc_convert, pandoc_convert_file
25
- except Exception as err:
26
- from .pandoc_api import can_run_pandoc, pandoc_convert, pandoc_convert_file
27
-
28
- try:
29
- from pydocmaker.backend.ex_html import convert as convert_html
30
- except Exception as err:
31
- from .ex_html import convert as convert_html
32
-
33
-
34
-
35
- def blue(run):
36
- run.font.color.rgb = docx.shared.RGBColor(0, 0, 255)
37
-
38
- def red(run):
39
- run.font.color.rgb = docx.shared.RGBColor(255, 0, 0)
40
-
41
- def convert_pandoc(doc:List[dict]) -> bytes:
42
-
43
- with tempfile.TemporaryDirectory() as temp_dir:
44
- html_file_path = os.path.join(temp_dir, 'temp.html')
45
- docx_file_path = os.path.join(temp_dir, 'temp.docx')
46
-
47
- with open(html_file_path, 'w', encoding='utf-8') as fp:
48
- fp.write(convert_html(doc))
49
-
50
- pandoc_convert_file(html_file_path, docx_file_path)
51
- with open(docx_file_path, 'rb') as fp:
52
- return fp.read()
53
-
54
- def convert(doc:List[dict]) -> bytes:
55
-
56
- if can_run_pandoc():
57
- return convert_pandoc(doc)
58
- else:
59
- renderer = docx_renderer()
60
- renderer.digest(doc)
61
- return renderer.doc_to_bytes()
62
-
63
- class docx_renderer(BaseFormatter):
64
- def __init__(self, template_path:str=None, make_blue=False) -> None:
65
- self.d = docx.Document(template_path)
66
- self.make_blue = make_blue
67
-
68
- def add_paragraph(self, newtext, *args, **kwargs):
69
- new_paragraph = self.d.add_paragraph(newtext, *args, **kwargs)
70
- if self.make_blue:
71
- for r in new_paragraph.runs:
72
- blue(r)
73
- return new_paragraph
74
-
75
- def add_run(self, text, *args, **kwargs):
76
- if not self.d.paragraphs:
77
- self.add_paragraph('')
78
-
79
- last_paragraph = self.d.paragraphs[-1]
80
-
81
- if not last_paragraph.runs:
82
- last_run = last_paragraph.add_run(text)
83
- else:
84
- last_run = last_paragraph.runs[-1]
85
- last_run.add_text(text)
86
-
87
- if self.make_blue:
88
- blue(last_run)
89
- return last_run
90
-
91
- def digest_text(self, children, *args, **kwargs):
92
- return self.add_paragraph(children)
93
-
94
-
95
- def digest_str(self, children, *args, **kwargs):
96
- return self.add_run(children)
97
-
98
- def digest_line(self, children, *args, **kwargs):
99
- return self.add_run(children + '\n')
100
-
101
- def digest_markdown(self, children, *args, **kwargs):
102
- return self.add_paragraph(children, style='Normal')
103
-
104
- def digest_verbatim(self, children, *args, **kwargs):
105
- new_run = self.add_run(children)
106
- new_run.font.name = 'Courier New' # Or any other monospace font
107
- new_run.font.size = docx.shared.Pt(8) # Adjust font size as needed
108
- return new_run
109
-
110
- def digest_latex(self, children, *args, **kwargs):
111
- new_run = self.add_run(children)
112
- new_run.font.name = 'Courier New' # Or any other monospace font
113
- new_run.font.size = docx.shared.Pt(8) # Adjust font size as needed
114
- return new_run
115
-
116
-
117
- def handle_error(self, err, el=None) -> list:
118
- if isinstance(err, BaseException):
119
- traceback.print_exc(limit=5)
120
- err = '\n'.join(traceback.format_exception(type(err), value=err, tb=err.__traceback__, limit=5))
121
-
122
- new_run = self.add_run(err)
123
- new_run.font.name = 'Courier New' # Or any other monospace font
124
- new_run.font.size = docx.shared.Pt(8) # Adjust font size as needed
125
- red(new_run)
126
- return new_run
127
-
128
-
129
- def digest_iterator(self, children, *args, **kwargs):
130
- if children:
131
- return [self.digest(val, *args, **kwargs) for val in children]
132
- return []
133
-
134
- def digest_table(self, children=None, **kwargs) -> str:
135
- self.handle_error(NotImplementedError(f'exporter of type {type(self)} can not handle tables'))
136
-
137
- def digest_image(self, children, *args, **kwargs):
138
-
139
- image_width = Inches(max(1, kwargs.get('width', 0.8)*5))
140
- image_caption = kwargs.get('caption', '')
141
- image_blob = kwargs.get('imageblob', '')
142
-
143
- assert image_blob, 'no image data given!'
144
-
145
- btsb64 = image_blob.split(',')[-1]
146
-
147
- # Decode the base64 image
148
- img_bytes = base64.b64decode(btsb64)
149
-
150
- # Create an image stream from the bytes
151
- image_stream = io.BytesIO(img_bytes)
152
-
153
- picture = self.d.add_picture(image_stream, width=image_width)
154
- # picture.width = image_width # Ensure fixed width
155
- # picture.height = None # Adjust height automatically
156
- picture.alignment = 1
157
-
158
- run = self.add_paragraph(image_caption)
159
- # run.style = 'Caption' # Apply the 'Caption' style for formatting
160
-
161
- return run
162
-
163
- def format(self, *args, **kwargs):
164
- raise NotImplementedError('Can not format a docx document directly')
165
-
166
-
167
- def doc_to_bytes(self):
168
- with io.BytesIO() as fp:
169
- self.d.save(fp)
170
- fp.seek(0)
171
- return fp.read()
172
-
173
- def save(self, filepath):
174
- self.d.save(filepath)
File without changes
File without changes