pydocmaker 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.1
2
+ Name: pydocmaker
3
+ Version: 1.0.0
4
+ Summary: a minimal document maker to make docx, markdown, html, and tex documents from python. Written in pure python.
5
+ Home-page: https://github.com/TobiasGlaubach/pyandoc
6
+ Author: Tobias Glaubach
7
+ Project-URL: Bug Tracker, https://github.com/TobiasGlaubach/pyandoc/issues
8
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Environment :: Console
11
+ Classifier: Programming Language :: Python
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3 :: Only
14
+ Requires-Python: >=3.8
15
+ Description-Content-Type: text/markdown
16
+
17
+ # pydocmaker
18
+ a minimal document maker to make docx, markdown, html, and tex documents from python.
19
+ Written in pure python.
20
+
21
+ Named in honor and spirit after the famous [pandoc](https://github.com/jgm/pandoc) package.
@@ -0,0 +1,5 @@
1
+ # pydocmaker
2
+ A minimal document maker for creating docx, Markdown, HTML, and TeX documents from Python.
3
+ Written in pure python.
4
+
5
+ Named in honor of, and in the spirit of, the famous [pandoc](https://github.com/jgm/pandoc) package.
@@ -0,0 +1,33 @@
1
+ [metadata]
2
+ name = pydocmaker
3
+ description-file = README.md
4
+ license_files = LICENSE.rst
5
+ author = Tobias Glaubach
6
+ description = a minimal document maker to make docx, markdown, html, and tex documents from python. Written in pure python.
7
+ license_file = LICENSE
8
+ long_description = file: README.md
9
+ long_description_content_type = text/markdown
10
+ url = https://github.com/TobiasGlaubach/pyandoc
11
+ project_urls =
12
+ Bug Tracker = https://github.com/TobiasGlaubach/pyandoc/issues
13
+ classifiers =
14
+ License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
15
+ Operating System :: OS Independent
16
+ Environment :: Console
17
+ Programming Language :: Python
18
+ Programming Language :: Python :: 3
19
+ Programming Language :: Python :: 3 :: Only
20
+
21
+ [options]
22
+ package_dir =
23
+ =src
24
+ packages = find:
25
+ python_requires = >=3.8
26
+
27
+ [options.packages.find]
28
+ where = src
29
+
30
+ [egg_info]
31
+ tag_build =
32
+ tag_date = 0
33
+
@@ -0,0 +1,10 @@
1
+
2
+ import re
3
+
4
def get_property(prop, project):
    """Read a string-valued assignment from ``src/<project>/__init__.py``.

    The file is searched as text (it is not imported) for the first
    ``<prop> = '<value>'`` or ``<prop> = "<value>"`` assignment.

    Args:
        prop: name of the module-level variable, e.g. ``'__version__'``.
        project: package directory name below ``src/``.

    Returns:
        The quoted value as a string.

    Raises:
        RuntimeError: if no matching assignment is found.
        OSError: if the ``__init__.py`` file cannot be opened.
    """
    init_path = 'src/' + project + '/__init__.py'
    with open(init_path, encoding='utf-8') as fp:
        source = fp.read()

    # prop is a literal name, so escape it before placing it in the pattern
    pattern = r'{}\s*=\s*[\'"]([^\'"]*)[\'"]'.format(re.escape(prop))
    result = re.search(pattern, source)
    if result is None:
        raise RuntimeError(f'could not find a string assignment for {prop!r} in {init_path!r}')
    return result.group(1)
8
+
9
+ # print(get_property('__version__', "mke_client"))
10
+ from setuptools import setup; setup(version=get_property('__version__', "pydocmaker"))
@@ -0,0 +1,12 @@
1
"""Public API of the package: schema constructors and the exporter entry points."""

__version__ = '1.0.0'

# Relative imports are used because the modules ship inside this package;
# importing them under the absolute name ``pyandoc`` only works when a
# separate distribution of that name happens to be installed.
from .core.schema import c as constr

from .core.schema import FlowDoc, SectionedDoc, construct, dump

from .exporters.ex_docx import convert as to_docx
from .exporters.ex_html import convert as to_html
# to_redmine and to_textile are two names for the same exporter
from .exporters.ex_redmine import convert as to_redmine
from .exporters.ex_redmine import convert as to_textile
# NOTE(review): ex_tex itself imports a ``sub.mdx_latex`` module - confirm it is shipped.
from .exporters.ex_tex import convert as to_tex
12
+
File without changes
@@ -0,0 +1,148 @@
1
+ from dataclasses import dataclass, field, is_dataclass
2
+ from collections import UserDict, UserList
3
+
4
+
5
+
6
+
7
+
8
class c():
    """Namespace of factory functions for the element dicts a document is made of.

    Each factory returns a plain ``dict`` with a ``'typ'`` key naming the
    element kind and a ``'children'`` key holding its payload.
    """

    @staticmethod
    def markdown(children=''):
        """Return a ``markdown`` element wrapping ``children``."""
        element = {'typ': 'markdown'}
        element['children'] = children
        return element

    @staticmethod
    def text(children=''):
        """Return a ``text`` element wrapping ``children``."""
        element = {'typ': 'text'}
        element['children'] = children
        return element

    @staticmethod
    def verbatim(children=''):
        """Return a ``verbatim`` element wrapping ``children``."""
        element = {'typ': 'verbatim'}
        element['children'] = children
        return element

    @staticmethod
    def iter(children:list=None):
        """Return an ``iter`` element; a fresh empty list is used when ``children`` is ``None``."""
        payload = children if children is not None else []
        return {'typ': 'iter', 'children': payload}

    @staticmethod
    def image(imageblob='', caption='', children='', width=0.8):
        """Return an ``image`` element.

        A ``bytes`` ``imageblob`` is decoded as UTF-8 so the element only
        holds text; any other value is stored unchanged.
        """
        blob = imageblob
        if isinstance(blob, bytes):
            blob = blob.decode("utf-8")

        element = {'typ': 'image', 'children': children}
        element['imageblob'] = blob
        element['caption'] = caption
        element['width'] = width
        return element
48
+
49
+
50
+
51
class FlowDoc(UserList):
    """a flat (unsectioned) sequence of document parts to make a document (can be used like a list)"""

    def add(self, element_to_add:dict=None):
        """Append one element dict after checking that its ``'typ'`` names a constructor on ``c``.

        Raises:
            AssertionError: if no element is given or its ``'typ'`` is missing/unknown.
        """
        assert element_to_add, 'need to give an element_to_add!'
        typ = element_to_add.get('typ', None)
        # hasattr() itself raises TypeError for a non-string name, so check the type first
        assert isinstance(typ, str) and hasattr(c, typ), 'the element to add is of unknown type!'
        self.append(element_to_add)

    def add_kw(self, typ='', **kwargs):
        """Build an element of kind ``typ`` from ``kwargs`` via ``construct`` and append it."""
        assert typ, 'need to give a content type!'
        self.add(construct(typ, **kwargs))

    def dump(self):
        """Return the module-level ``dump`` of every contained element as a plain list."""
        return [dump(v) for v in self]
66
+
67
class SectionedDoc(UserDict):
    """a sectioned collection of document parts to make a document (can be used like a dict)"""

    def add_section(self, caption, children:list=None):
        """Create a new section ``caption`` holding ``children`` (a fresh list when ``None``).

        Raises:
            AssertionError: if the caption is empty or already present, or
                ``children`` is neither a list nor ``None``.
        """
        assert caption, 'need to give a caption!'
        assert not caption in self, f'section with {caption=} already exists in Document'
        assert children is None or isinstance(children, list), 'children must be of type list or None'
        self[caption] = [] if children is None else children

    def add(self, section_caption:str=None, element_to_add:dict=None):
        """Append ``element_to_add`` to a section, creating the section if needed.

        When ``section_caption`` is empty and sections exist, the most
        recently added section is used.

        Raises:
            AssertionError: if no section can be determined, no element is
                given, or the element's ``'typ'`` is missing/unknown.
        """
        if not section_caption and len(self):
            section_caption = list(self.keys())[-1]

        assert section_caption, 'need to give a section_caption to add to!'
        assert element_to_add, 'need to give an element_to_add!'
        typ = element_to_add.get('typ', None)
        # hasattr() itself raises TypeError for a non-string name, so check the type first
        assert isinstance(typ, str) and hasattr(c, typ), 'the element to add is of unknown type!'
        if not section_caption in self:
            self.add_section(caption=section_caption)
        self[section_caption].append(element_to_add)

    def add_kw(self, section_caption=None, typ='', **kwargs):
        """Build an element of kind ``typ`` from ``kwargs`` via ``construct`` and add it."""
        assert typ, 'need to give a content type!'
        self.add(section_caption, construct(typ, **kwargs))

    def to_flow_doc(self):
        """Flatten into a ``FlowDoc``: each section becomes a ``## caption`` markdown element followed by its parts."""
        doc = FlowDoc()
        for section_caption, section_parts in self.items():
            doc.add(c.markdown(f'## {section_caption}'))
            for part in section_parts:
                doc.add(part)
        return doc

    def dump(self):
        """Return the dump of the flattened document."""
        return self.to_flow_doc().dump()
101
+
102
+
103
+ def _serialize(v):
104
+ if isinstance(v, str):
105
+ return v
106
+ elif isinstance(v, list):
107
+ return [dump(vv) for vv in v]
108
+ elif isinstance(v, dict):
109
+ return v
110
+ else:
111
+ TypeError(f'{type(v)=} is of unknown type only dataclass, str, list, and dict is allowed!')
112
+
113
+
114
+ def _construct(v):
115
+
116
+ if isinstance(v, str):
117
+ return v
118
+ elif isinstance(v, list):
119
+ return [_construct(vv) for vv in v]
120
+ elif isinstance(v, dict):
121
+ return construct(**v)
122
+ else:
123
+ TypeError(f'{type(v)=} is of unknown type only dataclass, str, list, and dict is allowed!')
124
+
125
def construct(type:str, **kwargs):
    """Build an element of kind ``type`` using the matching constructor on ``c``.

    Args:
        type: element kind, i.e. the name of a constructor on ``c``. (The
            parameter name shadows the builtin but is kept for callers.)
        **kwargs: constructor arguments; a truthy ``children`` entry is
            rebuilt with ``_construct`` first.

    Returns:
        The constructed element, or ``type`` itself when it names no
        constructor and no keyword arguments were given (plain string).

    Raises:
        AssertionError: if ``type`` is not a string.
        TypeError: if ``type`` names no constructor but keyword arguments were given.
    """
    assert isinstance(type, str)

    if not hasattr(c, type):
        if not kwargs:
            return type
        raise TypeError(f'{type=} is of unknown type only dataclass, str, list, and dict is allowed!')

    children = kwargs.get('children')
    if children:
        kwargs['children'] = _construct(children)
    constructor = getattr(c, type)
    return constructor(**kwargs)
137
+
138
def dump(obj):
    """Serialize a list of elements or a single element dict.

    A list is dumped item by item; a dict has each value passed through
    ``_serialize``. Anything else fails the ``isinstance`` assertion.
    """
    if not isinstance(obj, list):
        assert isinstance(obj, dict)
        serialized = {}
        for key, value in obj.items():
            serialized[key] = _serialize(value)
        return serialized

    return [dump(item) for item in obj]
144
+
145
+
146
+ if __name__ == "__main__":
147
+ mysection = c.markdown('# Introduction')
148
+
File without changes
@@ -0,0 +1,146 @@
1
+ import traceback
2
+ import io
3
+
4
+ import base64
5
+ from typing import List
6
+
7
+ import docx
8
+ from docx.shared import Inches, Pt
9
+
10
+
11
+ import markdown
12
+
13
+
14
def blue(run):
    """Set the font colour of ``run`` to RGB (0, 0, 255)."""
    colour = docx.shared.RGBColor(0, 0, 255)
    run.font.color.rgb = colour
16
+
17
def red(run):
    """Set the font colour of ``run`` to RGB (255, 0, 0)."""
    colour = docx.shared.RGBColor(255, 0, 0)
    run.font.color.rgb = colour
19
+
20
def convert(doc:List[dict]) -> bytes:
    """Digest ``doc`` with a default ``docx_renderer`` and return the document bytes."""
    docx_builder = docx_renderer()
    docx_builder.digest(doc)
    rendered = docx_builder.doc_to_bytes()
    return rendered
24
+
25
class docx_renderer():
    """Render schema elements into a python-docx document held in ``self.d``."""

    def __init__(self, template_path:str=None, make_blue=False) -> None:
        """Create the document from ``template_path`` (passed to ``docx.Document``).

        Args:
            template_path: forwarded unchanged to ``docx.Document``.
            make_blue: when true, every paragraph/run added is coloured blue.
        """
        self.d = docx.Document(template_path)
        self.make_blue = make_blue

    def add_paragraph(self, newtext, *args, **kwargs):
        """Add a paragraph with ``newtext`` and return it; extra arguments go to python-docx."""
        new_paragraph = self.d.add_paragraph(newtext, *args, **kwargs)
        if self.make_blue:
            for r in new_paragraph.runs:
                blue(r)
        return new_paragraph

    def add_run(self, text, *args, **kwargs):
        """Add ``text`` as a new run at the end of the last paragraph and return the run.

        A new run is always created so that formatting applied to the
        returned run (font, colour) cannot restyle text added earlier.
        """
        if not self.d.paragraphs:
            self.add_paragraph('')

        new_run = self.d.paragraphs[-1].add_run(text)

        if self.make_blue:
            blue(new_run)
        return new_run

    def digest_text(self, children, *args, **kwargs):
        """Render a ``text`` element as its own paragraph."""
        return self.add_paragraph(children)

    def digest_str(self, children, *args, **kwargs):
        """Render a bare string as a run in the last paragraph."""
        return self.add_run(children)

    def digest_markdown(self, children, *args, **kwargs):
        """Render a ``markdown`` element as a 'Normal' paragraph (the markup is not interpreted here)."""
        return self.add_paragraph(children, style='Normal')

    def digest_verbatim(self, children, *args, **kwargs):
        """Render a ``verbatim`` element as a small monospace run."""
        new_run = self.add_run(children)
        new_run.font.name = 'Courier New' # Or any other monospace font
        new_run.font.size = docx.shared.Pt(8) # Adjust font size as needed
        return new_run

    def digest_error(self, children, *args, **kwargs):
        """Render an error message (or the exception currently being handled) as a red monospace run."""
        if isinstance(children, BaseException):
            # called from inside an ``except`` block, so the active traceback is available
            traceback.print_exc(limit=5)
            children = traceback.format_exc(limit=5)

        new_run = self.add_run(children)
        new_run.font.name = 'Courier New' # Or any other monospace font
        new_run.font.size = docx.shared.Pt(8) # Adjust font size as needed
        red(new_run)
        return new_run

    def digest_iterator(self, children, *args, **kwargs):
        """Digest every item of ``children`` and return the list of results."""
        if children:
            return [self.digest(val, *args, **kwargs) for val in children]
        return []

    def digest_image(self, children, *args, **kwargs):
        """Render an ``image`` element: the picture followed by a caption paragraph.

        Reads ``width`` (a factor, scaled by 5 inches with a minimum of one
        inch), ``caption`` and ``imageblob`` (base64 text, optionally with a
        ``data:...;base64,`` header) from ``kwargs``.

        Raises:
            AssertionError: if no image data is given.
        """
        image_width = Inches(max(1, kwargs.get('width', 0.8)*5))
        image_caption = kwargs.get('caption', '')
        image_blob = kwargs.get('imageblob', '')

        assert image_blob, 'no image data given!'

        if isinstance(image_blob, bytes):
            image_blob = image_blob.decode('utf-8')
        # drop a data-URI header so that only the base64 payload is decoded
        if ';base64,' in image_blob:
            image_blob = image_blob.split(';base64,', 1)[1]

        img_bytes = base64.b64decode(image_blob)
        image_stream = io.BytesIO(img_bytes)

        picture = self.d.add_picture(image_stream, width=image_width)
        # NOTE(review): kept from the original; confirm this attribute has an effect on the returned shape
        picture.alignment = 1

        return self.add_paragraph(image_caption)

    def digest(self, children, *args, **kwargs):
        """Dispatch one element (string, list or element dict) to its renderer.

        Returns ``''`` for empty input. Unknown elements and exceptions
        raised while rendering are written into the document via
        ``digest_error`` instead of propagating.
        """
        try:
            if not children:
                return ''

            typ = children.get('typ') if isinstance(children, dict) else None

            if isinstance(children, str):
                ret = self.digest_str(children, *args, **kwargs)
            elif typ == 'iter':
                # iterate the element's children, not the keys of the element dict
                inner = children.get('children')
                if not isinstance(inner, list):
                    inner = [inner]
                ret = self.digest_iterator(inner, *args, **kwargs)
            elif isinstance(children, list):
                ret = self.digest_iterator(children, *args, **kwargs)
            elif typ == 'image':
                ret = self.digest_image(*args, **kwargs, **children)
            elif typ == 'text':
                ret = self.digest_text(*args, **kwargs, **children)
            elif typ == 'verbatim':
                ret = self.digest_verbatim(*args, **kwargs, **children)
            elif typ == 'markdown':
                ret = self.digest_markdown(*args, **kwargs, **children)
            else:
                val = f'the element of type {type(children)}, could not be parsed.'
                ret = self.digest_error(val, *args, **kwargs)
        except Exception as err:
            ret = self.digest_error(err, *args, **kwargs)

        return ret

    def doc_to_bytes(self):
        """Save the document into memory and return the file content as bytes."""
        with io.BytesIO() as fp:
            self.d.save(fp)
            return fp.getvalue()

    def save(self, filepath):
        """Save the document to ``filepath``."""
        self.d.save(filepath)
@@ -0,0 +1,207 @@
1
+ from collections import namedtuple
2
+ import io
3
+ import json
4
+ import random
5
+ import textwrap
6
+ import time
7
+ import urllib
8
+ import re
9
+ import uuid
10
+ import os
11
+ import base64
12
+ import markdown
13
+ from typing import List
14
+
15
+
16
+ """
17
+
18
+ ██████ ██████ ███ ██ ██ ██ ███████ ██████ ████████
19
+ ██ ██ ██ ████ ██ ██ ██ ██ ██ ██ ██
20
+ ██ ██ ██ ██ ██ ██ ██ ██ █████ ██████ ██
21
+ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
22
+ ██████ ██████ ██ ████ ████ ███████ ██ ██ ██
23
+
24
+
25
+ """
26
+
27
+ # DEFAULT_IMAGE_PATH = os.path.join(parent_dir, 'ReqTracker', 'assets', 'mpifr.png')
28
+ # with open(DEFAULT_IMAGE_PATH, 'rb') as fp:
29
+ # DEFAULT_IMAGE_BLOB = '' # base64.b64encode(fp.read()).decode('utf-8')
30
+ # DEFAULT_IMAGE_BLOB = ''
31
+
32
def mk_link(id_, label=None, pth='show', p0='uib', v='v1', **kwargs):
    """Return an HTML anchor pointing to ``/<p0>/<v>/<pth>/<quoted id_>``.

    ``id_`` is URL-quoted with ``quote_plus``; the link text is ``label``
    or, when that is empty, ``id_``. Extra keyword arguments are ignored.
    """
    # ``import urllib`` alone does not guarantee that ``urllib.parse`` is loaded
    from urllib.parse import quote_plus

    return f'<a href="/{p0}/{v}/{pth}/{quote_plus(id_)}" target="_self">{label if label else id_}</a>'
34
+
35
def mk_tpl(id_, label=None, pth='show', p0='uib', v='v1', **kwargs):
    """Return ``(url, text)`` where url is ``/<p0>/<v>/<pth>/<quoted id_>``.

    ``id_`` is URL-quoted with ``quote_plus``; ``text`` is ``label`` or,
    when that is empty, ``id_``. Extra keyword arguments are ignored.
    """
    # ``import urllib`` alone does not guarantee that ``urllib.parse`` is loaded
    from urllib.parse import quote_plus

    return f"/{p0}/{v}/{pth}/{quote_plus(id_)}", label if label else id_
37
+
38
+
39
def convert(doc:List[dict]):
    """Render every element of ``doc`` to HTML and join the fragments with blank lines.

    For a ``dict`` the values are rendered; any other input is iterated directly.
    """
    elements = doc
    if isinstance(doc, dict):
        elements = doc.values()

    fragments = []
    for dc in elements:
        fragments.append(html_docdc2html(dc))
    return '\n\n'.join(fragments)
42
+
43
+
44
class html_renderer:
    """Static renderers that each turn one document element into an HTML fragment.

    All ``vm_*`` methods read the payload from the ``content`` keyword and
    fall back to ``children``; ``label`` is an optional heading text.
    """

    @staticmethod
    def _textarea_dims(text):
        """Return ``(cols, rows)`` for a textarea: longest line length and number of lines."""
        line_lengths = [len(s) for s in text.split('\n')]
        return max(line_lengths), len(line_lengths)

    @staticmethod
    def vm_Text(**kwargs):
        """Render plain text in a ``<div>``, preceded by a label ``<div>`` when a label is given."""
        label = kwargs.get('label', '')
        content = kwargs.get('content', kwargs.get('children'))

        if label:
            return f'<div style="min-width:100">{label}</div><div>{content}</div>'
        else:
            return f'<div>{content}</div>'

    @staticmethod
    def vm_Markdown(**kwargs):
        """Convert markdown text to HTML with the ``markdown`` package."""
        label = kwargs.get('label', '')
        content = kwargs.get('content', kwargs.get('children'))

        parts = []
        if label:
            parts += [
                f'<div style="min-width:100;">{label}</div>',
                '<hr/>'
            ]

        parts += [markdown.markdown(content)]

        return '\n\n'.join(parts)

    @staticmethod
    def vm_LargeText(**kwargs):
        """Render text in a disabled ``<textarea>`` sized to the text."""
        label = kwargs.get('label', '')
        content = kwargs.get('content', kwargs.get('children'))

        w, n = html_renderer._textarea_dims(content)
        return f'<div style="min-width:100;">{label}</div><hr></hr>\n\n<textarea cols="{w}" rows="{n}" disabled=True>\n\n{content}\n\n</textarea>'

    @staticmethod
    def vm_Iterator(**kwargs):
        """Wrap every (already rendered) item in a ``<div>`` and join them with blank lines."""
        content = kwargs.get('content', kwargs.get('children'))
        return f'\n\n'.join([f'<div>{c}</div>' for c in content])

    @staticmethod
    def vm_BaseFallback(**kwargs):
        """Render arbitrary content as indented JSON in a disabled ``<textarea>``."""
        label = kwargs.get('label', '')
        content = kwargs.get('content', kwargs.get('children'))

        j = '#!/RAWJSON!\n' + json.dumps(content, indent=2)
        w, n = html_renderer._textarea_dims(j)
        return f'<div style="min-width:100;">{label}</div><hr></hr>\n\n<textarea cols="{w}" rows="{n}" disabled=True>\n\n{j}\n\n</textarea>'

    @staticmethod
    def vm_Verbatim(**kwargs):
        """Render verbatim text in a disabled ``<textarea>`` sized to the text."""
        label = kwargs.get('label', '')
        content = kwargs.get('content', kwargs.get('children'))

        w, n = html_renderer._textarea_dims(content)
        children = [
            f'<div style="min-width:100;">{label}</div>',
            f'<textarea cols="{w}" rows="{n}" disabled=True>\n\n{content}\n\n</textarea>'
        ]
        return '\n\n'.join(children)

    @staticmethod
    def vm_Image(label='', imageblob=None, children='', width=0.8, caption="", **kwargs):
        """Render an image element as descriptive ``<div>`` rows plus an inline ``<img>``.

        ``imageblob`` may be bytes or text; a ``data:image/png;base64,``
        header is prepended unless the blob already starts with
        ``data:image``. When ``children`` (the image name) is empty, a
        generated ``image_<uid>.png`` name is shown instead.
        """
        if imageblob is None:
            imageblob = ''

        if not children:
            # display name only; built from id, time and a random number
            uid = (id(imageblob) + int(time.time()) + random.randint(1, 100))
            children = f'image_{uid}.png'

        s = imageblob.decode("utf-8") if isinstance(imageblob, bytes) else imageblob
        if not s.startswith('data:image'):
            s = 'data:image/png;base64,' + s

        children = [
            f'<div style="min-width:100;">{label}</div>',
            f'<div style="min-width:100;">image-name:</div>',
            f'<div>{children}</div>',
            f'<div style="min-width:100;">width</div>',
            f'<div>{width}</div>',
            f'<div style="min-width:100;">caption</div>',
            f'<div>{caption}</div>',
            # valid HTML uses <img> and separates attributes with whitespace only
            f"<img src=\"{s}\" style=\"max-width:80%\">",
        ]

        return '\n\n'.join(children)
183
+
184
+
185
+
186
def html_docdc2html(content):
    """Render one document element (string, list or element dict) to HTML.

    Strings become text, lists and ``iter`` elements are rendered item by
    item, and ``image``/``text``/``verbatim``/``markdown`` dicts go to the
    matching ``html_renderer`` method.

    Raises:
        TypeError: for anything else, including an ``iter`` dict whose
            ``children`` is not a list.
    """
    if isinstance(content, str):
        return html_renderer.vm_Text(content=content)

    if isinstance(content, list):
        return html_renderer.vm_Iterator(content=[html_docdc2html(c) for c in content])

    if isinstance(content, dict):
        typ = content.get('typ', None)
        if typ == 'iter' and isinstance(content.get('children', None), list):
            rendered = [html_docdc2html(c) for c in content.get('children')]
            return html_renderer.vm_Iterator(content=rendered)

        renderer_names = (
            ('image', 'vm_Image'),
            ('text', 'vm_Text'),
            ('verbatim', 'vm_Verbatim'),
            ('markdown', 'vm_Markdown'),
        )
        for kind, method_name in renderer_names:
            if typ == kind:
                return getattr(html_renderer, method_name)(**content)

    raise TypeError(f'the element of type {type(content)}, could not be parsed.', content)
204
+
205
+
206
+
207
+
@@ -0,0 +1,175 @@
1
+ import yaml, base64, time, io, copy, json, traceback, hashlib, markdown, re
2
+ from typing import List
3
+
4
def doc2attachment(doc_dc):
    """Wrap ``doc_dc``, serialized as indented JSON, in an attachment dict named ``doc_dict.json``."""
    serialized = json.dumps(doc_dc, indent=2)
    payload = io.BytesIO(bytes(serialized, 'ascii'))
    attachment = {
        "path" : payload,
        "filename" : 'doc_dict.json',
        "content_type" : "application/octet-stream",
    }
    return attachment
7
+
8
def bindoc2attachment(mybytes, filename):
    """Wrap raw bytes in an attachment dict carrying the given ``filename``."""
    attachment = {
        "path" : io.BytesIO(mybytes),
        "filename" : filename,
        "content_type" : "application/octet-stream",
    }
    return attachment
11
+
12
+
13
def convert(doc:List[dict], with_attachments=True, files_to_upload=None):
    """Format ``doc`` with a default ``DocumentRedmineFormatter``.

    Returns:
        The formatted text alone when ``with_attachments`` is false;
        otherwise ``(text, attachments)``, where the attachments are the
        JSON form of ``doc``, the attachments the formatter collected, and
        one entry per ``files_to_upload`` item (filename -> bytes).
    """
    extra_files = files_to_upload if files_to_upload else {}

    formatter = DocumentRedmineFormatter()
    text = '\n'.join(formatter.digest(doc))

    if not with_attachments:
        return text

    attachments = [doc2attachment(doc)]
    attachments += formatter.attachments
    attachments += [bindoc2attachment(bts, k) for k, bts in extra_files.items()]
    return text, attachments
27
+
28
+
29
def im2attachment(dc_img):
    """Turn an image element dict into an attachment dict.

    Reads ``children`` (file name), ``imageblob`` (base64 text, optionally
    with a ``data:...;base64,`` header) and ``caption`` from ``dc_img``.
    Without a file name one is generated as ``img_<md5 of imageblob>.<ext>``,
    with the extension taken from the data-URI header or from the first
    base64 character.

    Returns:
        Dict with ``path`` (BytesIO of the decoded image), ``filename``,
        ``content_type`` and ``description`` (caption, or the filename when
        the caption is empty).

    Raises:
        KeyError: if a file name must be generated but the extension
            cannot be determined.
    """
    # first base64 character of the supported image formats
    mapper = {
        '/' : 'jpg',
        'i' : 'png',
        'R' : 'gif',
        'U' : 'webp'
    }
    filename = dc_img.get('children', None)
    imageblob = dc_img.get('imageblob')
    if isinstance(imageblob, bytes):
        imageblob = imageblob.decode('utf-8')

    if not filename:
        if ';base64' in imageblob:
            ext = imageblob.split(';base64')[0].split('/')[-1]
        elif mapper.get(imageblob[0], None):
            ext = mapper.get(imageblob[0], None)
        else:
            raise KeyError('extension could not be determined from imageblob')

        filename = f"img_{hashlib.md5(imageblob.encode('utf-8')).hexdigest()}.{ext}"

    description = dc_img.get('caption', '')
    if not description:
        description = filename

    # Only the part after a data-URI header is base64; decoding the header
    # as well would prepend garbage bytes to the image.
    _, sep, payload = imageblob.partition(';base64,')
    if not sep:
        payload = imageblob
    content = io.BytesIO(base64.b64decode(payload))

    return {"path" : content, "filename" : filename, "content_type" : "application/octet-stream", "description": description}
56
+
57
+
58
class DocumentRedmineFormatter:
    """Format document elements as Redmine text and collect image attachments.

    ``out_format`` selects the post-processing of text elements:
    ``'textile'`` rewrites a few markdown constructs to textile, ``'html'``
    converts markdown to HTML, any other value leaves the text unchanged.
    Attachments created for images are collected in ``self.attachments``.
    """

    def __init__(self, out_format='textile') -> None:
        self.attachments = []
        self.out_format = out_format

    def handle_error(self, err, el) -> list:
        """Return a one-item list with a ``<pre>`` block describing ``err`` for element ``el``.

        ``err`` is either a message string or an exception, whose traceback
        (limited to 5 entries) is included.
        """
        txt = 'ERROR WHILE HANDLING ELEMENT:\n{}\n\n'.format(el)
        if not isinstance(err, str):
            # three-argument form: the one-argument form needs Python 3.10+
            txt += '\n'.join(traceback.format_exception(type(err), err, err.__traceback__, limit=5)) + '\n'
        else:
            txt += err + '\n'
        txt = f"""<pre>\n{txt}\n</pre>"""

        return [txt]

    def digest_text(self, children='', **kwargs) -> list:
        """Return the text of a ``text`` element as a one-item list."""
        return [children]

    def digest_markdown(self, children='', **kwargs) -> list:
        """Return the text of a ``markdown`` element as a one-item list."""
        return [children]

    def digest_image(self, **kwargs) -> list:
        """Register the image as an attachment and return the markup referencing it."""
        attachment = im2attachment(kwargs)

        filename = attachment.get('filename')
        caption = attachment.get('description')
        self.attachments.append(attachment)
        s = f'!{filename}({caption})!\n**IMAGE:** attachment:"{filename}" {caption}\n'
        return [s]

    def digest_verbatim(self, children='', **kwargs) -> list:
        """Return the verbatim content wrapped in ``<pre>`` as a one-item list.

        String content is used as-is; other content is digested and its
        parts are joined with newlines.
        """
        if isinstance(children, str):
            txt = children
        else:
            txt = '\n'.join(self.digest(children))
        txt = txt.strip('\n')
        s = f"""<pre>{txt}</pre>"""
        return [s]

    def digest_iter(self, el) -> list:
        """Digest every item of a list (or of an ``iter`` element's children).

        Each item's parts are followed by a ``'\\n\\n'`` separator entry.

        Raises:
            AssertionError: if no list can be obtained from ``el``.
        """
        parts = []
        if isinstance(el, dict) and el.get('typ', '') == 'iter' and isinstance(el.get('children', None), list):
            el = el['children']

        assert isinstance(el, list)
        for p in el:
            parts += self.digest(p)
            parts.append('\n\n')

        return parts

    def parse_md2html(self, s) -> str:
        """Convert markdown to HTML using the 'extra' and 'toc' extensions."""
        return markdown.markdown(s, extensions=['extra', 'toc'])

    def parse_md2textile_line(self, line):
        """Rewrite a leading markdown heading or ``- `` bullet of one line to textile."""
        r = re.match(r'([ \t]*)#+', line)
        n = r.group().count('#') if r else None
        if n:
            # count=1: only the leading marker is a heading, later '#' are text
            line = re.sub(r'([ \t]*)#+', rf'\1h{n}. ', line, count=1)

        r = re.match(r'^([ \t]*-{1}[ ]{1})', line)
        if r:
            g = r.group()
            # replace only the matched prefix; one '*' per prefix character
            line = ('*'*len(g)) + ' ' + line[len(g):]

        return line

    def parse_md2textile(self, s) -> str:
        """Rewrite headings, bullets, fenced code blocks and links of ``s`` to textile."""
        f = self.parse_md2textile_line
        lines = [f(line) for line in s.split('\n')]
        s = '\n'.join(lines)
        # code blocks
        s = re.sub(r"(```)([\s\S]*?)(?=```)(```)", r'<pre>\2</pre>', s)

        # links
        s = re.sub(r"(\[)([\s\S]*?)(?=\])(\])(\()([\s\S]*?)(?=\))(\))", r'"\2":\5', s)

        return s

    def digest_str(self, el) -> list:
        """Return a bare string as a one-item list."""
        return [el]

    def digest(self, el) -> list:
        """Format one element (string, list or element dict) into a list of text parts.

        Returns ``''`` for empty input. Text-like results are post-processed
        according to ``self.out_format``. Lists/``iter`` elements are
        returned as digested (their items were already post-processed) and
        verbatim blocks are returned untouched. Unknown elements and
        exceptions yield the ``handle_error`` block instead of raising.
        """
        try:

            if not el:
                return ''

            typ = el.get('typ') if isinstance(el, dict) else None

            if isinstance(el, str):
                ret = self.digest_str(el)
            elif typ == 'iter' or isinstance(el, list):
                return self.digest_iter(el)
            elif typ == 'image':
                ret = self.digest_image(**el)
            elif typ == 'text':
                ret = self.digest_text(**el)
            elif typ == 'verbatim':
                return self.digest_verbatim(**el)
            elif typ == 'markdown':
                ret = self.digest_markdown(**el)
            else:
                return self.handle_error(f'the element of type {type(el)} {el=}, could not be parsed.', el)

            if self.out_format == 'html':
                ret = [self.parse_md2html(s) for s in ret]
            elif self.out_format == 'textile':
                ret = [self.parse_md2textile(s) for s in ret]

            return ret

        except Exception as err:
            return self.handle_error(err, el)
175
+
@@ -0,0 +1,202 @@
1
+
2
+ import argparse
3
+ import base64
4
+ import os
5
+ import re
6
+ from pathlib import Path
7
+ import json
8
+ import shutil
9
+ import traceback
10
+ import sys
11
+
12
+ import tempfile
13
+ import shutil
14
+ from io import BytesIO
15
+
16
+ from dataclasses import dataclass
17
+ import zipfile
18
+
19
+ from typing import List
20
+ import markdown
21
+ try:
22
+ import pyandoc.sub.mdx_latex as mdx_latex
23
+ except Exception as err:
24
+ from ..sub import mdx_latex
25
+
26
+ md = markdown.Markdown()
27
+ latex_mdx = mdx_latex.LaTeXExtension()
28
+ latex_mdx.extendMarkdown(md)
29
+
30
+
31
+
32
+
33
+
34
+
35
def convert(doc:List[dict], with_attachments=True, files_to_upload=None):
    """Format ``doc`` as TeX with a default ``ElementFormatter``.

    Args:
        doc: list of document elements.
        with_attachments: when true, the formatter's attachments are returned too.
        files_to_upload: accepted for signature parity with the other
            exporters; not used here.

    Returns:
        The TeX text, or ``(text, attachments)`` when ``with_attachments``
        is true, where ``attachments`` maps relative paths to file bytes.
    """
    formatter = ElementFormatter()
    # format() already returns the joined text; joining that string again
    # would put a newline between every single character
    text = formatter.format(doc)
    if with_attachments:
        return text, formatter.attachments
    else:
        return text
46
+
47
+
48
+
49
+
50
+
51
+ ###########################################################################################
52
+ """
53
+
54
+ ███████ ██████ ██████ ███ ███ █████ ████████
55
+ ██ ██ ██ ██ ██ ████ ████ ██ ██ ██
56
+ █████ ██ ██ ██████ ██ ████ ██ ███████ ██
57
+ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
58
+ ██ ██████ ██ ██ ██ ██ ██ ██ ██
59
+
60
+ """
61
+ ###########################################################################################
62
+
63
+
64
class ElementFormatter:
    """Format document elements as TeX source.

    Decoded image files are collected in ``self.attachments`` as a mapping
    of relative path (``inp/<name>``) to the file bytes.
    """

    def __init__(self, make_blue=False) -> None:
        self.attachments = {}
        self.make_blue = make_blue

    def handle_error(self, err, el):
        """Return a red verbatim TeX block describing ``err`` for element ``el``.

        ``err`` is either a message string or an exception, whose traceback
        (limited to 5 entries) is included.
        """
        txt = 'ERROR WHILE HANDLING ELEMENT:\n{}\n\n'.format(el)
        if not isinstance(err, str):
            # three-argument form: the one-argument form needs Python 3.10+
            txt += '\n'.join(traceback.format_exception(type(err), err, err.__traceback__, limit=5)) + '\n'
        else:
            txt += err + '\n'
        txt = '\n\\begin{verbatim}\n\n' + txt + '\n\n\\end{verbatim}'
        txt = f'{{\\color{{red}}{txt}}}'

        return txt

    def digest_markdown(self, children='', **kwargs) -> str:
        """Convert markdown to TeX with the module-level converter and drop the ``<root>`` wrapper."""
        tex = md.convert(children)
        # remove the exact wrapper tags; lstrip/rstrip would strip any of
        # the characters '<', 'r', 'o', 't', '/', '>' from the content too
        if tex.startswith('<root>'):
            tex = tex[len('<root>'):]
        if tex.endswith('</root>'):
            tex = tex[:-len('</root>')]
        return tex

    def digest_image(self, children='', width=0.8, caption='', imageblob='', **kwargs) -> str:
        """Return a ``figure`` environment including ``./inp/<basename of children>``.

        A numeric ``width`` is a fraction of ``\\textwidth``; a string is
        used verbatim as the ``\\includegraphics`` option. If ``imageblob``
        is given (base64 text or bytes, with or without a
        ``data:...;base64,`` header) the decoded file is stored in
        ``self.attachments``.
        """
        if not isinstance(width, str):
            width = 'width={}\\textwidth'.format(width)

        file_name = os.path.basename(children)
        relpath = os.path.join('inp', file_name)
        path = './inp/' + file_name

        if imageblob:
            if isinstance(imageblob, str):
                imageblob = imageblob.encode("utf8")

            _, sep, data = imageblob.partition(b";base64,")
            if not sep:
                # plain base64 without a data-URI header
                data = imageblob
            assert not os.path.exists(relpath), f'can not write a file if it already exists! "{children}'

            self.attachments[relpath] = base64.decodebytes(data.strip())

        txt = fr'\includegraphics[{width}]{{{path}}}'

        if caption:
            txt += '\n' + fr'\caption{{{caption}}}'

        txt = r"\begin{figure}[h!]" + '\n' + r"\centering" + '\n' + txt + '\n' + r"\end{figure}"
        return txt

    def digest_verbatim(self, children='', **kwargs) -> str:
        """Return the content in framed, tiny ``verbatim`` tables of at most 2000 characters each."""
        txt = self.digest(children)
        template = '\n'.join([
            r"\begin{tabular}{|p{16cm}|}",
            r"\hline",
            r"\begin{tiny}\begin{verbatim}",
            "<REPLACEME:VERBTEXT>",
            r"\end{verbatim}\end{tiny}",
            r"\\",
            r"\hline",
            r"\end{tabular}\par",
        ])
        txt = txt.strip('\n')
        parts = []

        # split long text into several tables
        while len(txt) > 2000:
            parts.append(template.replace('<REPLACEME:VERBTEXT>', txt[:2000]))
            txt = txt[2000:]
        parts.append(template.replace('<REPLACEME:VERBTEXT>', txt))

        return '\n\n'.join(parts)

    def digest_iterator(self, el) -> str:
        """Digest every item of a list (or of an ``iter`` element's children), each preceded by a TeX comment."""
        if isinstance(el, dict) and el.get('typ', '') == 'iter' and isinstance(el.get('children', None), list):
            el = el['children']
        return '\n\n'.join([f'% Iterator Element {i}\n' + self.digest(e) for i, e in enumerate(el)])

    def digest_str(self, el):
        """Return a bare string unchanged."""
        return el

    def digest_text(self, children:str, **kwargs):
        """Return the text of a ``text`` element unchanged."""
        return children

    def digest(self, el, make_blue=False):
        """Format one element (string, list or element dict) as TeX.

        Returns ``''`` for empty input. With ``make_blue`` the result is
        wrapped in a blue colour group. Unknown elements and exceptions
        yield the ``handle_error`` block instead of raising.
        """
        blue = lambda s: f'{{\\color{{blue}}{s}}}'
        try:

            if not el:
                return ''
            elif isinstance(el, str):
                ret = self.digest_str(el)
            elif isinstance(el, dict) and 'typ' in el and el['typ'] == 'iter':
                ret = self.digest_iterator(el)
            elif isinstance(el, list) and el:
                ret = self.digest_iterator(el)
            elif isinstance(el, dict) and 'typ' in el and el['typ'] == 'image':
                ret = self.digest_image(**el)
            elif isinstance(el, dict) and 'typ' in el and el['typ'] == 'text':
                ret = self.digest_text(**el)
            elif isinstance(el, dict) and 'typ' in el and el['typ'] == 'verbatim':
                ret = self.digest_verbatim(**el)
            elif isinstance(el, dict) and 'typ' in el and el['typ'] == 'markdown':
                ret = self.digest_markdown(**el)
            else:
                return self.handle_error(f'the element of typ {type(el)}, could not be parsed.', el)

            return blue(ret) if make_blue else ret

        except Exception as err:
            return self.handle_error(err, el)

    def format(self, doc:list) -> str:
        """Digest every element of ``doc`` and join the results with blank lines."""
        return '\n\n'.join([self.digest(e, make_blue=self.make_blue) for e in doc])
192
+
193
+
194
+
195
+
196
+
197
+
198
+
199
+
200
+
201
+
202
+
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.1
2
+ Name: pydocmaker
3
+ Version: 1.0.0
4
+ Summary: a minimal document maker to make docx, markdown, html, and tex documents from python. Written in pure python.
5
+ Home-page: https://github.com/TobiasGlaubach/pyandoc
6
+ Author: Tobias Glaubach
7
+ Project-URL: Bug Tracker, https://github.com/TobiasGlaubach/pyandoc/issues
8
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Environment :: Console
11
+ Classifier: Programming Language :: Python
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3 :: Only
14
+ Requires-Python: >=3.8
15
+ Description-Content-Type: text/markdown
16
+
17
+ # pydocmaker
18
+ a minimal document maker to make docx, markdown, html, and tex documents from python.
19
+ Written in pure python.
20
+
21
+ Named in honor and spirit after the famous [pandoc](https://github.com/jgm/pandoc) package.
@@ -0,0 +1,15 @@
1
+ README.md
2
+ setup.cfg
3
+ setup.py
4
+ src/pydocmaker/__init__.py
5
+ src/pydocmaker.egg-info/PKG-INFO
6
+ src/pydocmaker.egg-info/SOURCES.txt
7
+ src/pydocmaker.egg-info/dependency_links.txt
8
+ src/pydocmaker.egg-info/top_level.txt
9
+ src/pydocmaker/core/__init__.py
10
+ src/pydocmaker/core/schema.py
11
+ src/pydocmaker/exporters/__init__.py
12
+ src/pydocmaker/exporters/ex_docx.py
13
+ src/pydocmaker/exporters/ex_html.py
14
+ src/pydocmaker/exporters/ex_redmine.py
15
+ src/pydocmaker/exporters/ex_tex.py
@@ -0,0 +1 @@
1
+ pydocmaker