docxrender 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docxrender/__init__.py +30 -0
- docxrender/api.py +82 -0
- docxrender/contracts.py +256 -0
- docxrender/docx/__init__.py +1 -0
- docxrender/docx/body.py +369 -0
- docxrender/docx/fields.py +141 -0
- docxrender/docx/refresh.py +113 -0
- docxrender/markdown.py +177 -0
- docxrender/pdf_uno.py +608 -0
- docxrender/writer.py +423 -0
- docxrender-0.1.0.dist-info/METADATA +273 -0
- docxrender-0.1.0.dist-info/RECORD +14 -0
- docxrender-0.1.0.dist-info/WHEEL +4 -0
- docxrender-0.1.0.dist-info/entry_points.txt +4 -0
docxrender/markdown.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True, slots=True)
|
|
8
|
+
class MarkdownHeading:
|
|
9
|
+
level: int
|
|
10
|
+
text: str
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True, slots=True)
|
|
14
|
+
class MarkdownParagraph:
|
|
15
|
+
text: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True, slots=True)
|
|
19
|
+
class MarkdownOrderedList:
|
|
20
|
+
items: tuple[str, ...]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(frozen=True, slots=True)
|
|
24
|
+
class MarkdownTable:
|
|
25
|
+
rows: tuple[tuple[str, ...], ...]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True, slots=True)
|
|
29
|
+
class MarkdownImage:
|
|
30
|
+
caption: str
|
|
31
|
+
path: str
|
|
32
|
+
width_pct: float
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True, slots=True)
|
|
36
|
+
class MarkdownPageBreak:
|
|
37
|
+
pass
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True, slots=True)
|
|
41
|
+
class MarkdownSpacer:
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Union of every block type the Markdown parser in this module can emit; it is
# the element type of the tuple returned by ``parse_markdown_blocks``.
MarkdownBlock = (
    MarkdownHeading | MarkdownParagraph | MarkdownOrderedList | MarkdownTable
    | MarkdownImage | MarkdownPageBreak | MarkdownSpacer
)
|
|
54
|
+
|
|
55
|
+
# Heading line: one to six ``#`` marks, whitespace, then the heading text
# (group 2, possibly empty).
RE_HEADING = re.compile(r"^(#{1,6})\s+(.*)$")

# Ordered-list item: decimal digits, a dot, whitespace, then the item text
# (group 1).
RE_ORDERED_LIST_ITEM = re.compile(r"^\d+\.\s+(.*)$")

# Stand-alone image line ``![caption](path)`` with an optional trailing
# ``{... width=N% ...}`` attribute block.  The width accepts an optional
# decimal fraction (``62.5``) because the parsed value is stored as a float;
# with an integer-only pattern a fractional width made the whole line fail to
# match and be treated as ordinary paragraph text.
RE_IMAGE = re.compile(
    r"^!\[(?P<caption>.*?)\]\((?P<path>.*?)\)"
    r"(?:\{[^}]*width=(?P<width>\d+(?:\.\d+)?)%[^}]*\})?\s*$"
)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _is_table_delimiter_row(row: tuple[str, ...]) -> bool:
    """Return True when every cell of *row* is a delimiter such as ``---`` or ``:-:``."""
    # A dash is required in each cell: ``set("") <= {"-", ":"}`` is vacuously
    # true, so without it a row of empty cells would pass as the delimiter
    # row and be silently dropped from the table.
    return all("-" in cell and set(cell) <= {"-", ":"} for cell in row)


def parse_markdown_blocks(markdown_body: str) -> tuple[MarkdownBlock, ...]:
    r"""Parse *markdown_body* line by line into a flat tuple of blocks.

    Blank lines are skipped.  Every other line is stripped and tested in
    this order:

    * exactly ``\newpage`` or ``\vspace`` -> ``MarkdownPageBreak`` /
      ``MarkdownSpacer``
    * ``RE_HEADING`` -> ``MarkdownHeading``
    * ``RE_IMAGE`` -> ``MarkdownImage`` (width defaults to 90.0)
    * ``RE_ORDERED_LIST_ITEM`` -> one ``MarkdownOrderedList`` collecting all
      directly following item lines
    * a leading ``|`` -> one ``MarkdownTable`` collecting all directly
      following ``|`` lines, without the header delimiter row
    * anything else -> one ``MarkdownParagraph`` collecting all directly
      following plain lines

    Args:
        markdown_body: Markdown source text.

    Returns:
        The parsed blocks in source order; empty for blank input.
    """
    lines = markdown_body.splitlines()
    blocks: list[MarkdownBlock] = []
    idx = 0
    while idx < len(lines):
        text = lines[idx].strip()
        if not text:
            idx += 1
            continue
        if text == r"\newpage":
            blocks.append(MarkdownPageBreak())
            idx += 1
            continue
        if text == r"\vspace":
            blocks.append(MarkdownSpacer())
            idx += 1
            continue

        match_heading = RE_HEADING.match(text)
        if match_heading is not None:
            blocks.append(
                MarkdownHeading(
                    level=len(match_heading.group(1)),
                    text=match_heading.group(2).strip(),
                )
            )
            idx += 1
            continue

        match_image = RE_IMAGE.match(text)
        if match_image is not None:
            width_raw = match_image.group("width")
            blocks.append(
                MarkdownImage(
                    caption=match_image.group("caption"),
                    path=match_image.group("path").strip(),
                    width_pct=float(width_raw) if width_raw is not None else 90.0,
                )
            )
            idx += 1
            continue

        if RE_ORDERED_LIST_ITEM.match(text) is not None:
            # Consume the whole run of item lines; a blank or non-item line
            # ends the list.
            items: list[str] = []
            while idx < len(lines):
                match_current = RE_ORDERED_LIST_ITEM.match(lines[idx].strip())
                if match_current is None:
                    break
                items.append(match_current.group(1).strip())
                idx += 1
            blocks.append(MarkdownOrderedList(items=tuple(items)))
            continue

        if text.startswith("|"):
            rows: list[tuple[str, ...]] = []
            idx_table = 0
            while idx < len(lines) and lines[idx].strip().startswith("|"):
                row = tuple(
                    cell.strip() for cell in lines[idx].strip().strip("|").split("|")
                )
                # Only the second line of a table can be the header
                # delimiter; a dash-only row further down is kept as data.
                if not (idx_table == 1 and _is_table_delimiter_row(row)):
                    rows.append(row)
                idx += 1
                idx_table += 1
            blocks.append(MarkdownTable(rows=tuple(rows)))
            continue

        # Plain text.  The first line is known to be non-blank and
        # non-special (every check above failed), so the loop always
        # consumes at least one line and ``idx`` always advances.
        paragraph_parts: list[tuple[str, bool]] = []
        while idx < len(lines):
            line_current = lines[idx]
            text_current = line_current.strip()
            if not text_current or _is_special_line(text_current):
                break
            # The hard-break check needs the raw line: stripping first would
            # discard the trailing whitespace that signals the break.
            line_text, has_hard_break = _strip_hard_break(line_current)
            paragraph_parts.append((line_text.strip(), has_hard_break))
            idx += 1
        blocks.append(MarkdownParagraph(text=_join_paragraph_parts(paragraph_parts)))
    return tuple(blocks)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _strip_hard_break(line: str) -> tuple[str, bool]:
|
|
156
|
+
has_hard_break = line.endswith(" ")
|
|
157
|
+
return line.rstrip(), has_hard_break
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _join_paragraph_parts(parts: list[tuple[str, bool]]) -> str:
|
|
161
|
+
if not parts:
|
|
162
|
+
return ""
|
|
163
|
+
text = parts[0][0]
|
|
164
|
+
for previous, current in zip(parts, parts[1:], strict=False):
|
|
165
|
+
text += ("\n" if previous[1] else " ") + current[0]
|
|
166
|
+
return text
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _is_special_line(text: str) -> bool:
    r"""Tell whether a stripped line begins a non-paragraph block.

    A line is special when it is exactly ``\newpage`` or ``\vspace``, starts
    with ``|``, or matches the heading, image or ordered-list pattern.

    Args:
        text: A source line with surrounding whitespace already removed.

    Returns:
        True when the line must not be absorbed into a paragraph.
    """
    if text in (r"\newpage", r"\vspace"):
        return True
    if text.startswith("|"):
        return True
    return any(
        pattern.match(text) is not None
        for pattern in (RE_HEADING, RE_IMAGE, RE_ORDERED_LIST_ITEM)
    )
|