markdown_convert 1.2.51__tar.gz → 1.2.53__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/PKG-INFO +3 -1
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/markdown_convert/default.css +11 -8
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/markdown_convert/modules/convert.py +1 -1
- markdown_convert-1.2.53/markdown_convert/modules/extras.py +248 -0
- markdown_convert-1.2.53/markdown_convert/modules/overrides.py +36 -0
- markdown_convert-1.2.53/markdown_convert/modules/transform.py +135 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/pyproject.toml +5 -1
- markdown_convert-1.2.51/markdown_convert/modules/extras.py +0 -100
- markdown_convert-1.2.51/markdown_convert/modules/transform.py +0 -164
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/.gitignore +0 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/LICENSE +0 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/README.md +0 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/markdown_convert/__init__.py +0 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/markdown_convert/__main__.py +0 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/markdown_convert/code.css +0 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/markdown_convert/modules/__init__.py +0 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/markdown_convert/modules/autoinstall.py +0 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/markdown_convert/modules/constants.py +0 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/markdown_convert/modules/resources.py +0 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/markdown_convert/modules/utils.py +0 -0
- {markdown_convert-1.2.51 → markdown_convert-1.2.53}/markdown_convert/modules/validate.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: markdown_convert
|
|
3
|
-
Version: 1.2.51
|
|
3
|
+
Version: 1.2.53
|
|
4
4
|
Summary: Convert Markdown files to PDF from your command line.
|
|
5
5
|
Project-URL: homepage, https://github.com/Julynx/markdown_convert
|
|
6
6
|
Author-email: Julio Cabria <juliocabria@tutanota.com>
|
|
@@ -17,6 +17,8 @@ Requires-Dist: latex2mathml>=3.78.1
|
|
|
17
17
|
Requires-Dist: markdown2<3,>=2.4.13
|
|
18
18
|
Requires-Dist: playwright>=1.57.0
|
|
19
19
|
Requires-Dist: pygments<3,>=2.17.2
|
|
20
|
+
Requires-Dist: ruamel-yaml>=0.19.1
|
|
21
|
+
Requires-Dist: vl-convert-python>=1.9.0.post1
|
|
20
22
|
Description-Content-Type: text/markdown
|
|
21
23
|
|
|
22
24
|
# markdown-convert
|
|
@@ -453,14 +453,6 @@ math {
|
|
|
453
453
|
border-radius: 0.3rem;
|
|
454
454
|
}
|
|
455
455
|
|
|
456
|
-
.admonition header {
|
|
457
|
-
display: flex;
|
|
458
|
-
align-items: center;
|
|
459
|
-
gap: 0.25rem;
|
|
460
|
-
margin-bottom: 0.25rem;
|
|
461
|
-
font-weight: bold;
|
|
462
|
-
}
|
|
463
|
-
|
|
464
456
|
.admonition strong {
|
|
465
457
|
text-transform: capitalize;
|
|
466
458
|
}
|
|
@@ -522,4 +514,15 @@ math {
|
|
|
522
514
|
|
|
523
515
|
.admonition.caution strong {
|
|
524
516
|
color: var(--color-ad-caution);
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
/* Vega-Lite charts*/
|
|
520
|
+
div.vega-lite,
|
|
521
|
+
div.vega {
|
|
522
|
+
display: flex;
|
|
523
|
+
justify-content: center;
|
|
524
|
+
align-items: center;
|
|
525
|
+
width: 100%;
|
|
526
|
+
margin-top: 1em;
|
|
527
|
+
margin-bottom: 1em;
|
|
525
528
|
}
|
|
@@ -9,7 +9,6 @@ import time
|
|
|
9
9
|
from datetime import datetime
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
|
|
12
|
-
import markdown2
|
|
13
12
|
from playwright.sync_api import sync_playwright
|
|
14
13
|
|
|
15
14
|
from .autoinstall import ensure_chromium
|
|
@@ -19,6 +18,7 @@ from .constants import (
|
|
|
19
18
|
MARKDOWN_EXTENSIONS,
|
|
20
19
|
PDF_PARAMS,
|
|
21
20
|
)
|
|
21
|
+
from .overrides import markdown2
|
|
22
22
|
from .resources import get_code_css_path, get_css_path, get_output_path
|
|
23
23
|
from .transform import (
|
|
24
24
|
create_html_document,
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Extras are defined as helper functions called by
|
|
3
|
+
render_extra_features from transform.py
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import re
|
|
8
|
+
|
|
9
|
+
import vl_convert as vlc
|
|
10
|
+
from bs4 import BeautifulSoup, Tag
|
|
11
|
+
from ruamel.yaml import YAML
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ExtraFeature:
    """
    Base class for extra features that can be applied to HTML.

    Extras are used as classes: callers read ``pattern`` and
    ``run_before_stash`` from the class and call ``replace`` on it directly,
    so ``replace`` is a static method and needs no instance.

    Attributes:
        pattern (str): Regex pattern to match the extra feature in the HTML.
        run_before_stash (bool): Whether to run this extra before stashing code blocks.
    """

    pattern = r""
    run_before_stash = False

    @staticmethod
    def replace(match, html):
        """
        Replaces the matched pattern with the rendered extra feature.

        Args:
            match (re.Match): The regex match object.
            html (str): The full HTML content.

        Returns:
            str: The replacement string.

        Raises:
            NotImplementedError: If the subclass does not implement this method.
        """
        raise NotImplementedError("Subclasses must implement the replace method.")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class CheckboxExtra(ExtraFeature):
    """
    Extra feature for rendering checkboxes.

    Matches ``[x]`` and an opening/closing bracket pair separated by exactly
    one whitespace character.
    """

    pattern = r"(?P<checkbox>\[\s\]|\[x\])"

    @staticmethod
    def replace(match, html):
        """
        Render a tag for a checkbox.

        Args:
            match (re.Match): Element identified as a checkbox.
            html (str): The full HTML content (not used by this extra).
        Returns:
            str: tag representing the checkbox
        """
        status = "checked" if "[x]" in match.group("checkbox") else ""
        return f'<input type="checkbox" {status}>'
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class HighlightExtra(ExtraFeature):
    """
    Extra feature for rendering highlighted text.

    Matches the shortest run of text enclosed in a pair of ``==`` markers.
    """

    pattern = r"==(?P<content>.*?)=="

    @staticmethod
    def replace(match, html):
        """
        Render a tag for a highlight.

        Args:
            match (re.Match): Element identified as a highlight.
            html (str): The full HTML content (not used by this extra).
        Returns:
            str: tag representing the highlight
        """
        content = match.group("content")
        return f'<span class="highlight">{content}</span>'
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class CustomSpanExtra(ExtraFeature):
    """
    Extra feature for rendering custom spans with specific classes.

    Matches ``name{{ content }}``, where ``name`` (letters, digits, ``_`` and
    ``-``) becomes the CSS class of the generated span. Whitespace directly
    inside the braces is not part of the content.
    """

    pattern = r"(?P<cls>[a-zA-Z0-9_-]+)\{\{\s*(?P<content>.*?)\s*\}\}"

    @staticmethod
    def replace(match, html):
        """
        Render a tag for a custom span.

        Args:
            match (re.Match): Element identified as a custom span.
            html (str): The full HTML content (not used by this extra).
        Returns:
            str: tag representing the custom span
        """
        cls = match.group("cls")
        content = match.group("content")
        return f'<span class="{cls}">{content}</span>'
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class TocExtra(ExtraFeature):
    """
    Extra feature for rendering a Table of Contents.

    Matches ``[TOC]`` with an optional ``depth=N`` argument, e.g.
    ``[TOC depth=2]``.
    """

    pattern = r"\[TOC(?:\s+depth=(?P<depth>\d+))?\]"

    @staticmethod
    def replace(match, html):
        """
        Render a tag for a table of contents

        Args:
            match (re.Match): Element identified as a table of contents.
            html (str): The full HTML content, scanned for headings.
        Returns:
            str: tag representing the table of contents, or an empty string
                when there is no heading with an ``id`` within the
                requested depth.
        """
        depth = match.group("depth")
        # HTML only defines h1..h6, so deeper levels can never match.
        max_level = 3 if depth is None else min(int(depth), 6)
        if max_level < 1:
            # depth=0 asks for no heading levels at all.
            return ""

        soup = BeautifulSoup(html, "html.parser")
        heading_names = [f"h{index}" for index in range(1, max_level + 1)]
        # Headings without an id cannot be linked to, so they are left out.
        headers = [
            header for header in soup.find_all(heading_names) if header.get("id")
        ]
        if not headers:
            return ""

        tag: Tag = soup.new_tag("ul", attrs={"class": "toc"})
        # Heading level -> the <ul> that currently receives items of that
        # level. Level 0 is the root list.
        active_list = {0: tag}
        # Heading level -> the most recent <li> created for that level.
        last_list_element = {}

        for header in headers:
            level = int(header.name[1])

            if level not in active_list:
                # Closest shallower level that has a list open.
                parent_lvl = max(key for key in active_list if key < level)
                if last_list_element.get(parent_lvl):
                    # Nest a new list under the latest item of the parent level.
                    sub_list = soup.new_tag("ul")
                    last_list_element[parent_lvl].append(sub_list)
                    active_list[level] = sub_list
                else:
                    # No parent item to nest under: share the parent's list.
                    active_list[level] = active_list[parent_lvl]

            # Lists of deeper levels are closed once a shallower heading appears.
            active_list = {
                key: value for key, value in active_list.items() if key <= level
            }

            list_item = soup.new_tag("li")
            link = soup.new_tag("a", href=f"#{header['id']}")
            link.string = header.get_text(strip=True)
            list_item.append(link)

            active_list[level].append(list_item)
            last_list_element[level] = list_item

        return tag.prettify()
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class VegaExtra(ExtraFeature):
    """
    Extra feature for rendering Vega-Lite diagrams from JSON or YAML.

    Matches rendered fenced code blocks whose ``<code>`` class contains
    ``language-vega``. Runs before code blocks are stashed, because it has to
    see the ``<pre><code>`` markup itself.
    """

    pattern = (
        r"<pre[^>]*>"
        r"<code[^>]*class=[\"'][^\"]*language-vega[^\"]*[\"'][^>]*>"
        r"(?P<content>.*?)"
        r"</code>"
        r"</pre>"
    )
    run_before_stash = True

    @staticmethod
    def replace(match, html):
        """
        Render a tag for a vega lite diagram from JSON or YAML.

        The spec is parsed as JSON first and as YAML if that fails. On any
        failure a warning is printed and the original code block is kept.

        Args:
            match (re.Match): Element identified as a vega lite diagram.
            html (str): The full HTML content.

        Returns:
            str: SVG tag representing the vega lite diagram, or the
                unchanged matched markup when the spec cannot be rendered.
        """
        # Imported locally because the ``html`` parameter shadows the module.
        from html import unescape

        # The text comes out of rendered HTML, where the markdown renderer is
        # expected to have entity-escaped characters such as & < > (assumption
        # about the upstream renderer, confirm against markdown2). Decode them
        # so the parsers see the spec as the author wrote it.
        content = unescape(match.group("content"))
        spec = None

        try:
            spec = json.loads(content)
        except (json.JSONDecodeError, TypeError):
            try:
                yaml = YAML(typ="safe")
                spec = yaml.load(content)
            except Exception as exc:
                print(f"WARNING: Failed to parse Vega-Lite spec: {exc}")
                return match.group(0)

        if spec is None:
            # Empty document: nothing to render, keep the code block.
            return match.group(0)

        try:
            tag = vlc.vegalite_to_svg(spec)
            return f"<div class='vega-lite'>{tag}</div>"
        except Exception as exc:
            print(f"WARNING: Failed to convert Vega-Lite spec to SVG: {exc}")
            return match.group(0)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def apply_extras(
    extras: "set[type[ExtraFeature]] | tuple[type[ExtraFeature], ...]",
    html,
    before_stash=False,
):
    """
    Applies extra features to an html string.

    Only extras whose ``run_before_stash`` flag equals ``before_stash`` are
    applied. Each extra is applied repeatedly until its pattern stops
    matching or a pass leaves the html unchanged.

    Args:
        extras: Extra feature classes to apply. Each must expose ``pattern``,
            ``run_before_stash`` and ``replace(match, html=...)``.
        html: complete html text, used by some extras like TOC.
        before_stash: Selects which group of extras runs in this call.
    Returns:
        str: The updated html.
    """
    for extra in extras:
        if extra.run_before_stash != before_stash:
            continue

        # Compiled once per extra instead of on every pass.
        pattern = re.compile(extra.pattern, flags=re.DOTALL)

        # Loop until the pattern no longer matches
        while True:
            try:
                new_html, count = pattern.subn(
                    # Bind the current values explicitly so the callback
                    # does not depend on variables that change later.
                    lambda match, extra=extra, current=html: extra.replace(
                        match, html=current
                    ),
                    html,
                )
            except Exception as exc:
                # Best effort: report the failure and keep the html as it was.
                print(
                    f"WARNING: An exception occurred while trying to apply an extra:\n{exc}"
                )
                break

            # Safety break: no match, or a replacement that changes nothing
            # (for example an extra that returns the matched text on error).
            if count == 0 or new_html == html:
                break
            html = new_html

    return html
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Overrides for markdown2.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import markdown2
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def tags(self, lexer_name: str) -> tuple[str, str]:
    """
    Overrides markdown2.FencedCodeBlocks.tags

    Builds the opening and closing markup for a fenced code block. When a
    language name is given, the <code> element gets both the bare name and
    its ``language-`` form as classes, so the language attribute works
    without the highlightjs-lang extension enabled.

    Args:
        lexer_name (str): Language given on the fence, empty if none.
    Returns:
        tuple[str, str]: The opening tags and the closing tags.
    """
    class_attr_for = self.md._html_class_str_from_tag
    pre_attr = class_attr_for("pre")
    code_attr = (
        f' class="{lexer_name} language-{lexer_name}"'
        if lexer_name
        else class_attr_for("code")
    )
    opening = f"<pre{pre_attr}><code{code_attr}>"
    return (opening, "</code></pre>")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _convert_double_match(self, match):
    """
    Overrides markdown2.Latex._convert_double_match

    Replaces every newline in the captured LaTeX source with a space and
    converts the result in block display mode. This works around bug #674,
    where latex macros that start with backslash n were not properly
    rendered.
    """
    latex_source = match.group(1)
    single_line = " ".join(latex_source.split("\n"))
    return self.converter.convert(single_line, display="block")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Apply overrides on module import and expose markdown2
|
|
34
|
+
markdown2.FencedCodeBlocks.tags = tags
|
|
35
|
+
markdown2.Latex._convert_double_match = _convert_double_match
|
|
36
|
+
__all__ = ["markdown2"]
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for transforming HTML content.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from bs4 import BeautifulSoup
|
|
8
|
+
|
|
9
|
+
from .extras import (
|
|
10
|
+
apply_extras,
|
|
11
|
+
ExtraFeature,
|
|
12
|
+
CheckboxExtra,
|
|
13
|
+
CustomSpanExtra,
|
|
14
|
+
HighlightExtra,
|
|
15
|
+
TocExtra,
|
|
16
|
+
VegaExtra,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def create_html_document(html_content, css_content, csp):
    """
    Assembles a full HTML page from a body fragment, a stylesheet and a
    Content Security Policy.

    Any argument that is falsy (for example None) is rendered as an empty
    string.

    Args:
        html_content (str): The HTML content to include in the body.
        css_content (str): The CSS styles to include in the head.
        csp (str): The Content Security Policy string.
    Returns:
        str: A complete HTML document as a string.
    """
    policy = csp or ""
    styles = css_content or ""
    body = html_content or ""

    document_lines = (
        "<!DOCTYPE html>",
        "<html>",
        "<head>",
        '<meta charset="UTF-8">',
        f'<meta http-equiv="Content-Security-Policy" content="{policy}">',
        "<style>",
        f"{styles}",
        "</style>",
        "</head>",
        "<body>",
        f"{body}",
        "</body>",
        "</html>",
    )
    return "\n".join(document_lines)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def create_sections(html_string):
    """
    Wraps each h2 and its following content in a <section> tag.
    The section ends when the next h2 is encountered, or the parent ends.

    Args:
        html_string (str): The input HTML string.
    Returns:
        str: The modified HTML string with sections wrapped.
    """
    soup = BeautifulSoup(html_string, "html.parser")

    for header in soup.find_all("h2"):
        new_section = soup.new_tag("section")
        header.insert_before(new_section)

        current = header
        # Identity check on purpose: BeautifulSoup's ``==`` compares markup,
        # so a later <h2> with identical markup would compare equal to
        # ``header`` and be pulled into this section instead of ending it.
        while current is not None and (current is header or current.name != "h2"):
            # Read the sibling before moving the node, since appending
            # detaches it from its current position.
            next_sibling = current.next_sibling
            new_section.append(current)
            current = next_sibling

    return str(soup)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def render_mermaid_diagrams(html, *, nonce):
    """
    Prepends the Mermaid loader script when the page contains a diagram.

    A diagram is detected by the exact substring ``<div class="mermaid">``.
    Pages without it are returned unchanged.

    Args:
        html (str): HTML content.
        nonce (str): Cryptographic nonce for CSP, set on the script tag.
    Returns:
        str: The HTML content, with the loader script in front of it when a
            Mermaid diagram is present.
    """
    if '<div class="mermaid">' not in html:
        return html

    loader = f"""
<script type="module" nonce="{nonce}">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
mermaid.initialize({{
startOnLoad: true,
theme: 'default',
themeVariables: {{}},
fontFamily: 'arial, verdana, sans-serif'
}});
</script>
"""
    return loader + html
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def render_extra_features(
    html,
    extras: "set[type[ExtraFeature]] | tuple[type[ExtraFeature], ...]" = (
        CheckboxExtra,
        CustomSpanExtra,
        HighlightExtra,
        TocExtra,
        VegaExtra,
    ),
):
    """
    Renders extra features by protecting specific tags, applying regex
    transformations, and restoring the protected content.

    Args:
        html (str): HTML content.
        extras: Extra feature classes to apply. Defaults to all built-in
            extras.
    Returns:
        str: HTML content with extra features rendered.
    """
    placeholders = {}

    def stash(match):
        # Swap the protected markup for a unique key and remember it.
        key = f"__PROTECTED_BLOCK_{len(placeholders)}__"
        placeholders[key] = match.group(0)
        return key

    # 0. Pre protection extras
    html = apply_extras(extras, html, before_stash=True)

    # 1. Protection: Replace ignored tags with unique placeholders
    ignored_pattern = re.compile(
        r"<(code|pre|script|style)\b[^>]*>.*?</\1>", re.DOTALL | re.IGNORECASE
    )
    html = ignored_pattern.sub(stash, html)

    # 2. Transformations: Define patterns and their replacements
    html = apply_extras(extras, html, before_stash=False)

    # 3. Restoration: Replace placeholders back with original content.
    # Done in a single pass so that placeholder-looking text inside an
    # already restored block is never substituted a second time.
    if not placeholders:
        return html

    return re.sub(
        r"__PROTECTED_BLOCK_\d+__",
        lambda match: placeholders.get(match.group(0), match.group(0)),
        html,
    )
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "markdown_convert"
|
|
7
|
-
version = "1.2.51"
|
|
7
|
+
version = "1.2.53"
|
|
8
8
|
description = "Convert Markdown files to PDF from your command line."
|
|
9
9
|
authors = [
|
|
10
10
|
{ name = "Julio Cabria", email = "juliocabria@tutanota.com" },
|
|
@@ -25,6 +25,8 @@ dependencies = [
|
|
|
25
25
|
"playwright>=1.57.0",
|
|
26
26
|
"beautifulsoup4>=4.14.3",
|
|
27
27
|
"install-playwright>=1.0.0",
|
|
28
|
+
"vl-convert-python>=1.9.0.post1",
|
|
29
|
+
"ruamel-yaml>=0.19.1",
|
|
28
30
|
]
|
|
29
31
|
|
|
30
32
|
[project.urls]
|
|
@@ -48,6 +50,7 @@ include = [
|
|
|
48
50
|
"markdown_convert/modules/utils.py",
|
|
49
51
|
"markdown_convert/modules/validate.py",
|
|
50
52
|
"markdown_convert/modules/autoinstall.py",
|
|
53
|
+
"markdown_convert/modules/overrides.py",
|
|
51
54
|
]
|
|
52
55
|
|
|
53
56
|
[tool.hatch.build.targets.wheel]
|
|
@@ -66,6 +69,7 @@ include = [
|
|
|
66
69
|
"markdown_convert/modules/utils.py",
|
|
67
70
|
"markdown_convert/modules/validate.py",
|
|
68
71
|
"markdown_convert/modules/autoinstall.py",
|
|
72
|
+
"markdown_convert/modules/overrides.py",
|
|
69
73
|
]
|
|
70
74
|
|
|
71
75
|
[dependency-groups]
|
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Extras are defined as helper functions called by
|
|
3
|
-
render_extra_features from transform.py
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def create_checkbox(soup, match):
|
|
8
|
-
"""
|
|
9
|
-
Render a tag for a checkbox.
|
|
10
|
-
|
|
11
|
-
Args:
|
|
12
|
-
soup: HTML beautifulsoup
|
|
13
|
-
match: Element identified as a checkbox
|
|
14
|
-
Returns:
|
|
15
|
-
tag: Beautifulsoup tag representing the checkbox
|
|
16
|
-
"""
|
|
17
|
-
tag = soup.new_tag("input", type="checkbox")
|
|
18
|
-
if "[x]" in match.group("checkbox"):
|
|
19
|
-
tag["checked"] = ""
|
|
20
|
-
return tag
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def create_highlight(soup, match):
|
|
24
|
-
"""
|
|
25
|
-
Render a tag for a highlight.
|
|
26
|
-
|
|
27
|
-
Args:
|
|
28
|
-
soup: HTML beautifulsoup
|
|
29
|
-
match: Element identified as a highlight
|
|
30
|
-
Returns:
|
|
31
|
-
tag: Beautifulsoup tag representing the highlight
|
|
32
|
-
"""
|
|
33
|
-
tag = soup.new_tag("span", attrs={"class": "highlight"})
|
|
34
|
-
tag.string = match.group("hl_content")
|
|
35
|
-
return tag
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def create_custom_span(soup, match):
|
|
39
|
-
"""
|
|
40
|
-
Render a tag for a custom span.
|
|
41
|
-
|
|
42
|
-
Args:
|
|
43
|
-
soup: HTML beautifulsoup
|
|
44
|
-
match: Element identified as a custom span
|
|
45
|
-
Returns:
|
|
46
|
-
tag: Beautifulsoup tag representing the custom span
|
|
47
|
-
"""
|
|
48
|
-
tag = soup.new_tag("span", attrs={"class": match.group("cls")})
|
|
49
|
-
tag.string = match.group("sp_content")
|
|
50
|
-
return tag
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def create_toc(soup, match):
|
|
54
|
-
"""
|
|
55
|
-
Render a tag for a table of contents
|
|
56
|
-
|
|
57
|
-
Args:
|
|
58
|
-
soup: HTML beautifulsoup
|
|
59
|
-
match: Element identified as a table of contents
|
|
60
|
-
Returns:
|
|
61
|
-
tag: Beautifulsoup tag representing the table of contents
|
|
62
|
-
"""
|
|
63
|
-
max_level = match.group("depth")
|
|
64
|
-
max_level = 3 if max_level is None else int(max_level)
|
|
65
|
-
|
|
66
|
-
headers = [
|
|
67
|
-
header
|
|
68
|
-
for header in soup.find_all([f"h{index}" for index in range(1, max_level + 1)])
|
|
69
|
-
if header.get("id")
|
|
70
|
-
]
|
|
71
|
-
if not headers:
|
|
72
|
-
return ""
|
|
73
|
-
|
|
74
|
-
tag = soup.new_tag("ul", attrs={"class": "toc"})
|
|
75
|
-
active_list = {0: tag}
|
|
76
|
-
last_list_element = {}
|
|
77
|
-
|
|
78
|
-
for header in headers:
|
|
79
|
-
level = int(header.name[1])
|
|
80
|
-
|
|
81
|
-
if level not in active_list:
|
|
82
|
-
parent_lvl = max(key for key in active_list if key < level)
|
|
83
|
-
if last_list_element.get(parent_lvl):
|
|
84
|
-
sub_list = soup.new_tag("ul")
|
|
85
|
-
last_list_element[parent_lvl].append(sub_list)
|
|
86
|
-
active_list[level] = sub_list
|
|
87
|
-
else:
|
|
88
|
-
active_list[level] = active_list[parent_lvl]
|
|
89
|
-
|
|
90
|
-
active_list = {key: value for key, value in active_list.items() if key <= level}
|
|
91
|
-
|
|
92
|
-
list_item = soup.new_tag("li")
|
|
93
|
-
link = soup.new_tag("a", href=f"#{header['id']}")
|
|
94
|
-
link.string = header.get_text(strip=True)
|
|
95
|
-
list_item.append(link)
|
|
96
|
-
|
|
97
|
-
active_list[level].append(list_item)
|
|
98
|
-
last_list_element[level] = list_item
|
|
99
|
-
|
|
100
|
-
return tag
|
|
@@ -1,164 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Module for transforming HTML content.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import re
|
|
6
|
-
|
|
7
|
-
from bs4 import BeautifulSoup
|
|
8
|
-
|
|
9
|
-
from .constants import YELLOW
|
|
10
|
-
from .extras import create_checkbox, create_custom_span, create_highlight, create_toc
|
|
11
|
-
from .utils import color
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def create_html_document(html_content, css_content, csp):
|
|
15
|
-
"""
|
|
16
|
-
Creates a complete HTML document with the given content, CSS, and Content Security Policy.
|
|
17
|
-
Args:
|
|
18
|
-
html_content (str): The HTML content to include in the body.
|
|
19
|
-
css_content (str): The CSS styles to include in the head.
|
|
20
|
-
csp (str): The Content Security Policy string.
|
|
21
|
-
Returns:
|
|
22
|
-
str: A complete HTML document as a string.
|
|
23
|
-
"""
|
|
24
|
-
return f"""<!DOCTYPE html>
|
|
25
|
-
<html>
|
|
26
|
-
<head>
|
|
27
|
-
<meta charset="UTF-8">
|
|
28
|
-
<meta http-equiv="Content-Security-Policy" content="{csp or ""}">
|
|
29
|
-
<style>
|
|
30
|
-
{css_content or ""}
|
|
31
|
-
</style>
|
|
32
|
-
</head>
|
|
33
|
-
<body>
|
|
34
|
-
{html_content or ""}
|
|
35
|
-
</body>
|
|
36
|
-
</html>"""
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def create_sections(html_string):
|
|
40
|
-
"""
|
|
41
|
-
Wraps each h2 and its following content in a <section> tag.
|
|
42
|
-
The section ends when the next h2 is encountered, or the parent ends.
|
|
43
|
-
|
|
44
|
-
Args:
|
|
45
|
-
html_string (str): The input HTML string.
|
|
46
|
-
Returns:
|
|
47
|
-
str: The modified HTML string with sections wrapped.
|
|
48
|
-
"""
|
|
49
|
-
soup = BeautifulSoup(html_string, "html.parser")
|
|
50
|
-
|
|
51
|
-
for header in soup.find_all("h2"):
|
|
52
|
-
new_section = soup.new_tag("section")
|
|
53
|
-
header.insert_before(new_section)
|
|
54
|
-
|
|
55
|
-
current = header
|
|
56
|
-
while current is not None and (current == header or current.name != "h2"):
|
|
57
|
-
next_sibling = current.next_sibling
|
|
58
|
-
new_section.append(current)
|
|
59
|
-
current = next_sibling
|
|
60
|
-
|
|
61
|
-
return str(soup)
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def render_mermaid_diagrams(html, *, nonce):
|
|
65
|
-
"""
|
|
66
|
-
Renders Mermaid diagrams in the HTML content.
|
|
67
|
-
|
|
68
|
-
Args:
|
|
69
|
-
html (str): HTML content.
|
|
70
|
-
nonce (str): Cryptographic nonce for CSP.
|
|
71
|
-
Returns:
|
|
72
|
-
str: HTML content with rendered Mermaid diagrams.
|
|
73
|
-
"""
|
|
74
|
-
mermaid_script = f"""
|
|
75
|
-
<script type="module" nonce="{nonce}">
|
|
76
|
-
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
|
|
77
|
-
mermaid.initialize({{
|
|
78
|
-
startOnLoad: true,
|
|
79
|
-
theme: 'default',
|
|
80
|
-
themeVariables: {{}},
|
|
81
|
-
fontFamily: 'arial, verdana, sans-serif'
|
|
82
|
-
}});
|
|
83
|
-
</script>
|
|
84
|
-
"""
|
|
85
|
-
|
|
86
|
-
if '<div class="mermaid">' in html:
|
|
87
|
-
html = mermaid_script + html
|
|
88
|
-
|
|
89
|
-
return html
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
def render_extra_features(html):
|
|
93
|
-
"""
|
|
94
|
-
Renders extra features like checkboxes, highlights, and custom spans in the HTML content.
|
|
95
|
-
|
|
96
|
-
Args:
|
|
97
|
-
html (str): HTML content.
|
|
98
|
-
Returns:
|
|
99
|
-
str: HTML content with extra features rendered.
|
|
100
|
-
"""
|
|
101
|
-
|
|
102
|
-
handlers = {
|
|
103
|
-
"checkbox": create_checkbox,
|
|
104
|
-
"highlight": create_highlight,
|
|
105
|
-
"span": create_custom_span,
|
|
106
|
-
"toc": create_toc,
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
master_pattern = re.compile(
|
|
110
|
-
r"(?P<checkbox>\[\s\]|\[x\])|"
|
|
111
|
-
r"(?P<highlight>==(?P<hl_content>.*?)==)|"
|
|
112
|
-
r"(?P<span>(?P<cls>[a-zA-Z0-9_-]+)\{\{\s*(?P<sp_content>.*?)\s*\}\})|"
|
|
113
|
-
r"(?P<toc>\[TOC(?:\s+depth=(?P<depth>\d+))?\])"
|
|
114
|
-
)
|
|
115
|
-
|
|
116
|
-
ignored_tags = {"code", "pre", "script", "style"}
|
|
117
|
-
|
|
118
|
-
soup = BeautifulSoup(html, "html.parser")
|
|
119
|
-
for text_node in soup.find_all(string=True):
|
|
120
|
-
# Ignore text nodes within certain tags
|
|
121
|
-
if text_node.parent.name in ignored_tags:
|
|
122
|
-
continue
|
|
123
|
-
|
|
124
|
-
# If no match, skip processing
|
|
125
|
-
content = text_node.string
|
|
126
|
-
if not master_pattern.search(content):
|
|
127
|
-
continue
|
|
128
|
-
|
|
129
|
-
new_nodes = []
|
|
130
|
-
last_end = 0
|
|
131
|
-
for match in master_pattern.finditer(content):
|
|
132
|
-
start, end = match.span()
|
|
133
|
-
|
|
134
|
-
# Append text before the match
|
|
135
|
-
if start > last_end:
|
|
136
|
-
new_nodes.append(content[last_end:start])
|
|
137
|
-
|
|
138
|
-
kind = match.lastgroup
|
|
139
|
-
|
|
140
|
-
# Call the appropriate handler
|
|
141
|
-
handler = handlers.get(kind)
|
|
142
|
-
if handler:
|
|
143
|
-
try:
|
|
144
|
-
tag = handler(soup, match)
|
|
145
|
-
new_nodes.append(tag)
|
|
146
|
-
except Exception as exc:
|
|
147
|
-
print(
|
|
148
|
-
color(
|
|
149
|
-
YELLOW,
|
|
150
|
-
f"WARNING: Handler for '{kind}' failed with exception: {exc}",
|
|
151
|
-
)
|
|
152
|
-
)
|
|
153
|
-
new_nodes.append(match.group(0))
|
|
154
|
-
|
|
155
|
-
last_end = end
|
|
156
|
-
|
|
157
|
-
# Append any remaining text after the last match
|
|
158
|
-
if new_nodes:
|
|
159
|
-
if last_end < len(content):
|
|
160
|
-
new_nodes.append(content[last_end:])
|
|
161
|
-
|
|
162
|
-
text_node.replace_with(*new_nodes)
|
|
163
|
-
|
|
164
|
-
return str(soup)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|