markdown_convert 1.2.45__tar.gz → 1.2.47__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/PKG-INFO +1 -1
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/markdown_convert/default.css +23 -3
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/markdown_convert/modules/convert.py +3 -6
- markdown_convert-1.2.47/markdown_convert/modules/transform.py +171 -0
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/pyproject.toml +1 -1
- markdown_convert-1.2.45/markdown_convert/modules/transform.py +0 -165
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/.gitignore +0 -0
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/LICENSE +0 -0
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/README.md +0 -0
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/markdown_convert/__init__.py +0 -0
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/markdown_convert/__main__.py +0 -0
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/markdown_convert/code.css +0 -0
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/markdown_convert/modules/__init__.py +0 -0
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/markdown_convert/modules/constants.py +0 -0
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/markdown_convert/modules/resources.py +0 -0
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/markdown_convert/modules/utils.py +0 -0
- {markdown_convert-1.2.45 → markdown_convert-1.2.47}/markdown_convert/modules/validate.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: markdown_convert
|
|
3
|
-
Version: 1.2.45
|
|
3
|
+
Version: 1.2.47
|
|
4
4
|
Summary: Convert Markdown files to PDF from your command line.
|
|
5
5
|
Project-URL: homepage, https://github.com/Julynx/markdown_convert
|
|
6
6
|
Author-email: Julio Cabria <juliocabria@tutanota.com>
|
|
@@ -181,7 +181,7 @@ p > img + em {
|
|
|
181
181
|
font-style: italic;
|
|
182
182
|
color: var(--color-text-light);
|
|
183
183
|
display: block;
|
|
184
|
-
margin:
|
|
184
|
+
margin: 0 auto;
|
|
185
185
|
}
|
|
186
186
|
|
|
187
187
|
/* Image alt text attributes */
|
|
@@ -212,6 +212,22 @@ img[alt*="::wide::"] {
|
|
|
212
212
|
max-width: 100vw;
|
|
213
213
|
}
|
|
214
214
|
|
|
215
|
+
p:has(> img[alt*="::left::"]) {
|
|
216
|
+
display: flex;
|
|
217
|
+
flex-direction: column;
|
|
218
|
+
width: fit-content;
|
|
219
|
+
margin-right: auto;
|
|
220
|
+
align-items: center;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
p:has(> img[alt*="::right::"]) {
|
|
224
|
+
display: flex;
|
|
225
|
+
flex-direction: column;
|
|
226
|
+
width: fit-content;
|
|
227
|
+
margin-left: auto;
|
|
228
|
+
align-items: center;
|
|
229
|
+
}
|
|
230
|
+
|
|
215
231
|
p:has(> img[alt*="::inline::"]) {
|
|
216
232
|
display: inline-block;
|
|
217
233
|
width: min-content;
|
|
@@ -391,8 +407,7 @@ math {
|
|
|
391
407
|
.keyboard,
|
|
392
408
|
.key {
|
|
393
409
|
font-family:
|
|
394
|
-
"Segoe UI Symbol", "DejaVu Sans", "Liberation Sans", "Arial",
|
|
395
|
-
"sans-serif";
|
|
410
|
+
"Segoe UI Symbol", "DejaVu Sans", "Liberation Sans", "Arial", "sans-serif";
|
|
396
411
|
font-size: 0.9rem;
|
|
397
412
|
background-color: var(--color-text-background);
|
|
398
413
|
border: 1px solid var(--color-border);
|
|
@@ -400,3 +415,8 @@ math {
|
|
|
400
415
|
padding: 0.1em 0.4em;
|
|
401
416
|
box-shadow: rgba(100, 100, 100, 0.4) 0px 2px 4px 0px;
|
|
402
417
|
}
|
|
418
|
+
|
|
419
|
+
.center {
|
|
420
|
+
display: block;
|
|
421
|
+
text-align: center;
|
|
422
|
+
}
|
|
@@ -18,8 +18,7 @@ from .transform import (
|
|
|
18
18
|
create_sections,
|
|
19
19
|
render_mermaid_diagrams,
|
|
20
20
|
create_html_document,
|
|
21
|
-
render_checkboxes,
|
|
22
|
-
create_spans,
|
|
21
|
+
render_extra_features,
|
|
23
22
|
)
|
|
24
23
|
from .utils import drop_duplicates
|
|
25
24
|
|
|
@@ -135,8 +134,7 @@ def convert(
|
|
|
135
134
|
html = markdown2.markdown_path(markdown_path, extras=MARKDOWN_EXTENSIONS)
|
|
136
135
|
html = create_sections(html)
|
|
137
136
|
html = render_mermaid_diagrams(html, nonce=nonce)
|
|
138
|
-
html = render_checkboxes(html)
|
|
139
|
-
html = create_spans(html)
|
|
137
|
+
html = render_extra_features(html)
|
|
140
138
|
|
|
141
139
|
_generate_pdf_with_playwright(
|
|
142
140
|
html,
|
|
@@ -207,8 +205,7 @@ def convert_text(markdown_text, css_text=None, *, extend_default_css=True):
|
|
|
207
205
|
html = markdown2.markdown(markdown_text, extras=MARKDOWN_EXTENSIONS)
|
|
208
206
|
html = create_sections(html)
|
|
209
207
|
html = render_mermaid_diagrams(html, nonce=nonce)
|
|
210
|
-
html = render_checkboxes(html)
|
|
211
|
-
html = create_spans(html)
|
|
208
|
+
html = render_extra_features(html)
|
|
212
209
|
|
|
213
210
|
return _generate_pdf_with_playwright(
|
|
214
211
|
html,
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for transforming HTML content.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from bs4 import BeautifulSoup
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_html_document(html_content, css_content, csp):
|
|
11
|
+
"""
|
|
12
|
+
Creates a complete HTML document with the given content, CSS, and Content Security Policy.
|
|
13
|
+
Args:
|
|
14
|
+
html_content (str): The HTML content to include in the body.
|
|
15
|
+
css_content (str): The CSS styles to include in the head.
|
|
16
|
+
csp (str): The Content Security Policy string.
|
|
17
|
+
Returns:
|
|
18
|
+
str: A complete HTML document as a string.
|
|
19
|
+
"""
|
|
20
|
+
return f"""<!DOCTYPE html>
|
|
21
|
+
<html>
|
|
22
|
+
<head>
|
|
23
|
+
<meta charset="UTF-8">
|
|
24
|
+
<meta http-equiv="Content-Security-Policy" content="{csp or ""}">
|
|
25
|
+
<style>
|
|
26
|
+
{css_content or ""}
|
|
27
|
+
</style>
|
|
28
|
+
</head>
|
|
29
|
+
<body>
|
|
30
|
+
{html_content or ""}
|
|
31
|
+
</body>
|
|
32
|
+
</html>"""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def create_sections(html_string):
|
|
36
|
+
"""
|
|
37
|
+
Wraps each h2 or h3 and its following content in a <section> tag.
|
|
38
|
+
The section ends when the next h2 or h3 is encountered, or the parent ends.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
html_string (str): The input HTML string.
|
|
42
|
+
Returns:
|
|
43
|
+
str: The modified HTML string with sections wrapped.
|
|
44
|
+
"""
|
|
45
|
+
soup = BeautifulSoup(html_string, "html.parser")
|
|
46
|
+
|
|
47
|
+
for header in soup.find_all(["h2", "h3"]):
|
|
48
|
+
new_section = soup.new_tag("section")
|
|
49
|
+
header.insert_before(new_section)
|
|
50
|
+
|
|
51
|
+
current = header
|
|
52
|
+
while current is not None and (
|
|
53
|
+
current == header or current.name not in ["h2", "h3"]
|
|
54
|
+
):
|
|
55
|
+
next_sibling = current.next_sibling
|
|
56
|
+
new_section.append(current)
|
|
57
|
+
current = next_sibling
|
|
58
|
+
|
|
59
|
+
return str(soup)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def render_mermaid_diagrams(html, *, nonce):
|
|
63
|
+
"""
|
|
64
|
+
Renders Mermaid diagrams in the HTML content.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
html (str): HTML content.
|
|
68
|
+
nonce (str): Cryptographic nonce for CSP.
|
|
69
|
+
Returns:
|
|
70
|
+
str: HTML content with rendered Mermaid diagrams.
|
|
71
|
+
"""
|
|
72
|
+
mermaid_script = f"""
|
|
73
|
+
<script type="module" nonce="{nonce}">
|
|
74
|
+
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
|
|
75
|
+
mermaid.initialize({{
|
|
76
|
+
startOnLoad: true,
|
|
77
|
+
theme: 'default',
|
|
78
|
+
themeVariables: {{}},
|
|
79
|
+
fontFamily: 'arial, verdana, sans-serif'
|
|
80
|
+
}});
|
|
81
|
+
</script>
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
if '<div class="mermaid">' in html:
|
|
85
|
+
html = mermaid_script + html
|
|
86
|
+
|
|
87
|
+
return html
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def render_extra_features(html):
|
|
91
|
+
"""
|
|
92
|
+
Renders extra features like checkboxes, highlights, and custom spans in the HTML content.
|
|
93
|
+
|
|
94
|
+
Args:
|
|
95
|
+
html (str): HTML content.
|
|
96
|
+
Returns:
|
|
97
|
+
str: HTML content with extra features rendered.
|
|
98
|
+
"""
|
|
99
|
+
|
|
100
|
+
def _create_checkbox(soup, match):
|
|
101
|
+
tag = soup.new_tag("input", type="checkbox")
|
|
102
|
+
if "[x]" in match.group("checkbox"):
|
|
103
|
+
tag["checked"] = ""
|
|
104
|
+
return tag
|
|
105
|
+
|
|
106
|
+
def _create_highlight(soup, match):
|
|
107
|
+
tag = soup.new_tag("span", attrs={"class": "highlight"})
|
|
108
|
+
tag.string = match.group("hl_content")
|
|
109
|
+
return tag
|
|
110
|
+
|
|
111
|
+
def _create_custom_span(soup, match):
|
|
112
|
+
tag = soup.new_tag("span", attrs={"class": match.group("cls")})
|
|
113
|
+
tag.string = match.group("sp_content")
|
|
114
|
+
return tag
|
|
115
|
+
|
|
116
|
+
handlers = {
|
|
117
|
+
"checkbox": _create_checkbox,
|
|
118
|
+
"highlight": _create_highlight,
|
|
119
|
+
"span": _create_custom_span,
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
master_pattern = re.compile(
|
|
123
|
+
r"(?P<checkbox>\[\s\]|\[x\])|"
|
|
124
|
+
r"(?P<highlight>==(?P<hl_content>.*?)==)|"
|
|
125
|
+
r"(?P<span>(?P<cls>[a-zA-Z0-9_-]+)\{\{\s*(?P<sp_content>.*?)\s*\}\})"
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
ignored_tags = {"code", "pre", "script", "style"}
|
|
129
|
+
|
|
130
|
+
soup = BeautifulSoup(html, "html.parser")
|
|
131
|
+
for text_node in soup.find_all(string=True):
|
|
132
|
+
# Ignore text nodes within certain tags
|
|
133
|
+
if text_node.parent.name in ignored_tags:
|
|
134
|
+
continue
|
|
135
|
+
|
|
136
|
+
# If no match, skip processing
|
|
137
|
+
content = text_node.string
|
|
138
|
+
if not master_pattern.search(content):
|
|
139
|
+
continue
|
|
140
|
+
|
|
141
|
+
new_nodes = []
|
|
142
|
+
last_end = 0
|
|
143
|
+
for match in master_pattern.finditer(content):
|
|
144
|
+
start, end = match.span()
|
|
145
|
+
|
|
146
|
+
# Append text before the match
|
|
147
|
+
if start > last_end:
|
|
148
|
+
new_nodes.append(content[last_end:start])
|
|
149
|
+
|
|
150
|
+
kind = match.lastgroup
|
|
151
|
+
|
|
152
|
+
# Call the appropriate handler
|
|
153
|
+
handler = handlers.get(kind)
|
|
154
|
+
if handler:
|
|
155
|
+
try:
|
|
156
|
+
tag = handler(soup, match)
|
|
157
|
+
new_nodes.append(tag)
|
|
158
|
+
except Exception as exc:
|
|
159
|
+
print(f"Warning: Handler for '{kind}' failed with exception: {exc}")
|
|
160
|
+
new_nodes.append(match.group(0))
|
|
161
|
+
|
|
162
|
+
last_end = end
|
|
163
|
+
|
|
164
|
+
# Append any remaining text after the last match
|
|
165
|
+
if new_nodes:
|
|
166
|
+
if last_end < len(content):
|
|
167
|
+
new_nodes.append(content[last_end:])
|
|
168
|
+
|
|
169
|
+
text_node.replace_with(*new_nodes)
|
|
170
|
+
|
|
171
|
+
return str(soup)
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "markdown_convert"
|
|
7
|
-
version = "1.2.45"
|
|
7
|
+
version = "1.2.47"
|
|
8
8
|
description = "Convert Markdown files to PDF from your command line."
|
|
9
9
|
authors = [
|
|
10
10
|
{ name = "Julio Cabria", email = "juliocabria@tutanota.com" },
|
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Module for transforming HTML content.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import re
|
|
6
|
-
|
|
7
|
-
from bs4 import BeautifulSoup
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def create_html_document(html_content, css_content, csp):
|
|
11
|
-
"""
|
|
12
|
-
Creates a complete HTML document with the given content, CSS, and Content Security Policy.
|
|
13
|
-
Args:
|
|
14
|
-
html_content (str): The HTML content to include in the body.
|
|
15
|
-
css_content (str): The CSS styles to include in the head.
|
|
16
|
-
csp (str): The Content Security Policy string.
|
|
17
|
-
Returns:
|
|
18
|
-
str: A complete HTML document as a string.
|
|
19
|
-
"""
|
|
20
|
-
return f"""<!DOCTYPE html>
|
|
21
|
-
<html>
|
|
22
|
-
<head>
|
|
23
|
-
<meta charset="UTF-8">
|
|
24
|
-
<meta http-equiv="Content-Security-Policy" content="{csp or ""}">
|
|
25
|
-
<style>
|
|
26
|
-
{css_content or ""}
|
|
27
|
-
</style>
|
|
28
|
-
</head>
|
|
29
|
-
<body>
|
|
30
|
-
{html_content or ""}
|
|
31
|
-
</body>
|
|
32
|
-
</html>"""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def create_sections(html_string):
|
|
36
|
-
"""
|
|
37
|
-
Wraps each h2 and its following content in a <section> tag.
|
|
38
|
-
Avoids wrapping h2 tags that are inside <code> blocks.
|
|
39
|
-
|
|
40
|
-
Args:
|
|
41
|
-
html_string (str): The input HTML string.
|
|
42
|
-
Returns:
|
|
43
|
-
str: The modified HTML string with h2 sections wrapped.
|
|
44
|
-
"""
|
|
45
|
-
soup = BeautifulSoup(html_string, "html.parser")
|
|
46
|
-
|
|
47
|
-
for second_level_header in soup.find_all("h2"):
|
|
48
|
-
new_section = soup.new_tag("section")
|
|
49
|
-
second_level_header.insert_before(new_section)
|
|
50
|
-
|
|
51
|
-
current = second_level_header
|
|
52
|
-
while current is not None and (
|
|
53
|
-
current == second_level_header or current.name != "h2"
|
|
54
|
-
):
|
|
55
|
-
next_sibling = current.next_sibling
|
|
56
|
-
new_section.append(current)
|
|
57
|
-
current = next_sibling
|
|
58
|
-
|
|
59
|
-
return str(soup)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def render_mermaid_diagrams(html, *, nonce):
|
|
63
|
-
"""
|
|
64
|
-
Renders Mermaid diagrams in the HTML content.
|
|
65
|
-
|
|
66
|
-
Args:
|
|
67
|
-
html (str): HTML content.
|
|
68
|
-
nonce (str): Cryptographic nonce for CSP.
|
|
69
|
-
Returns:
|
|
70
|
-
str: HTML content with rendered Mermaid diagrams.
|
|
71
|
-
"""
|
|
72
|
-
mermaid_script = f"""
|
|
73
|
-
<script type="module" nonce="{nonce}">
|
|
74
|
-
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
|
|
75
|
-
mermaid.initialize({{
|
|
76
|
-
startOnLoad: true,
|
|
77
|
-
theme: 'default',
|
|
78
|
-
themeVariables: {{}},
|
|
79
|
-
fontFamily: 'arial, verdana, sans-serif'
|
|
80
|
-
}});
|
|
81
|
-
</script>
|
|
82
|
-
"""
|
|
83
|
-
|
|
84
|
-
if '<div class="mermaid">' in html:
|
|
85
|
-
html = mermaid_script + html
|
|
86
|
-
|
|
87
|
-
return html
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def render_checkboxes(html):
|
|
91
|
-
"""
|
|
92
|
-
Renders checkboxes in the HTML content by replacing input elements with SVG representations.
|
|
93
|
-
Args:
|
|
94
|
-
html (str): HTML content.
|
|
95
|
-
Returns:
|
|
96
|
-
str: HTML content with rendered checkboxes.
|
|
97
|
-
"""
|
|
98
|
-
unchecked = "[ ]"
|
|
99
|
-
checked = "[x]"
|
|
100
|
-
|
|
101
|
-
unchecked_html = "<input type='checkbox'>"
|
|
102
|
-
checked_html = "<input type='checkbox' checked>"
|
|
103
|
-
|
|
104
|
-
# Split by code blocks to avoid processing text inside them
|
|
105
|
-
parts = re.split(r"(<code>.*?</code>)", html, flags=re.DOTALL)
|
|
106
|
-
for part_index, _part in enumerate(parts):
|
|
107
|
-
# Only process parts that are NOT code blocks
|
|
108
|
-
if not parts[part_index].startswith("<code>"):
|
|
109
|
-
parts[part_index] = parts[part_index].replace(unchecked, unchecked_html)
|
|
110
|
-
parts[part_index] = parts[part_index].replace(checked, checked_html)
|
|
111
|
-
|
|
112
|
-
return "".join(parts)
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
def create_spans(html):
|
|
116
|
-
"""
|
|
117
|
-
Renders custom spans in the HTML content by replacing classname{{ content }} tags.
|
|
118
|
-
Args:
|
|
119
|
-
html (str): HTML content.
|
|
120
|
-
Returns:
|
|
121
|
-
str: HTML content with rendered custom spans.
|
|
122
|
-
"""
|
|
123
|
-
soup = BeautifulSoup(html, "html.parser")
|
|
124
|
-
|
|
125
|
-
# Regex to match classname{{ content }}
|
|
126
|
-
# It captures the class name and the content
|
|
127
|
-
pattern = re.compile(r"([a-zA-Z0-9_-]+){{\s*(.*?)\s*}}")
|
|
128
|
-
|
|
129
|
-
# We need to find all text nodes and replace the pattern
|
|
130
|
-
for text_node in soup.find_all(string=True):
|
|
131
|
-
# Skip text nodes inside code, pre, script, style tags
|
|
132
|
-
if text_node.parent.name in ["code", "pre", "script", "style"]:
|
|
133
|
-
continue
|
|
134
|
-
|
|
135
|
-
content = str(text_node)
|
|
136
|
-
if "{{" in content:
|
|
137
|
-
new_content_nodes = []
|
|
138
|
-
last_end = 0
|
|
139
|
-
for match in pattern.finditer(content):
|
|
140
|
-
# Add text before the match
|
|
141
|
-
before = content[last_end : match.start()]
|
|
142
|
-
if before:
|
|
143
|
-
new_content_nodes.append(soup.new_string(before))
|
|
144
|
-
|
|
145
|
-
# Create the new span tag
|
|
146
|
-
class_name = match.group(1)
|
|
147
|
-
inner_text = match.group(2)
|
|
148
|
-
new_span = soup.new_tag("span", attrs={"class": class_name})
|
|
149
|
-
new_span.string = inner_text
|
|
150
|
-
new_content_nodes.append(new_span)
|
|
151
|
-
|
|
152
|
-
last_end = match.end()
|
|
153
|
-
|
|
154
|
-
# Add remaining text after the last match
|
|
155
|
-
after = content[last_end:]
|
|
156
|
-
if after:
|
|
157
|
-
new_content_nodes.append(soup.new_string(after))
|
|
158
|
-
|
|
159
|
-
if new_content_nodes:
|
|
160
|
-
# Replace the original text node with the new nodes
|
|
161
|
-
for node in reversed(new_content_nodes):
|
|
162
|
-
text_node.insert_after(node)
|
|
163
|
-
text_node.extract()
|
|
164
|
-
|
|
165
|
-
return str(soup)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|