markdown_convert 1.2.45__tar.gz → 1.2.47__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: markdown_convert
3
- Version: 1.2.45
3
+ Version: 1.2.47
4
4
  Summary: Convert Markdown files to PDF from your command line.
5
5
  Project-URL: homepage, https://github.com/Julynx/markdown_convert
6
6
  Author-email: Julio Cabria <juliocabria@tutanota.com>
@@ -181,7 +181,7 @@ p > img + em {
181
181
  font-style: italic;
182
182
  color: var(--color-text-light);
183
183
  display: block;
184
- margin: 1em auto;
184
+ margin: 0 auto;
185
185
  }
186
186
 
187
187
  /* Image alt text attributes */
@@ -212,6 +212,22 @@ img[alt*="::wide::"] {
212
212
  max-width: 100vw;
213
213
  }
214
214
 
215
/* Paragraphs whose direct child image carries "::left::" in its alt text.
   The paragraph shrinks to its content (fit-content) and the auto right
   margin absorbs the remaining width, keeping it against the left edge.
   The column flex layout stacks the paragraph's children vertically and
   centres them horizontally relative to each other. */
p:has(> img[alt*="::left::"]) {
  display: flex;
  flex-direction: column;
  width: fit-content;
  margin-right: auto;
  align-items: center;
}
222
+
223
/* Paragraphs whose direct child image carries "::right::" in its alt text.
   Mirror of the "::left::" rule: the auto left margin absorbs the remaining
   width, pushing the shrink-wrapped paragraph against the right edge, while
   the column flex layout stacks and centres the paragraph's children. */
p:has(> img[alt*="::right::"]) {
  display: flex;
  flex-direction: column;
  width: fit-content;
  margin-left: auto;
  align-items: center;
}
230
+
215
231
  p:has(> img[alt*="::inline::"]) {
216
232
  display: inline-block;
217
233
  width: min-content;
@@ -391,8 +407,7 @@ math {
391
407
  .keyboard,
392
408
  .key {
393
409
  font-family:
394
- "Segoe UI Symbol", "DejaVu Sans", "Liberation Sans", "Arial",
395
- "sans-serif";
410
+ "Segoe UI Symbol", "DejaVu Sans", "Liberation Sans", "Arial", "sans-serif";
396
411
  font-size: 0.9rem;
397
412
  background-color: var(--color-text-background);
398
413
  border: 1px solid var(--color-border);
@@ -400,3 +415,8 @@ math {
400
415
  padding: 0.1em 0.4em;
401
416
  box-shadow: rgba(100, 100, 100, 0.4) 0px 2px 4px 0px;
402
417
  }
418
+
419
/* Utility class: renders the element as a block and centres its inline
   content. NOTE(review): presumably targeted through the custom span
   notation (center{{ ... }}) - confirm against the documentation. */
.center {
  display: block;
  text-align: center;
}
@@ -18,8 +18,7 @@ from .transform import (
18
18
  create_sections,
19
19
  render_mermaid_diagrams,
20
20
  create_html_document,
21
- render_checkboxes,
22
- create_spans,
21
+ render_extra_features,
23
22
  )
24
23
  from .utils import drop_duplicates
25
24
 
@@ -135,8 +134,7 @@ def convert(
135
134
  html = markdown2.markdown_path(markdown_path, extras=MARKDOWN_EXTENSIONS)
136
135
  html = create_sections(html)
137
136
  html = render_mermaid_diagrams(html, nonce=nonce)
138
- html = render_checkboxes(html)
139
- html = create_spans(html)
137
+ html = render_extra_features(html)
140
138
 
141
139
  _generate_pdf_with_playwright(
142
140
  html,
@@ -207,8 +205,7 @@ def convert_text(markdown_text, css_text=None, *, extend_default_css=True):
207
205
  html = markdown2.markdown(markdown_text, extras=MARKDOWN_EXTENSIONS)
208
206
  html = create_sections(html)
209
207
  html = render_mermaid_diagrams(html, nonce=nonce)
210
- html = render_checkboxes(html)
211
- html = create_spans(html)
208
+ html = render_extra_features(html)
212
209
 
213
210
  return _generate_pdf_with_playwright(
214
211
  html,
@@ -0,0 +1,171 @@
1
+ """
2
+ Module for transforming HTML content.
3
+ """
4
+
5
+ import re
6
+
7
+ from bs4 import BeautifulSoup
8
+
9
+
10
def create_html_document(html_content, css_content, csp):
    """
    Assembles a standalone HTML page around already-rendered content.

    Arguments that are None or empty are rendered as empty strings, so the
    surrounding markup is always emitted. Values are interpolated verbatim;
    no escaping is applied to any of them.

    Args:
        html_content (str): Markup placed inside the <body> element.
        css_content (str): Stylesheet text placed inside <style> in the <head>.
        csp (str): Value of the Content-Security-Policy <meta> tag.
    Returns:
        str: The complete HTML document.
    """
    policy = csp or ""
    stylesheet = css_content or ""
    body = html_content or ""

    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta http-equiv="Content-Security-Policy" content="{policy}">
<style>
{stylesheet}
</style>
</head>
<body>
{body}
</body>
</html>"""
33
+
34
+
35
def create_sections(html_string):
    """
    Wraps each h2 or h3 and its following content in a <section> tag.
    The section ends when the next h2 or h3 sibling is encountered, or the parent ends.

    Args:
        html_string (str): The input HTML string.
    Returns:
        str: The modified HTML string with sections wrapped.
    """
    heading_names = ["h2", "h3"]
    soup = BeautifulSoup(html_string, "html.parser")

    # find_all returns a snapshot, so moving nodes below does not disturb
    # the iteration over the headers.
    for header in soup.find_all(heading_names):
        new_section = soup.new_tag("section")
        header.insert_before(new_section)

        current = header
        # Identity check on purpose: Beautiful Soup's `==` compares markup,
        # so a later heading with the same name, attributes and text would
        # compare equal to `header`, be swallowed into this section and then
        # be wrapped again, producing nested sections.
        while current is not None and (
            current is header or current.name not in heading_names
        ):
            # Remember the sibling first: appending moves `current` out of
            # its present position and changes its next_sibling.
            next_sibling = current.next_sibling
            new_section.append(current)
            current = next_sibling

    return str(soup)
60
+
61
+
62
def render_mermaid_diagrams(html, *, nonce):
    """
    Enables Mermaid diagram rendering for the HTML content.

    When the content contains the exact opening tag <div class="mermaid">,
    a module script is prepended that imports Mermaid from the jsDelivr CDN
    and initializes it with startOnLoad enabled. Otherwise the content is
    returned unchanged.

    Args:
        html (str): HTML content.
        nonce (str): Cryptographic nonce for CSP, set on the <script> tag.
    Returns:
        str: HTML content, prefixed with the Mermaid loader script when needed.
    """
    # Nothing to load when the page holds no Mermaid container.
    if '<div class="mermaid">' not in html:
        return html

    loader_script = f"""
<script type="module" nonce="{nonce}">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
mermaid.initialize({{
startOnLoad: true,
theme: 'default',
themeVariables: {{}},
fontFamily: 'arial, verdana, sans-serif'
}});
</script>
"""
    return loader_script + html
88
+
89
+
90
def render_extra_features(html):
    """
    Renders extra features like checkboxes, highlights, and custom spans in the HTML content.

    Three inline notations are recognised inside text nodes:
        "[x]" or "[" + one whitespace character + "]"
            becomes <input type="checkbox"> (with the checked attribute for "[x]").
        "==text=="
            becomes <span class="highlight">text</span>.
        "name{{ text }}"
            becomes <span class="name">text</span>.

    Text located anywhere inside <code>, <pre>, <script> or <style> is left
    untouched, including text nested in child elements of those tags (for
    example the <span> tokens of a syntax-highlighted code block).

    Args:
        html (str): HTML content.
    Returns:
        str: HTML content with extra features rendered.
    """

    def _create_checkbox(soup, match):
        # Build the checkbox input; only the "[x]" form is pre-checked.
        tag = soup.new_tag("input", type="checkbox")
        if "[x]" in match.group("checkbox"):
            tag["checked"] = ""
        return tag

    def _create_highlight(soup, match):
        # Wrap the text between the "==" delimiters in a highlight span.
        tag = soup.new_tag("span", attrs={"class": "highlight"})
        tag.string = match.group("hl_content")
        return tag

    def _create_custom_span(soup, match):
        # Use the identifier in front of "{{" as the span's class name.
        tag = soup.new_tag("span", attrs={"class": match.group("cls")})
        tag.string = match.group("sp_content")
        return tag

    # Keys are the names of the outer groups of master_pattern. For nested
    # groups, match.lastgroup reports the outer one because it closes last.
    handlers = {
        "checkbox": _create_checkbox,
        "highlight": _create_highlight,
        "span": _create_custom_span,
    }

    master_pattern = re.compile(
        r"(?P<checkbox>\[\s\]|\[x\])|"
        r"(?P<highlight>==(?P<hl_content>.*?)==)|"
        r"(?P<span>(?P<cls>[a-zA-Z0-9_-]+)\{\{\s*(?P<sp_content>.*?)\s*\}\})"
    )

    ignored_tags = {"code", "pre", "script", "style"}

    soup = BeautifulSoup(html, "html.parser")
    # find_all returns a snapshot, so replacing nodes below is safe and the
    # strings created by the handlers are not processed again.
    for text_node in soup.find_all(string=True):
        # Check every ancestor rather than only the direct parent, so text
        # wrapped in extra elements inside an ignored tag is skipped too.
        if any(ancestor.name in ignored_tags for ancestor in text_node.parents):
            continue

        content = text_node.string
        new_nodes = []
        last_end = 0
        for match in master_pattern.finditer(content):
            start, end = match.span()

            # Keep the plain text between the previous match and this one.
            if start > last_end:
                new_nodes.append(content[last_end:start])

            kind = match.lastgroup
            handler = handlers.get(kind)
            if handler is None:
                # No handler registered: keep the matched text as it was
                # instead of silently dropping it.
                new_nodes.append(match.group(0))
            else:
                # Best effort: a failing handler must not abort the whole
                # conversion, so fall back to the raw matched text.
                try:
                    tag = handler(soup, match)
                    new_nodes.append(tag)
                except Exception as exc:
                    print(f"Warning: Handler for '{kind}' failed with exception: {exc}")
                    new_nodes.append(match.group(0))

            last_end = end

        # No notation found in this text node: leave it as it is.
        if not new_nodes:
            continue

        # Keep any remaining text after the last match.
        if last_end < len(content):
            new_nodes.append(content[last_end:])

        text_node.replace_with(*new_nodes)

    return str(soup)
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "markdown_convert"
7
- version = "1.2.45"
7
+ version = "1.2.47"
8
8
  description = "Convert Markdown files to PDF from your command line."
9
9
  authors = [
10
10
  { name = "Julio Cabria", email = "juliocabria@tutanota.com" },
@@ -1,165 +0,0 @@
1
- """
2
- Module for transforming HTML content.
3
- """
4
-
5
- import re
6
-
7
- from bs4 import BeautifulSoup
8
-
9
-
10
def create_html_document(html_content, css_content, csp):
    """
    Builds a complete HTML document from the given content, CSS, and Content Security Policy.

    Falsy arguments (None or empty) are rendered as empty strings. All values
    are inserted verbatim, without escaping.

    Args:
        html_content (str): The HTML content to include in the body.
        css_content (str): The CSS styles to include in the head.
        csp (str): The Content Security Policy string.
    Returns:
        str: A complete HTML document as a string.
    """
    csp_value = csp or ""
    css_value = css_content or ""
    html_value = html_content or ""

    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta http-equiv="Content-Security-Policy" content="{csp_value}">
<style>
{css_value}
</style>
</head>
<body>
{html_value}
</body>
</html>"""
33
-
34
-
35
def create_sections(html_string):
    """
    Wraps each h2 and its following content in a <section> tag.
    The section ends when the next h2 sibling is encountered, or the parent ends.

    Args:
        html_string (str): The input HTML string.
    Returns:
        str: The modified HTML string with h2 sections wrapped.
    """
    soup = BeautifulSoup(html_string, "html.parser")

    # find_all returns a snapshot, so moving nodes below does not disturb
    # the iteration over the headers.
    for second_level_header in soup.find_all("h2"):
        new_section = soup.new_tag("section")
        second_level_header.insert_before(new_section)

        current = second_level_header
        # Identity check on purpose: Beautiful Soup's `==` compares markup,
        # so a later h2 with identical attributes and text would compare
        # equal, be swallowed into this section and then be wrapped again,
        # producing nested sections.
        while current is not None and (
            current is second_level_header or current.name != "h2"
        ):
            # Remember the sibling first: appending moves `current` out of
            # its present position and changes its next_sibling.
            next_sibling = current.next_sibling
            new_section.append(current)
            current = next_sibling

    return str(soup)
60
-
61
-
62
def render_mermaid_diagrams(html, *, nonce):
    """
    Prepends the Mermaid loader script when the HTML content holds a diagram.

    The content is returned unchanged unless it contains the exact opening
    tag <div class="mermaid">. The script imports Mermaid from the jsDelivr
    CDN and initializes it with startOnLoad enabled.

    Args:
        html (str): HTML content.
        nonce (str): Cryptographic nonce for CSP, set on the <script> tag.
    Returns:
        str: HTML content, prefixed with the Mermaid loader script when needed.
    """
    has_diagram = '<div class="mermaid">' in html
    if not has_diagram:
        return html

    script_block = f"""
<script type="module" nonce="{nonce}">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
mermaid.initialize({{
startOnLoad: true,
theme: 'default',
themeVariables: {{}},
fontFamily: 'arial, verdana, sans-serif'
}});
</script>
"""
    return script_block + html
88
-
89
-
90
def render_checkboxes(html):
    """
    Renders checkboxes in the HTML content by replacing "[ ]" and "[x]" markers
    with checkbox <input> elements.

    Markers inside <code>...</code> elements are left untouched, whether or
    not the opening tag carries attributes (e.g. <code class="python">).

    Args:
        html (str): HTML content.
    Returns:
        str: HTML content with rendered checkboxes.
    """
    replacements = (
        ("[ ]", "<input type='checkbox'>"),
        ("[x]", "<input type='checkbox' checked>"),
    )

    # The capturing group makes re.split keep the code elements in the
    # result: they land at the odd indices, the surrounding markup at the
    # even ones. `\b[^>]*` also accepts opening tags with attributes.
    parts = re.split(r"(<code\b[^>]*>.*?</code>)", html, flags=re.DOTALL)

    # Only rewrite the parts that are NOT code elements.
    for part_index in range(0, len(parts), 2):
        for marker, marker_html in replacements:
            parts[part_index] = parts[part_index].replace(marker, marker_html)

    return "".join(parts)
113
-
114
-
115
def create_spans(html):
    """
    Renders custom spans in the HTML content by turning classname{{ content }}
    markers found in text nodes into <span class="classname">content</span>.

    Text nodes whose direct parent is a code, pre, script or style tag are skipped.

    Args:
        html (str): HTML content.
    Returns:
        str: HTML content with rendered custom spans.
    """
    skipped_parents = ["code", "pre", "script", "style"]

    # Group 1 is the class name, group 2 the content without the padding
    # whitespace inside the braces.
    span_marker = re.compile(r"([a-zA-Z0-9_-]+){{\s*(.*?)\s*}}")

    soup = BeautifulSoup(html, "html.parser")
    for text_node in soup.find_all(string=True):
        if text_node.parent.name in skipped_parents:
            continue

        text = str(text_node)
        # Cheap pre-check before running the regular expression.
        if "{{" not in text:
            continue

        pieces = []
        cursor = 0
        for found in span_marker.finditer(text):
            # Plain text between the previous marker and this one.
            if found.start() > cursor:
                pieces.append(soup.new_string(text[cursor : found.start()]))

            class_name, inner_text = found.groups()
            span = soup.new_tag("span", attrs={"class": class_name})
            span.string = inner_text
            pieces.append(span)

            cursor = found.end()

        # Plain text after the last marker.
        if cursor < len(text):
            pieces.append(soup.new_string(text[cursor:]))

        if not pieces:
            continue

        # Chain the new nodes after the original text node in document
        # order, then drop the original.
        anchor = text_node
        for piece in pieces:
            anchor.insert_after(piece)
            anchor = piece
        text_node.extract()

    return str(soup)