markdown_convert 1.2.46__tar.gz → 1.2.48__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: markdown_convert
3
- Version: 1.2.46
3
+ Version: 1.2.48
4
4
  Summary: Convert Markdown files to PDF from your command line.
5
5
  Project-URL: homepage, https://github.com/Julynx/markdown_convert
6
6
  Author-email: Julio Cabria <juliocabria@tutanota.com>
@@ -12,12 +12,18 @@
12
12
  --left-margin-big: 2em;
13
13
 
14
14
  /* -- Colors -- */
15
- --color-text: #333; /* Main text color */
16
- --color-text-light: #666; /* Tables, blockquotes, footers */
17
- --color-text-background: #eee; /* Code blocks, table headers */
18
- --color-border: #999; /* Image borders, rules */
19
- --color-border-light: #ccc; /* Table borders, blockquotes */
20
- --color-links: #09f; /* Hyperlinks */
15
+ /* Main text color */
16
+ --color-text: #333;
17
+ /* Tables, blockquotes, footers */
18
+ --color-text-light: #666;
19
+ /* Code blocks, table headers */
20
+ --color-text-background: #eee;
21
+ /* Image borders, rules */
22
+ --color-border: #999;
23
+ /* Table borders, blockquotes */
24
+ --color-border-light: #ccc;
25
+ /* Hyperlinks */
26
+ --color-links: #09f;
21
27
  }
22
28
 
23
29
  /* Document */
@@ -59,6 +65,7 @@ h2,
59
65
  h3,
60
66
  h4,
61
67
  h5 {
68
+ line-height: 1;
62
69
  color: var(--color-text);
63
70
  font-weight: 600;
64
71
  }
@@ -122,7 +129,7 @@ pre {
122
129
  white-space: pre-wrap;
123
130
  }
124
131
 
125
- pre > code {
132
+ pre>code {
126
133
  display: block;
127
134
  padding: 1em;
128
135
  margin-top: 0;
@@ -160,7 +167,7 @@ blockquote {
160
167
  color: var(--color-text-light);
161
168
  }
162
169
 
163
- blockquote > blockquote {
170
+ blockquote>blockquote {
164
171
  padding-left: var(--left-margin-small);
165
172
  }
166
173
 
@@ -175,13 +182,13 @@ img {
175
182
  }
176
183
 
177
184
  /* Image footers */
178
- p > img + em {
185
+ p>img+em {
179
186
  text-align: center;
180
187
  font-size: 1rem;
181
188
  font-style: italic;
182
189
  color: var(--color-text-light);
183
190
  display: block;
184
- margin: 1em auto;
191
+ margin: 0 auto;
185
192
  }
186
193
 
187
194
  /* Image alt text attributes */
@@ -212,6 +219,22 @@ img[alt*="::wide::"] {
212
219
  max-width: 100vw;
213
220
  }
214
221
 
222
+ p:has(> img[alt*="::left::"]) {
223
+ display: flex;
224
+ flex-direction: column;
225
+ width: fit-content;
226
+ margin-right: auto;
227
+ align-items: center;
228
+ }
229
+
230
+ p:has(> img[alt*="::right::"]) {
231
+ display: flex;
232
+ flex-direction: column;
233
+ width: fit-content;
234
+ margin-left: auto;
235
+ align-items: center;
236
+ }
237
+
215
238
  p:has(> img[alt*="::inline::"]) {
216
239
  display: inline-block;
217
240
  width: min-content;
@@ -268,19 +291,10 @@ th,
268
291
  td {
269
292
  padding: 0.5em 0.75em;
270
293
  border: 1px solid var(--color-border-light);
271
- overflow-wrap: break-word;
272
294
  vertical-align: top;
273
- }
274
-
275
- th {
276
- white-space: normal;
277
- word-break: keep-all;
278
- hyphens: auto;
279
- }
280
-
281
- td {
295
+ overflow-wrap: break-word;
296
+ word-break: normal;
282
297
  white-space: normal;
283
- word-break: break-word;
284
298
  }
285
299
 
286
300
  /* Horizontal rulers */
@@ -295,7 +309,7 @@ hr:has(+ hr) {
295
309
  visibility: hidden;
296
310
  }
297
311
 
298
- hr + hr {
312
+ hr+hr {
299
313
  clear: both;
300
314
  break-after: always;
301
315
  page-break-after: always;
@@ -391,8 +405,7 @@ math {
391
405
  .keyboard,
392
406
  .key {
393
407
  font-family:
394
- "Segoe UI Symbol", "DejaVu Sans", "Liberation Sans", "Arial",
395
- "sans-serif";
408
+ "Segoe UI Symbol", "DejaVu Sans", "Liberation Sans", "Arial", "sans-serif";
396
409
  font-size: 0.9rem;
397
410
  background-color: var(--color-text-background);
398
411
  border: 1px solid var(--color-border);
@@ -400,3 +413,8 @@ math {
400
413
  padding: 0.1em 0.4em;
401
414
  box-shadow: rgba(100, 100, 100, 0.4) 0px 2px 4px 0px;
402
415
  }
416
+
417
+ .center {
418
+ display: block;
419
+ text-align: center;
420
+ }
@@ -18,8 +18,7 @@ from .transform import (
18
18
  create_sections,
19
19
  render_mermaid_diagrams,
20
20
  create_html_document,
21
- render_checkboxes,
22
- create_spans,
21
+ render_extra_features,
23
22
  )
24
23
  from .utils import drop_duplicates
25
24
 
@@ -135,8 +134,7 @@ def convert(
135
134
  html = markdown2.markdown_path(markdown_path, extras=MARKDOWN_EXTENSIONS)
136
135
  html = create_sections(html)
137
136
  html = render_mermaid_diagrams(html, nonce=nonce)
138
- html = render_checkboxes(html)
139
- html = create_spans(html)
137
+ html = render_extra_features(html)
140
138
 
141
139
  _generate_pdf_with_playwright(
142
140
  html,
@@ -207,8 +205,7 @@ def convert_text(markdown_text, css_text=None, *, extend_default_css=True):
207
205
  html = markdown2.markdown(markdown_text, extras=MARKDOWN_EXTENSIONS)
208
206
  html = create_sections(html)
209
207
  html = render_mermaid_diagrams(html, nonce=nonce)
210
- html = render_checkboxes(html)
211
- html = create_spans(html)
208
+ html = render_extra_features(html)
212
209
 
213
210
  return _generate_pdf_with_playwright(
214
211
  html,
@@ -0,0 +1,171 @@
1
+ """
2
+ Module for transforming HTML content.
3
+ """
4
+
5
+ import re
6
+
7
+ from bs4 import BeautifulSoup
8
+
9
+
10
+ def create_html_document(html_content, css_content, csp):
11
+ """
12
+ Creates a complete HTML document with the given content, CSS, and Content Security Policy.
13
+ Args:
14
+ html_content (str): The HTML content to include in the body.
15
+ css_content (str): The CSS styles to include in the head.
16
+ csp (str): The Content Security Policy string.
17
+ Returns:
18
+ str: A complete HTML document as a string.
19
+ """
20
+ return f"""<!DOCTYPE html>
21
+ <html>
22
+ <head>
23
+ <meta charset="UTF-8">
24
+ <meta http-equiv="Content-Security-Policy" content="{csp or ""}">
25
+ <style>
26
+ {css_content or ""}
27
+ </style>
28
+ </head>
29
+ <body>
30
+ {html_content or ""}
31
+ </body>
32
+ </html>"""
33
+
34
+
35
+ def create_sections(html_string):
36
+ """
37
+ Wraps each h2 or h3 and its following content in a <section> tag.
38
+ The section ends when the next h2 or h3 is encountered, or the parent ends.
39
+
40
+ Args:
41
+ html_string (str): The input HTML string.
42
+ Returns:
43
+ str: The modified HTML string with sections wrapped.
44
+ """
45
+ soup = BeautifulSoup(html_string, "html.parser")
46
+
47
+ for header in soup.find_all(["h2", "h3"]):
48
+ new_section = soup.new_tag("section")
49
+ header.insert_before(new_section)
50
+
51
+ current = header
52
+ while current is not None and (
53
+ current == header or current.name not in ["h2", "h3"]
54
+ ):
55
+ next_sibling = current.next_sibling
56
+ new_section.append(current)
57
+ current = next_sibling
58
+
59
+ return str(soup)
60
+
61
+
62
+ def render_mermaid_diagrams(html, *, nonce):
63
+ """
64
+ Renders Mermaid diagrams in the HTML content.
65
+
66
+ Args:
67
+ html (str): HTML content.
68
+ nonce (str): Cryptographic nonce for CSP.
69
+ Returns:
70
+ str: HTML content with rendered Mermaid diagrams.
71
+ """
72
+ mermaid_script = f"""
73
+ <script type="module" nonce="{nonce}">
74
+ import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
75
+ mermaid.initialize({{
76
+ startOnLoad: true,
77
+ theme: 'default',
78
+ themeVariables: {{}},
79
+ fontFamily: 'arial, verdana, sans-serif'
80
+ }});
81
+ </script>
82
+ """
83
+
84
+ if '<div class="mermaid">' in html:
85
+ html = mermaid_script + html
86
+
87
+ return html
88
+
89
+
90
+ def render_extra_features(html):
91
+ """
92
+ Renders extra features like checkboxes, highlights, and custom spans in the HTML content.
93
+
94
+ Args:
95
+ html (str): HTML content.
96
+ Returns:
97
+ str: HTML content with extra features rendered.
98
+ """
99
+
100
+ def _create_checkbox(soup, match):
101
+ tag = soup.new_tag("input", type="checkbox")
102
+ if "[x]" in match.group("checkbox"):
103
+ tag["checked"] = ""
104
+ return tag
105
+
106
+ def _create_highlight(soup, match):
107
+ tag = soup.new_tag("span", attrs={"class": "highlight"})
108
+ tag.string = match.group("hl_content")
109
+ return tag
110
+
111
+ def _create_custom_span(soup, match):
112
+ tag = soup.new_tag("span", attrs={"class": match.group("cls")})
113
+ tag.string = match.group("sp_content")
114
+ return tag
115
+
116
+ handlers = {
117
+ "checkbox": _create_checkbox,
118
+ "highlight": _create_highlight,
119
+ "span": _create_custom_span,
120
+ }
121
+
122
+ master_pattern = re.compile(
123
+ r"(?P<checkbox>\[\s\]|\[x\])|"
124
+ r"(?P<highlight>==(?P<hl_content>.*?)==)|"
125
+ r"(?P<span>(?P<cls>[a-zA-Z0-9_-]+)\{\{\s*(?P<sp_content>.*?)\s*\}\})"
126
+ )
127
+
128
+ ignored_tags = {"code", "pre", "script", "style"}
129
+
130
+ soup = BeautifulSoup(html, "html.parser")
131
+ for text_node in soup.find_all(string=True):
132
+ # Ignore text nodes within certain tags
133
+ if text_node.parent.name in ignored_tags:
134
+ continue
135
+
136
+ # If no match, skip processing
137
+ content = text_node.string
138
+ if not master_pattern.search(content):
139
+ continue
140
+
141
+ new_nodes = []
142
+ last_end = 0
143
+ for match in master_pattern.finditer(content):
144
+ start, end = match.span()
145
+
146
+ # Append text before the match
147
+ if start > last_end:
148
+ new_nodes.append(content[last_end:start])
149
+
150
+ kind = match.lastgroup
151
+
152
+ # Call the appropriate handler
153
+ handler = handlers.get(kind)
154
+ if handler:
155
+ try:
156
+ tag = handler(soup, match)
157
+ new_nodes.append(tag)
158
+ except Exception as exc:
159
+ print(f"Warning: Handler for '{kind}' failed with exception: {exc}")
160
+ new_nodes.append(match.group(0))
161
+
162
+ last_end = end
163
+
164
+ # Append any remaining text after the last match
165
+ if new_nodes:
166
+ if last_end < len(content):
167
+ new_nodes.append(content[last_end:])
168
+
169
+ text_node.replace_with(*new_nodes)
170
+
171
+ return str(soup)
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "markdown_convert"
7
- version = "1.2.46"
7
+ version = "1.2.48"
8
8
  description = "Convert Markdown files to PDF from your command line."
9
9
  authors = [
10
10
  { name = "Julio Cabria", email = "juliocabria@tutanota.com" },
@@ -1,169 +0,0 @@
1
- """
2
- Module for transforming HTML content.
3
- """
4
-
5
- import re
6
-
7
- from bs4 import BeautifulSoup
8
-
9
-
10
- def create_html_document(html_content, css_content, csp):
11
- """
12
- Creates a complete HTML document with the given content, CSS, and Content Security Policy.
13
- Args:
14
- html_content (str): The HTML content to include in the body.
15
- css_content (str): The CSS styles to include in the head.
16
- csp (str): The Content Security Policy string.
17
- Returns:
18
- str: A complete HTML document as a string.
19
- """
20
- return f"""<!DOCTYPE html>
21
- <html>
22
- <head>
23
- <meta charset="UTF-8">
24
- <meta http-equiv="Content-Security-Policy" content="{csp or ""}">
25
- <style>
26
- {css_content or ""}
27
- </style>
28
- </head>
29
- <body>
30
- {html_content or ""}
31
- </body>
32
- </html>"""
33
-
34
-
35
- def create_sections(html_string):
36
- """
37
- Wraps each h2 or h3 and its following content in a <section> tag.
38
- The section ends when the next h2 or h3 is encountered, or the parent ends.
39
-
40
- Args:
41
- html_string (str): The input HTML string.
42
- Returns:
43
- str: The modified HTML string with sections wrapped.
44
- """
45
- soup = BeautifulSoup(html_string, "html.parser")
46
-
47
- # Change 1: Search for both h2 and h3 tags
48
- for header in soup.find_all(["h2", "h3"]):
49
- # Create the new section
50
- new_section = soup.new_tag("section")
51
- header.insert_before(new_section)
52
-
53
- current = header
54
-
55
- # Change 2: Update loop to stop if it hits an h2 OR h3 (that isn't the current one)
56
- while current is not None and (
57
- current == header or current.name not in ["h2", "h3"]
58
- ):
59
- next_sibling = current.next_sibling
60
- new_section.append(current)
61
- current = next_sibling
62
-
63
- return str(soup)
64
-
65
-
66
- def render_mermaid_diagrams(html, *, nonce):
67
- """
68
- Renders Mermaid diagrams in the HTML content.
69
-
70
- Args:
71
- html (str): HTML content.
72
- nonce (str): Cryptographic nonce for CSP.
73
- Returns:
74
- str: HTML content with rendered Mermaid diagrams.
75
- """
76
- mermaid_script = f"""
77
- <script type="module" nonce="{nonce}">
78
- import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
79
- mermaid.initialize({{
80
- startOnLoad: true,
81
- theme: 'default',
82
- themeVariables: {{}},
83
- fontFamily: 'arial, verdana, sans-serif'
84
- }});
85
- </script>
86
- """
87
-
88
- if '<div class="mermaid">' in html:
89
- html = mermaid_script + html
90
-
91
- return html
92
-
93
-
94
- def render_checkboxes(html):
95
- """
96
- Renders checkboxes in the HTML content by replacing input elements with SVG representations.
97
- Args:
98
- html (str): HTML content.
99
- Returns:
100
- str: HTML content with rendered checkboxes.
101
- """
102
- unchecked = "[ ]"
103
- checked = "[x]"
104
-
105
- unchecked_html = "<input type='checkbox'>"
106
- checked_html = "<input type='checkbox' checked>"
107
-
108
- # Split by code blocks to avoid processing text inside them
109
- parts = re.split(r"(<code>.*?</code>)", html, flags=re.DOTALL)
110
- for part_index, _part in enumerate(parts):
111
- # Only process parts that are NOT code blocks
112
- if not parts[part_index].startswith("<code>"):
113
- parts[part_index] = parts[part_index].replace(unchecked, unchecked_html)
114
- parts[part_index] = parts[part_index].replace(checked, checked_html)
115
-
116
- return "".join(parts)
117
-
118
-
119
- def create_spans(html):
120
- """
121
- Renders custom spans in the HTML content by replacing classname{{ content }} tags.
122
- Args:
123
- html (str): HTML content.
124
- Returns:
125
- str: HTML content with rendered custom spans.
126
- """
127
- soup = BeautifulSoup(html, "html.parser")
128
-
129
- # Regex to match classname{{ content }}
130
- # It captures the class name and the content
131
- pattern = re.compile(r"([a-zA-Z0-9_-]+){{\s*(.*?)\s*}}")
132
-
133
- # We need to find all text nodes and replace the pattern
134
- for text_node in soup.find_all(string=True):
135
- # Skip text nodes inside code, pre, script, style tags
136
- if text_node.parent.name in ["code", "pre", "script", "style"]:
137
- continue
138
-
139
- content = str(text_node)
140
- if "{{" in content:
141
- new_content_nodes = []
142
- last_end = 0
143
- for match in pattern.finditer(content):
144
- # Add text before the match
145
- before = content[last_end : match.start()]
146
- if before:
147
- new_content_nodes.append(soup.new_string(before))
148
-
149
- # Create the new span tag
150
- class_name = match.group(1)
151
- inner_text = match.group(2)
152
- new_span = soup.new_tag("span", attrs={"class": class_name})
153
- new_span.string = inner_text
154
- new_content_nodes.append(new_span)
155
-
156
- last_end = match.end()
157
-
158
- # Add remaining text after the last match
159
- after = content[last_end:]
160
- if after:
161
- new_content_nodes.append(soup.new_string(after))
162
-
163
- if new_content_nodes:
164
- # Replace the original text node with the new nodes
165
- for node in reversed(new_content_nodes):
166
- text_node.insert_after(node)
167
- text_node.extract()
168
-
169
- return str(soup)