markdown_convert 1.2.51__py3-none-any.whl → 1.2.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -453,14 +453,6 @@ math {
453
453
  border-radius: 0.3rem;
454
454
  }
455
455
 
456
- .admonition header {
457
- display: flex;
458
- align-items: center;
459
- gap: 0.25rem;
460
- margin-bottom: 0.25rem;
461
- font-weight: bold;
462
- }
463
-
464
456
  .admonition strong {
465
457
  text-transform: capitalize;
466
458
  }
@@ -522,4 +514,15 @@ math {
522
514
 
523
515
  .admonition.caution strong {
524
516
  color: var(--color-ad-caution);
517
+ }
518
+
519
+ /* Vega-Lite charts*/
520
+ div.vega-lite,
521
+ div.vega {
522
+ display: flex;
523
+ justify-content: center;
524
+ align-items: center;
525
+ width: 100%;
526
+ margin-top: 1em;
527
+ margin-bottom: 1em;
525
528
  }
@@ -9,7 +9,6 @@ import time
9
9
  from datetime import datetime
10
10
  from pathlib import Path
11
11
 
12
- import markdown2
13
12
  from playwright.sync_api import sync_playwright
14
13
 
15
14
  from .autoinstall import ensure_chromium
@@ -19,6 +18,7 @@ from .constants import (
19
18
  MARKDOWN_EXTENSIONS,
20
19
  PDF_PARAMS,
21
20
  )
21
+ from .overrides import markdown2
22
22
  from .resources import get_code_css_path, get_css_path, get_output_path
23
23
  from .transform import (
24
24
  create_html_document,
@@ -3,98 +3,246 @@ Extras are defined as helper functions called by
3
3
  render_extra_features from transform.py
4
4
  """
5
5
 
6
+ import json
7
+ import re
6
8
 
7
- def create_checkbox(soup, match):
9
+ import vl_convert as vlc
10
+ from bs4 import BeautifulSoup, Tag
11
+ from ruamel.yaml import YAML
12
+
13
+
14
+ class ExtraFeature:
8
15
  """
9
- Render a tag for a checkbox.
16
+ Base class for extra features that can be applied to HTML.
10
17
 
11
- Args:
12
- soup: HTML beautifulsoup
13
- match: Element identified as a checkbox
14
- Returns:
15
- tag: Beautifulsoup tag representing the checkbox
18
+ Attributes:
19
+ pattern (str): Regex pattern to match the extra feature in the HTML.
20
+ run_before_stash (bool): Whether to run this extra before stashing code blocks.
16
21
  """
17
- tag = soup.new_tag("input", type="checkbox")
18
- if "[x]" in match.group("checkbox"):
19
- tag["checked"] = ""
20
- return tag
21
22
 
23
+ pattern = r""
24
+ run_before_stash = False
22
25
 
23
- def create_highlight(soup, match):
24
- """
25
- Render a tag for a highlight.
26
+ def replace(self, match, html):
27
+ """
28
+ Replaces the matched pattern with the rendered extra feature.
26
29
 
27
- Args:
28
- soup: HTML beautifulsoup
29
- match: Element identified as a highlight
30
- Returns:
31
- tag: Beautifulsoup tag representing the highlight
30
+ Args:
31
+ match (re.Match): The regex match object.
32
+ html (str): The full HTML content.
33
+
34
+ Returns:
35
+ str: The replacement string.
36
+
37
+ Raises:
38
+ NotImplementedError: If the subclass does not implement this method.
39
+ """
40
+ raise NotImplementedError("Subclasses must implement the replace method.")
41
+
42
+
43
+ class CheckboxExtra(ExtraFeature):
44
+ """
45
+ Extra feature for rendering checkboxes.
32
46
  """
33
- tag = soup.new_tag("span", attrs={"class": "highlight"})
34
- tag.string = match.group("hl_content")
35
- return tag
36
47
 
48
+ pattern = r"(?P<checkbox>\[\s\]|\[x\])"
37
49
 
38
- def create_custom_span(soup, match):
50
+ def replace(match, html):
51
+ """
52
+ Render a tag for a checkbox.
53
+
54
+ Args:
55
+ match: Element identified as a checkbox
56
+ Returns:
57
+ str: tag representing the checkbox
58
+ """
59
+ status = "checked" if "[x]" in match.group("checkbox") else ""
60
+ return f'<input type="checkbox" {status}>'
61
+
62
+
63
+ class HighlightExtra(ExtraFeature):
64
+ """
65
+ Extra feature for rendering highlighted text.
39
66
  """
40
- Render a tag for a custom span.
41
67
 
42
- Args:
43
- soup: HTML beautifulsoup
44
- match: Element identified as a custom span
45
- Returns:
46
- tag: Beautifulsoup tag representing the custom span
68
+ pattern = r"==(?P<content>.*?)=="
69
+
70
+ def replace(match, html):
71
+ """
72
+ Render a tag for a highlight.
73
+
74
+ Args:
75
+ match: Element identified as a highlight
76
+ Returns:
77
+ str: tag representing the highlight
78
+ """
79
+ content = match.group("content")
80
+ return f'<span class="highlight">{content}</span>'
81
+
82
+
83
+ class CustomSpanExtra(ExtraFeature):
47
84
  """
48
- tag = soup.new_tag("span", attrs={"class": match.group("cls")})
49
- tag.string = match.group("sp_content")
50
- return tag
85
+ Extra feature for rendering custom spans with specific classes.
86
+ """
87
+
88
+ pattern = r"(?P<cls>[a-zA-Z0-9_-]+)\{\{\s*(?P<content>.*?)\s*\}\}"
51
89
 
90
+ def replace(match, html):
91
+ """
92
+ Render a tag for a custom span.
52
93
 
53
- def create_toc(soup, match):
94
+ Args:
95
+ match: Element identified as a custom span
96
+ Returns:
97
+ str: tag representing the custom span
98
+ """
99
+ cls = match.group("cls")
100
+ content = match.group("content")
101
+ return f'<span class="{cls}">{content}</span>'
102
+
103
+
104
+ class TocExtra(ExtraFeature):
105
+ """
106
+ Extra feature for rendering a Table of Contents.
54
107
  """
55
- Render a tag for a table of contents
56
108
 
109
+ pattern = r"\[TOC(?:\s+depth=(?P<depth>\d+))?\]"
110
+
111
+ def replace(match, html):
112
+ """
113
+ Render a tag for a table of contents
114
+
115
+ Args:
116
+ match: Element identified as a table of contents
117
+ Returns:
118
+ str: tag representing the table of contents
119
+ """
120
+ soup = BeautifulSoup(html, "html.parser")
121
+ max_level = match.group("depth")
122
+ max_level = 3 if max_level is None else int(max_level)
123
+
124
+ headers = [
125
+ header
126
+ for header in soup.find_all(
127
+ [f"h{index}" for index in range(1, max_level + 1)]
128
+ )
129
+ if header.get("id")
130
+ ]
131
+ if not headers:
132
+ return ""
133
+
134
+ tag: Tag = soup.new_tag("ul", attrs={"class": "toc"})
135
+ active_list = {0: tag}
136
+ last_list_element = {}
137
+
138
+ for header in headers:
139
+ level = int(header.name[1])
140
+
141
+ if level not in active_list:
142
+ parent_lvl = max(key for key in active_list if key < level)
143
+ if last_list_element.get(parent_lvl):
144
+ sub_list = soup.new_tag("ul")
145
+ last_list_element[parent_lvl].append(sub_list)
146
+ active_list[level] = sub_list
147
+ else:
148
+ active_list[level] = active_list[parent_lvl]
149
+
150
+ active_list = {
151
+ key: value for key, value in active_list.items() if key <= level
152
+ }
153
+
154
+ list_item = soup.new_tag("li")
155
+ link = soup.new_tag("a", href=f"#{header['id']}")
156
+ link.string = header.get_text(strip=True)
157
+ list_item.append(link)
158
+
159
+ active_list[level].append(list_item)
160
+ last_list_element[level] = list_item
161
+
162
+ return tag.prettify()
163
+
164
+
165
+ class VegaExtra(ExtraFeature):
166
+ """
167
+ Extra feature for rendering Vega-Lite diagrams from JSON or YAML.
168
+ """
169
+
170
+ pattern = (
171
+ r"<pre[^>]*>"
172
+ r"<code[^>]*class=[\"'][^\"]*language-vega[^\"]*[\"'][^>]*>"
173
+ r"(?P<content>.*?)"
174
+ r"</code>"
175
+ r"</pre>"
176
+ )
177
+ run_before_stash = True
178
+
179
+ def replace(match, html):
180
+ """
181
+ Render a tag for a vega lite diagram from JSON or YAML.
182
+
183
+ Args:
184
+ match (re.Match): Element identified as a vega lite diagram.
185
+ html (str): The full HTML content.
186
+
187
+ Returns:
188
+ str: SVG tag representing the vega lite diagram.
189
+ """
190
+ content = match.group("content")
191
+ spec = None
192
+
193
+ try:
194
+ spec = json.loads(content)
195
+ except (json.JSONDecodeError, TypeError):
196
+ try:
197
+ yaml = YAML(typ="safe")
198
+ spec = yaml.load(content)
199
+ except Exception as exc:
200
+ print(f"WARNING: Failed to parse Vega-Lite spec: {exc}")
201
+ return match.group(0)
202
+
203
+ if spec is None:
204
+ return match.group(0)
205
+
206
+ try:
207
+ tag = vlc.vegalite_to_svg(spec)
208
+ return f"<div class='vega-lite'>{tag}</div>"
209
+ except Exception as exc:
210
+ print(f"WARNING: Failed to convert Vega-Lite spec to SVG: {exc}")
211
+ return match.group(0)
212
+
213
+
214
+ def apply_extras(extras: set[ExtraFeature], html, before_stash=False):
215
+ """
216
+ Applies extra features to an html string.
57
217
  Args:
58
- soup: HTML beautifulsoup
59
- match: Element identified as a table of contents
218
+ extras: set[ExtraFeature] Extra features to apply
219
+ html: complete html text, used by some extras like TOC.
60
220
  Returns:
61
- tag: Beautifulsoup tag representing the table of contents
221
+ str: The updated html.
62
222
  """
63
- max_level = match.group("depth")
64
- max_level = 3 if max_level is None else int(max_level)
65
-
66
- headers = [
67
- header
68
- for header in soup.find_all([f"h{index}" for index in range(1, max_level + 1)])
69
- if header.get("id")
70
- ]
71
- if not headers:
72
- return ""
73
-
74
- tag = soup.new_tag("ul", attrs={"class": "toc"})
75
- active_list = {0: tag}
76
- last_list_element = {}
77
-
78
- for header in headers:
79
- level = int(header.name[1])
80
-
81
- if level not in active_list:
82
- parent_lvl = max(key for key in active_list if key < level)
83
- if last_list_element.get(parent_lvl):
84
- sub_list = soup.new_tag("ul")
85
- last_list_element[parent_lvl].append(sub_list)
86
- active_list[level] = sub_list
87
- else:
88
- active_list[level] = active_list[parent_lvl]
89
-
90
- active_list = {key: value for key, value in active_list.items() if key <= level}
91
-
92
- list_item = soup.new_tag("li")
93
- link = soup.new_tag("a", href=f"#{header['id']}")
94
- link.string = header.get_text(strip=True)
95
- list_item.append(link)
96
-
97
- active_list[level].append(list_item)
98
- last_list_element[level] = list_item
99
-
100
- return tag
223
+ for extra in extras:
224
+ if not extra.run_before_stash == before_stash:
225
+ continue
226
+
227
+ # Loop until the pattern no longer matches
228
+ while re.search(extra.pattern, html, flags=re.DOTALL):
229
+ new_html = html
230
+ try:
231
+ new_html = re.sub(
232
+ extra.pattern,
233
+ lambda match: extra.replace(match, html=html),
234
+ html,
235
+ flags=re.DOTALL,
236
+ )
237
+ except Exception as exc:
238
+ print(
239
+ f"WARNING: An exception occurred while trying to apply an extra:\n{exc}"
240
+ )
241
+ pass
242
+
243
+ # Safety break:
244
+ if new_html == html:
245
+ break
246
+ html = new_html
247
+
248
+ return html
@@ -0,0 +1,36 @@
1
+ """
2
+ Overrides for markdown2.
3
+ """
4
+
5
+ import markdown2
6
+
7
+
8
+ def tags(self, lexer_name: str) -> tuple[str, str]:
9
+ """
10
+ Overrides markdown2.FencedCodeBlocks.tags
11
+
12
+ Provides support for the fenced code blocks language attribute without
13
+ the need to have the highlightjs-lang extension enabled.
14
+ """
15
+ pre_class = self.md._html_class_str_from_tag("pre")
16
+ if lexer_name:
17
+ code_class = f' class="{lexer_name} language-{lexer_name}"'
18
+ else:
19
+ code_class = self.md._html_class_str_from_tag("code")
20
+ return (f"<pre{pre_class}><code{code_class}>", "</code></pre>")
21
+
22
+
23
+ def _convert_double_match(self, match):
24
+ """
25
+ Overrides markdown2.Latex._convert_double_match
26
+
27
+ Fixes bug #674 of latex macros that start with backslash n not being
28
+ properly rendered.
29
+ """
30
+ return self.converter.convert(match.group(1).replace("\n", " "), display="block")
31
+
32
+
33
+ # Apply overrides on module import and expose markdown2
34
+ markdown2.FencedCodeBlocks.tags = tags
35
+ markdown2.Latex._convert_double_match = _convert_double_match
36
+ __all__ = ["markdown2"]
@@ -6,9 +6,15 @@ import re
6
6
 
7
7
  from bs4 import BeautifulSoup
8
8
 
9
- from .constants import YELLOW
10
- from .extras import create_checkbox, create_custom_span, create_highlight, create_toc
11
- from .utils import color
9
+ from .extras import (
10
+ apply_extras,
11
+ ExtraFeature,
12
+ CheckboxExtra,
13
+ CustomSpanExtra,
14
+ HighlightExtra,
15
+ TocExtra,
16
+ VegaExtra,
17
+ )
12
18
 
13
19
 
14
20
  def create_html_document(html_content, css_content, csp):
@@ -89,76 +95,41 @@ def render_mermaid_diagrams(html, *, nonce):
89
95
  return html
90
96
 
91
97
 
92
- def render_extra_features(html):
98
+ def render_extra_features(
99
+ html,
100
+ extras: set[ExtraFeature] = (
101
+ CheckboxExtra,
102
+ CustomSpanExtra,
103
+ HighlightExtra,
104
+ TocExtra,
105
+ VegaExtra,
106
+ ),
107
+ ):
93
108
  """
94
- Renders extra features like checkboxes, highlights, and custom spans in the HTML content.
95
-
96
- Args:
97
- html (str): HTML content.
98
- Returns:
99
- str: HTML content with extra features rendered.
109
+ Renders extra features by protecting specific tags, applying regex
110
+ transformations, and restoring the protected content.
100
111
  """
112
+ placeholders = {}
113
+
114
+ def stash(match):
115
+ key = f"__PROTECTED_BLOCK_{len(placeholders)}__"
116
+ placeholders[key] = match.group(0)
117
+ return key
101
118
 
102
- handlers = {
103
- "checkbox": create_checkbox,
104
- "highlight": create_highlight,
105
- "span": create_custom_span,
106
- "toc": create_toc,
107
- }
108
-
109
- master_pattern = re.compile(
110
- r"(?P<checkbox>\[\s\]|\[x\])|"
111
- r"(?P<highlight>==(?P<hl_content>.*?)==)|"
112
- r"(?P<span>(?P<cls>[a-zA-Z0-9_-]+)\{\{\s*(?P<sp_content>.*?)\s*\}\})|"
113
- r"(?P<toc>\[TOC(?:\s+depth=(?P<depth>\d+))?\])"
119
+ # 0. Pre protection extras
120
+ html = apply_extras(extras, html, before_stash=True)
121
+
122
+ # 1. Protection: Replace ignored tags with unique hashes
123
+ ignored_pattern = re.compile(
124
+ r"<(code|pre|script|style)\b[^>]*>.*?</\1>", re.DOTALL | re.IGNORECASE
114
125
  )
126
+ html = ignored_pattern.sub(stash, html)
115
127
 
116
- ignored_tags = {"code", "pre", "script", "style"}
117
-
118
- soup = BeautifulSoup(html, "html.parser")
119
- for text_node in soup.find_all(string=True):
120
- # Ignore text nodes within certain tags
121
- if text_node.parent.name in ignored_tags:
122
- continue
123
-
124
- # If no match, skip processing
125
- content = text_node.string
126
- if not master_pattern.search(content):
127
- continue
128
-
129
- new_nodes = []
130
- last_end = 0
131
- for match in master_pattern.finditer(content):
132
- start, end = match.span()
133
-
134
- # Append text before the match
135
- if start > last_end:
136
- new_nodes.append(content[last_end:start])
137
-
138
- kind = match.lastgroup
139
-
140
- # Call the appropriate handler
141
- handler = handlers.get(kind)
142
- if handler:
143
- try:
144
- tag = handler(soup, match)
145
- new_nodes.append(tag)
146
- except Exception as exc:
147
- print(
148
- color(
149
- YELLOW,
150
- f"WARNING: Handler for '{kind}' failed with exception: {exc}",
151
- )
152
- )
153
- new_nodes.append(match.group(0))
154
-
155
- last_end = end
156
-
157
- # Append any remaining text after the last match
158
- if new_nodes:
159
- if last_end < len(content):
160
- new_nodes.append(content[last_end:])
161
-
162
- text_node.replace_with(*new_nodes)
128
+ # 2. Transformations: Define patterns and their replacements
129
+ html = apply_extras(extras, html, before_stash=False)
163
130
 
164
- return str(soup)
131
+ # 3. Restoration: Replace hashes back with original content
132
+ for key, original_content in placeholders.items():
133
+ html = html.replace(key, original_content)
134
+
135
+ return html
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: markdown_convert
3
- Version: 1.2.51
3
+ Version: 1.2.53
4
4
  Summary: Convert Markdown files to PDF from your command line.
5
5
  Project-URL: homepage, https://github.com/Julynx/markdown_convert
6
6
  Author-email: Julio Cabria <juliocabria@tutanota.com>
@@ -17,6 +17,8 @@ Requires-Dist: latex2mathml>=3.78.1
17
17
  Requires-Dist: markdown2<3,>=2.4.13
18
18
  Requires-Dist: playwright>=1.57.0
19
19
  Requires-Dist: pygments<3,>=2.17.2
20
+ Requires-Dist: ruamel-yaml>=0.19.1
21
+ Requires-Dist: vl-convert-python>=1.9.0.post1
20
22
  Description-Content-Type: text/markdown
21
23
 
22
24
  # markdown-convert
@@ -1,18 +1,19 @@
1
1
  markdown_convert/__init__.py,sha256=0hLMtJnCIuApqopx5P4tiDSw850AmnuVcohmAbPVEZ4,303
2
2
  markdown_convert/__main__.py,sha256=AocRo1iF1El_-Uo0owJ-QLbJUF0rum5R_AlNrTTTSOQ,2780
3
3
  markdown_convert/code.css,sha256=Wt4FqFqJcpT-jwY3GN-o4ZRCCXU8DQj-9lqKdGiuoyw,4935
4
- markdown_convert/default.css,sha256=XmIR6Kx4evwmLTZr9QZc3XhDj4jxjmGkwaeftfvHNmU,8149
4
+ markdown_convert/default.css,sha256=ghuLeRUduJO4nxyM7DmqRTt968xbUFmX3oIth_M6T8I,8196
5
5
  markdown_convert/modules/__init__.py,sha256=PFPgiQhMXgyfjD8BkfLC_X8AR1jz-dCxfif2qmNofJs,65
6
6
  markdown_convert/modules/autoinstall.py,sha256=Tnrde6MIcO11PWT7GZwhs_QTVRy6CSpUB_gIi9G5ve8,2063
7
7
  markdown_convert/modules/constants.py,sha256=FA8DrQa9nzTUIJFXwXrK-AuOc5_ToGSFaD4sJqsnAjU,1305
8
- markdown_convert/modules/convert.py,sha256=1AjQfnOXJoxKyfqr4misDuTvE4YXnwaoWw668FUHiEQ,8972
9
- markdown_convert/modules/extras.py,sha256=GwNx6nseztHOWExcYmovxomdvOs078dMeknQTwzTCJo,2730
8
+ markdown_convert/modules/convert.py,sha256=kinhf9izQDY8O9-gWdTt0tQB1j5z4ExKcwLaEaBKrM4,8988
9
+ markdown_convert/modules/extras.py,sha256=KWIMXEm_hLLliBIsPraqpA5p3RFKLuqefFz2bbo0DI8,6921
10
+ markdown_convert/modules/overrides.py,sha256=VcrXmfZqypg796WQFQZHfpQnmdem2uC6MEPoHY_xc_o,1063
10
11
  markdown_convert/modules/resources.py,sha256=eskLLbrkLJWs-vqtCLq4qV2Hjy6XeGFCUdT0VN2b_tA,2488
11
- markdown_convert/modules/transform.py,sha256=e4QllWx5BYKEQqIzOkYigtxcSAWqSUHsoKkvqzYzEpY,4567
12
+ markdown_convert/modules/transform.py,sha256=9_0mqeHwKPECr3Ft1z8r14flTOw4Y8dxblsOIfblEGw,3476
12
13
  markdown_convert/modules/utils.py,sha256=NX0WegM8e8MPKNNmweTujAWO8ZghdB8LSGDx20K2E44,655
13
14
  markdown_convert/modules/validate.py,sha256=XV_k7cHeifEKDaltF26tCmabs2-Me5msP3enI_eVwfA,1517
14
- markdown_convert-1.2.51.dist-info/METADATA,sha256=sejp1Y3EzxGpfMqDxIJsieDaxkO9_o0gFffs7ULpwKw,4118
15
- markdown_convert-1.2.51.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
16
- markdown_convert-1.2.51.dist-info/entry_points.txt,sha256=RCmzC7C0sX-SpzIP2Cr34rhg3lMd7BRx-exaZPfK8bU,68
17
- markdown_convert-1.2.51.dist-info/licenses/LICENSE,sha256=gXf5dRMhNSbfLPYYTY_5hsZ1r7UU1OaKQEAQUhuIBkM,18092
18
- markdown_convert-1.2.51.dist-info/RECORD,,
15
+ markdown_convert-1.2.53.dist-info/METADATA,sha256=7ElwqwB_C7e9e6QtvFwK0s7nRfY_bEk2EVmdXo1DIz8,4199
16
+ markdown_convert-1.2.53.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
17
+ markdown_convert-1.2.53.dist-info/entry_points.txt,sha256=RCmzC7C0sX-SpzIP2Cr34rhg3lMd7BRx-exaZPfK8bU,68
18
+ markdown_convert-1.2.53.dist-info/licenses/LICENSE,sha256=gXf5dRMhNSbfLPYYTY_5hsZ1r7UU1OaKQEAQUhuIBkM,18092
19
+ markdown_convert-1.2.53.dist-info/RECORD,,