markdown_convert 1.2.51__py3-none-any.whl → 1.2.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -453,14 +453,6 @@ math {
453
453
  border-radius: 0.3rem;
454
454
  }
455
455
 
456
- .admonition header {
457
- display: flex;
458
- align-items: center;
459
- gap: 0.25rem;
460
- margin-bottom: 0.25rem;
461
- font-weight: bold;
462
- }
463
-
464
456
  .admonition strong {
465
457
  text-transform: capitalize;
466
458
  }
@@ -522,4 +514,14 @@ math {
522
514
 
523
515
  .admonition.caution strong {
524
516
  color: var(--color-ad-caution);
517
+ }
518
+
519
+ /* Vega-Lite charts*/
520
+ .vega-lite {
521
+ display: flex;
522
+ justify-content: center;
523
+ align-items: center;
524
+ width: 100%;
525
+ margin-top: 1em;
526
+ margin-bottom: 1em;
525
527
  }
@@ -3,98 +3,221 @@ Extras are defined as helper functions called by
3
3
  render_extra_features from transform.py
4
4
  """
5
5
 
6
+ import vl_convert as vlc
7
+ from ruamel.yaml import YAML
8
+ from bs4 import Tag, BeautifulSoup
9
+ import re
6
10
 
7
- def create_checkbox(soup, match):
11
+
12
+ class ExtraFeature:
8
13
  """
9
- Render a tag for a checkbox.
14
+ Base class for extra features that can be applied to HTML.
10
15
 
11
- Args:
12
- soup: HTML beautifulsoup
13
- match: Element identified as a checkbox
14
- Returns:
15
- tag: Beautifulsoup tag representing the checkbox
16
+ Attributes:
17
+ pattern (str): Regex pattern to match the extra feature in the HTML.
18
+ run_before_stash (bool): Whether to run this extra before stashing code blocks.
16
19
  """
17
- tag = soup.new_tag("input", type="checkbox")
18
- if "[x]" in match.group("checkbox"):
19
- tag["checked"] = ""
20
- return tag
21
20
 
21
+ pattern = r""
22
+ run_before_stash = False
22
23
 
23
- def create_highlight(soup, match):
24
- """
25
- Render a tag for a highlight.
24
+ def replace(self, match, html):
25
+ """
26
+ Replaces the matched pattern with the rendered extra feature.
26
27
 
27
- Args:
28
- soup: HTML beautifulsoup
29
- match: Element identified as a highlight
30
- Returns:
31
- tag: Beautifulsoup tag representing the highlight
28
+ Args:
29
+ match (re.Match): The regex match object.
30
+ html (str): The full HTML content.
31
+
32
+ Returns:
33
+ str: The replacement string.
34
+
35
+ Raises:
36
+ NotImplementedError: If the subclass does not implement this method.
37
+ """
38
+ raise NotImplementedError("Subclasses must implement the replace method.")
39
+
40
+
41
+ class CheckboxExtra(ExtraFeature):
42
+ """
43
+ Extra feature for rendering checkboxes.
32
44
  """
33
- tag = soup.new_tag("span", attrs={"class": "highlight"})
34
- tag.string = match.group("hl_content")
35
- return tag
45
+
46
+ pattern = r"(?P<checkbox>\[\s\]|\[x\])"
47
+
48
+ def replace(match, html):
49
+ """
50
+ Render a tag for a checkbox.
51
+
52
+ Args:
53
+ match: Element identified as a checkbox
54
+ Returns:
55
+ str: tag representing the checkbox
56
+ """
57
+ status = "checked" if "[x]" in match.group("checkbox") else ""
58
+ return f'<input type="checkbox" {status}>'
36
59
 
37
60
 
38
- def create_custom_span(soup, match):
61
+ class HighlightExtra(ExtraFeature):
62
+ """
63
+ Extra feature for rendering highlighted text.
39
64
  """
40
- Render a tag for a custom span.
41
65
 
42
- Args:
43
- soup: HTML beautifulsoup
44
- match: Element identified as a custom span
45
- Returns:
46
- tag: Beautifulsoup tag representing the custom span
66
+ pattern = r"==(?P<content>.*?)=="
67
+
68
+ def replace(match, html):
69
+ """
70
+ Render a tag for a highlight.
71
+
72
+ Args:
73
+ match: Element identified as a highlight
74
+ Returns:
75
+ str: tag representing the highlight
76
+ """
77
+ content = match.group("content")
78
+ return f'<span class="highlight">{content}</span>'
79
+
80
+
81
+ class CustomSpanExtra(ExtraFeature):
47
82
  """
48
- tag = soup.new_tag("span", attrs={"class": match.group("cls")})
49
- tag.string = match.group("sp_content")
50
- return tag
83
+ Extra feature for rendering custom spans with specific classes.
84
+ """
85
+
86
+ pattern = r"(?P<cls>[a-zA-Z0-9_-]+)\{\{\s*(?P<content>.*?)\s*\}\}"
51
87
 
88
+ def replace(match, html):
89
+ """
90
+ Render a tag for a custom span.
52
91
 
53
- def create_toc(soup, match):
92
+ Args:
93
+ match: Element identified as a custom span
94
+ Returns:
95
+ str: tag representing the custom span
96
+ """
97
+ cls = match.group("cls")
98
+ content = match.group("content")
99
+ return f'<span class="{cls}">{content}</span>'
100
+
101
+
102
+ class TocExtra(ExtraFeature):
103
+ """
104
+ Extra feature for rendering a Table of Contents.
54
105
  """
55
- Render a tag for a table of contents
56
106
 
107
+ pattern = r"\[TOC(?:\s+depth=(?P<depth>\d+))?\]"
108
+
109
+ def replace(match, html):
110
+ """
111
+ Render a tag for a table of contents
112
+
113
+ Args:
114
+ match: Element identified as a table of contents
115
+ Returns:
116
+ str: tag representing the table of contents
117
+ """
118
+ soup = BeautifulSoup(html, "html.parser")
119
+ max_level = match.group("depth")
120
+ max_level = 3 if max_level is None else int(max_level)
121
+
122
+ headers = [
123
+ header
124
+ for header in soup.find_all(
125
+ [f"h{index}" for index in range(1, max_level + 1)]
126
+ )
127
+ if header.get("id")
128
+ ]
129
+ if not headers:
130
+ return ""
131
+
132
+ tag: Tag = soup.new_tag("ul", attrs={"class": "toc"})
133
+ active_list = {0: tag}
134
+ last_list_element = {}
135
+
136
+ for header in headers:
137
+ level = int(header.name[1])
138
+
139
+ if level not in active_list:
140
+ parent_lvl = max(key for key in active_list if key < level)
141
+ if last_list_element.get(parent_lvl):
142
+ sub_list = soup.new_tag("ul")
143
+ last_list_element[parent_lvl].append(sub_list)
144
+ active_list[level] = sub_list
145
+ else:
146
+ active_list[level] = active_list[parent_lvl]
147
+
148
+ active_list = {
149
+ key: value for key, value in active_list.items() if key <= level
150
+ }
151
+
152
+ list_item = soup.new_tag("li")
153
+ link = soup.new_tag("a", href=f"#{header['id']}")
154
+ link.string = header.get_text(strip=True)
155
+ list_item.append(link)
156
+
157
+ active_list[level].append(list_item)
158
+ last_list_element[level] = list_item
159
+
160
+ return tag.prettify()
161
+
162
+
163
+ class VegaExtra(ExtraFeature):
164
+ """
165
+ Extra feature for rendering Vega-Lite diagrams from YAML.
166
+ """
167
+
168
+ pattern = r"(?s)<pre><code>\$schema: https://vega\.github\.io(?P<content>.*?)</code></pre>"
169
+ run_before_stash = True
170
+
171
+ def replace(match, html):
172
+ """
173
+ Render a tag for a vega lite diagram YAML.
174
+
175
+ Args:
176
+ match (re.Match): Element identified as a vega lite diagram YAML.
177
+ html (str): The full HTML content.
178
+
179
+ Returns:
180
+ str: SVG tag representing the vega lite diagram.
181
+ """
182
+ schema_line = "$schema: https://vega.github.io"
183
+ yaml = YAML()
184
+ spec = yaml.load(schema_line + match.group("content"))
185
+ tag = vlc.vegalite_to_svg(spec)
186
+ return f"<div class='vega-lite'>{tag}</div>"
187
+
188
+
189
+ def apply_extras(extras: set[ExtraFeature], html, before_stash=False):
190
+ """
191
+ Applies extra features to an html string.
57
192
  Args:
58
- soup: HTML beautifulsoup
59
- match: Element identified as a table of contents
193
+ extras: set[ExtraFeature] Extra features to apply
194
+ html: complete html text, used by some extras like TOC.
60
195
  Returns:
61
- tag: Beautifulsoup tag representing the table of contents
196
+ str: The updated html.
62
197
  """
63
- max_level = match.group("depth")
64
- max_level = 3 if max_level is None else int(max_level)
65
-
66
- headers = [
67
- header
68
- for header in soup.find_all([f"h{index}" for index in range(1, max_level + 1)])
69
- if header.get("id")
70
- ]
71
- if not headers:
72
- return ""
73
-
74
- tag = soup.new_tag("ul", attrs={"class": "toc"})
75
- active_list = {0: tag}
76
- last_list_element = {}
77
-
78
- for header in headers:
79
- level = int(header.name[1])
80
-
81
- if level not in active_list:
82
- parent_lvl = max(key for key in active_list if key < level)
83
- if last_list_element.get(parent_lvl):
84
- sub_list = soup.new_tag("ul")
85
- last_list_element[parent_lvl].append(sub_list)
86
- active_list[level] = sub_list
87
- else:
88
- active_list[level] = active_list[parent_lvl]
89
-
90
- active_list = {key: value for key, value in active_list.items() if key <= level}
91
-
92
- list_item = soup.new_tag("li")
93
- link = soup.new_tag("a", href=f"#{header['id']}")
94
- link.string = header.get_text(strip=True)
95
- list_item.append(link)
96
-
97
- active_list[level].append(list_item)
98
- last_list_element[level] = list_item
99
-
100
- return tag
198
+ for extra in extras:
199
+ if not extra.run_before_stash == before_stash:
200
+ continue
201
+
202
+ # Loop until the pattern no longer matches
203
+ while re.search(extra.pattern, html, flags=re.DOTALL):
204
+ new_html = html
205
+ try:
206
+ new_html = re.sub(
207
+ extra.pattern,
208
+ lambda match: extra.replace(match, html=html),
209
+ html,
210
+ flags=re.DOTALL,
211
+ )
212
+ except Exception as exc:
213
+ print(
214
+ f"WARNING: An exception occurred while trying to apply an extra:\n{exc}"
215
+ )
216
+ pass
217
+
218
+ # Safety break:
219
+ if new_html == html:
220
+ break
221
+ html = new_html
222
+
223
+ return html
@@ -6,9 +6,15 @@ import re
6
6
 
7
7
  from bs4 import BeautifulSoup
8
8
 
9
- from .constants import YELLOW
10
- from .extras import create_checkbox, create_custom_span, create_highlight, create_toc
11
- from .utils import color
9
+ from .extras import (
10
+ apply_extras,
11
+ ExtraFeature,
12
+ CheckboxExtra,
13
+ CustomSpanExtra,
14
+ HighlightExtra,
15
+ TocExtra,
16
+ VegaExtra,
17
+ )
12
18
 
13
19
 
14
20
  def create_html_document(html_content, css_content, csp):
@@ -89,76 +95,41 @@ def render_mermaid_diagrams(html, *, nonce):
89
95
  return html
90
96
 
91
97
 
92
- def render_extra_features(html):
98
+ def render_extra_features(
99
+ html,
100
+ extras: set[ExtraFeature] = (
101
+ CheckboxExtra,
102
+ CustomSpanExtra,
103
+ HighlightExtra,
104
+ TocExtra,
105
+ VegaExtra,
106
+ ),
107
+ ):
93
108
  """
94
- Renders extra features like checkboxes, highlights, and custom spans in the HTML content.
95
-
96
- Args:
97
- html (str): HTML content.
98
- Returns:
99
- str: HTML content with extra features rendered.
109
+ Renders extra features by protecting specific tags, applying regex
110
+ transformations, and restoring the protected content.
100
111
  """
112
+ placeholders = {}
113
+
114
+ def stash(match):
115
+ key = f"__PROTECTED_BLOCK_{len(placeholders)}__"
116
+ placeholders[key] = match.group(0)
117
+ return key
101
118
 
102
- handlers = {
103
- "checkbox": create_checkbox,
104
- "highlight": create_highlight,
105
- "span": create_custom_span,
106
- "toc": create_toc,
107
- }
108
-
109
- master_pattern = re.compile(
110
- r"(?P<checkbox>\[\s\]|\[x\])|"
111
- r"(?P<highlight>==(?P<hl_content>.*?)==)|"
112
- r"(?P<span>(?P<cls>[a-zA-Z0-9_-]+)\{\{\s*(?P<sp_content>.*?)\s*\}\})|"
113
- r"(?P<toc>\[TOC(?:\s+depth=(?P<depth>\d+))?\])"
119
+ # 0. Pre protection extras
120
+ html = apply_extras(extras, html, before_stash=True)
121
+
122
+ # 1. Protection: Replace ignored tags with unique hashes
123
+ ignored_pattern = re.compile(
124
+ r"<(code|pre|script|style)\b[^>]*>.*?</\1>", re.DOTALL | re.IGNORECASE
114
125
  )
126
+ html = ignored_pattern.sub(stash, html)
115
127
 
116
- ignored_tags = {"code", "pre", "script", "style"}
117
-
118
- soup = BeautifulSoup(html, "html.parser")
119
- for text_node in soup.find_all(string=True):
120
- # Ignore text nodes within certain tags
121
- if text_node.parent.name in ignored_tags:
122
- continue
123
-
124
- # If no match, skip processing
125
- content = text_node.string
126
- if not master_pattern.search(content):
127
- continue
128
-
129
- new_nodes = []
130
- last_end = 0
131
- for match in master_pattern.finditer(content):
132
- start, end = match.span()
133
-
134
- # Append text before the match
135
- if start > last_end:
136
- new_nodes.append(content[last_end:start])
137
-
138
- kind = match.lastgroup
139
-
140
- # Call the appropriate handler
141
- handler = handlers.get(kind)
142
- if handler:
143
- try:
144
- tag = handler(soup, match)
145
- new_nodes.append(tag)
146
- except Exception as exc:
147
- print(
148
- color(
149
- YELLOW,
150
- f"WARNING: Handler for '{kind}' failed with exception: {exc}",
151
- )
152
- )
153
- new_nodes.append(match.group(0))
154
-
155
- last_end = end
156
-
157
- # Append any remaining text after the last match
158
- if new_nodes:
159
- if last_end < len(content):
160
- new_nodes.append(content[last_end:])
161
-
162
- text_node.replace_with(*new_nodes)
128
+ # 2. Transformations: Define patterns and their replacements
129
+ html = apply_extras(extras, html, before_stash=False)
163
130
 
164
- return str(soup)
131
+ # 3. Restoration: Replace hashes back with original content
132
+ for key, original_content in placeholders.items():
133
+ html = html.replace(key, original_content)
134
+
135
+ return html
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: markdown_convert
3
- Version: 1.2.51
3
+ Version: 1.2.52
4
4
  Summary: Convert Markdown files to PDF from your command line.
5
5
  Project-URL: homepage, https://github.com/Julynx/markdown_convert
6
6
  Author-email: Julio Cabria <juliocabria@tutanota.com>
@@ -17,6 +17,8 @@ Requires-Dist: latex2mathml>=3.78.1
17
17
  Requires-Dist: markdown2<3,>=2.4.13
18
18
  Requires-Dist: playwright>=1.57.0
19
19
  Requires-Dist: pygments<3,>=2.17.2
20
+ Requires-Dist: ruamel-yaml>=0.19.1
21
+ Requires-Dist: vl-convert-python>=1.9.0.post1
20
22
  Description-Content-Type: text/markdown
21
23
 
22
24
  # markdown-convert
@@ -1,18 +1,18 @@
1
1
  markdown_convert/__init__.py,sha256=0hLMtJnCIuApqopx5P4tiDSw850AmnuVcohmAbPVEZ4,303
2
2
  markdown_convert/__main__.py,sha256=AocRo1iF1El_-Uo0owJ-QLbJUF0rum5R_AlNrTTTSOQ,2780
3
3
  markdown_convert/code.css,sha256=Wt4FqFqJcpT-jwY3GN-o4ZRCCXU8DQj-9lqKdGiuoyw,4935
4
- markdown_convert/default.css,sha256=XmIR6Kx4evwmLTZr9QZc3XhDj4jxjmGkwaeftfvHNmU,8149
4
+ markdown_convert/default.css,sha256=2Aac379NfFgytk8Gwnh4kNFN3FMNYmpxxNvoATg1How,8183
5
5
  markdown_convert/modules/__init__.py,sha256=PFPgiQhMXgyfjD8BkfLC_X8AR1jz-dCxfif2qmNofJs,65
6
6
  markdown_convert/modules/autoinstall.py,sha256=Tnrde6MIcO11PWT7GZwhs_QTVRy6CSpUB_gIi9G5ve8,2063
7
7
  markdown_convert/modules/constants.py,sha256=FA8DrQa9nzTUIJFXwXrK-AuOc5_ToGSFaD4sJqsnAjU,1305
8
8
  markdown_convert/modules/convert.py,sha256=1AjQfnOXJoxKyfqr4misDuTvE4YXnwaoWw668FUHiEQ,8972
9
- markdown_convert/modules/extras.py,sha256=GwNx6nseztHOWExcYmovxomdvOs078dMeknQTwzTCJo,2730
9
+ markdown_convert/modules/extras.py,sha256=GO-Nk5hCWsdcPggL2-Mv9Q31wFWLLWyNK_xhXUEOs9g,6313
10
10
  markdown_convert/modules/resources.py,sha256=eskLLbrkLJWs-vqtCLq4qV2Hjy6XeGFCUdT0VN2b_tA,2488
11
- markdown_convert/modules/transform.py,sha256=e4QllWx5BYKEQqIzOkYigtxcSAWqSUHsoKkvqzYzEpY,4567
11
+ markdown_convert/modules/transform.py,sha256=9_0mqeHwKPECr3Ft1z8r14flTOw4Y8dxblsOIfblEGw,3476
12
12
  markdown_convert/modules/utils.py,sha256=NX0WegM8e8MPKNNmweTujAWO8ZghdB8LSGDx20K2E44,655
13
13
  markdown_convert/modules/validate.py,sha256=XV_k7cHeifEKDaltF26tCmabs2-Me5msP3enI_eVwfA,1517
14
- markdown_convert-1.2.51.dist-info/METADATA,sha256=sejp1Y3EzxGpfMqDxIJsieDaxkO9_o0gFffs7ULpwKw,4118
15
- markdown_convert-1.2.51.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
16
- markdown_convert-1.2.51.dist-info/entry_points.txt,sha256=RCmzC7C0sX-SpzIP2Cr34rhg3lMd7BRx-exaZPfK8bU,68
17
- markdown_convert-1.2.51.dist-info/licenses/LICENSE,sha256=gXf5dRMhNSbfLPYYTY_5hsZ1r7UU1OaKQEAQUhuIBkM,18092
18
- markdown_convert-1.2.51.dist-info/RECORD,,
14
+ markdown_convert-1.2.52.dist-info/METADATA,sha256=d7J5oijwQL3Z23RLFgxGMMFh_-jjVIsP2evst7LqK5w,4199
15
+ markdown_convert-1.2.52.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
16
+ markdown_convert-1.2.52.dist-info/entry_points.txt,sha256=RCmzC7C0sX-SpzIP2Cr34rhg3lMd7BRx-exaZPfK8bU,68
17
+ markdown_convert-1.2.52.dist-info/licenses/LICENSE,sha256=gXf5dRMhNSbfLPYYTY_5hsZ1r7UU1OaKQEAQUhuIBkM,18092
18
+ markdown_convert-1.2.52.dist-info/RECORD,,