markdown_convert 1.2.33__tar.gz → 1.2.35__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: markdown_convert
3
- Version: 1.2.33
3
+ Version: 1.2.35
4
4
  Summary: Convert Markdown files to PDF from your command line.
5
5
  Project-URL: homepage, https://github.com/Julynx/markdown_convert
6
6
  Author-email: Julio Cabria <juliocabria@tutanota.com>
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: <3.15,>=3.9
13
13
  Requires-Dist: argsdict==1.0.0
14
+ Requires-Dist: beautifulsoup4>=4.14.3
14
15
  Requires-Dist: latex2mathml>=3.78.1
15
16
  Requires-Dist: markdown2<3,>=2.4.13
16
17
  Requires-Dist: playwright>=1.57.0
@@ -212,18 +212,20 @@ td {
212
212
  }
213
213
 
214
214
  section,
215
- ol,
216
- ul,
217
215
  table,
218
216
  blockquote,
219
- code,
220
- p,
217
+ code {
218
+ page-break-inside: avoid;
219
+ break-inside: avoid;
220
+ }
221
+
221
222
  h1,
222
223
  h2,
223
224
  h3,
224
225
  h4,
225
226
  h5 {
226
- page-break-inside: avoid;
227
+ page-break-after: avoid;
228
+ break-after: avoid;
227
229
  }
228
230
 
229
231
  /* Increase vertical padding for math elements with display=block */
@@ -4,6 +4,8 @@ Module for transforming HTML content.
4
4
 
5
5
  import re
6
6
 
7
+ from bs4 import BeautifulSoup
8
+
7
9
 
8
10
  def create_html_document(html_content, css_content, csp):
9
11
  """
@@ -30,28 +32,31 @@ def create_html_document(html_content, css_content, csp):
30
32
  </html>"""
31
33
 
32
34
 
33
- def create_sections(html):
35
+ def create_sections(html_string):
34
36
  """
35
- Creates h2 sections, from the first h2 to the next h2, wrapping them in <section> tags
36
- using regular expressions.
37
+ Wraps each h2 and its following content in a <section> tag.
38
+ Avoids wrapping h2 tags that are inside <code> blocks.
39
+
37
40
  Args:
38
- html (str): HTML content.
41
+ html_string (str): The input HTML string.
39
42
  Returns:
40
- HTML content with sections wrapped in <section> tags.
43
+ str: The modified HTML string with h2 sections wrapped.
41
44
  """
42
- pattern = re.compile(r"(<h2.*?>.*?</h2>)(.*?)(?=(<h2.*?>|$))", re.DOTALL)
45
+ soup = BeautifulSoup(html_string, "html.parser")
43
46
 
44
- def wrap_section(match):
45
- return f"<section>\n{match.group(1)}\n{match.group(2)}\n</section>\n"
47
+ for second_level_header in soup.find_all("h2"):
48
+ new_section = soup.new_tag("section")
49
+ second_level_header.insert_before(new_section)
46
50
 
47
- # Split by code blocks to avoid processing text inside them
48
- parts = re.split(r"(<code>.*?</code>)", html, flags=re.DOTALL)
49
- for part_index, _part in enumerate(parts):
50
- # Only process parts that are NOT code blocks
51
- if not parts[part_index].startswith("<code>"):
52
- parts[part_index] = pattern.sub(wrap_section, parts[part_index])
51
+ current = second_level_header
52
+ while current is not None and (
53
+ current == second_level_header or current.name != "h2"
54
+ ):
55
+ next_sibling = current.next_sibling
56
+ new_section.append(current)
57
+ current = next_sibling
53
58
 
54
- return "".join(parts)
59
+ return str(soup)
55
60
 
56
61
 
57
62
  def render_mermaid_diagrams(html, *, nonce):
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "markdown_convert"
7
- version = "1.2.33"
7
+ version = "1.2.35"
8
8
  description = "Convert Markdown files to PDF from your command line."
9
9
  authors = [
10
10
  { name = "Julio Cabria", email = "juliocabria@tutanota.com" },
@@ -23,6 +23,7 @@ dependencies = [
23
23
  "pygments>=2.17.2,<3",
24
24
  "latex2mathml>=3.78.1",
25
25
  "playwright>=1.57.0",
26
+ "beautifulsoup4>=4.14.3",
26
27
  ]
27
28
 
28
29
  [project.urls]