html-to-markdown 1.14.0__tar.gz → 1.14.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of html-to-markdown might be problematic. Click here for more details.
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/PKG-INFO +1 -1
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/processing.py +3 -5
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/PKG-INFO +1 -1
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/pyproject.toml +3 -1
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/LICENSE +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/README.md +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/__init__.py +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/__main__.py +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/cli.py +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/constants.py +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/converters.py +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/exceptions.py +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/preprocessor.py +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/py.typed +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/utils.py +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/whitespace.py +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/SOURCES.txt +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/dependency_links.txt +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/entry_points.txt +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/requires.txt +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/top_level.txt +0 -0
- {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: html-to-markdown
|
|
3
|
-
Version: 1.14.0
|
|
3
|
+
Version: 1.14.1
|
|
4
4
|
Summary: A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options
|
|
5
5
|
Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -314,11 +314,12 @@ def _process_text(
|
|
|
314
314
|
if len(ancestor_names) > 10:
|
|
315
315
|
break
|
|
316
316
|
|
|
317
|
-
in_pre = bool(ancestor_names.intersection({"pre"}))
|
|
317
|
+
in_pre = bool(ancestor_names.intersection({"pre"})) or parent_name == "pre"
|
|
318
318
|
|
|
319
319
|
text = whitespace_handler.process_text_whitespace(text, el, in_pre=in_pre)
|
|
320
320
|
|
|
321
|
-
|
|
321
|
+
code_like_tags = {"pre", "code", "kbd", "samp"}
|
|
322
|
+
if not (ancestor_names.intersection(code_like_tags) or parent_name in code_like_tags):
|
|
322
323
|
text = escape(
|
|
323
324
|
text=text,
|
|
324
325
|
escape_misc=escape_misc,
|
|
@@ -617,7 +618,6 @@ def convert_to_markdown(
|
|
|
617
618
|
first_child.replace_with(new_text)
|
|
618
619
|
needs_leading_space_fix = False
|
|
619
620
|
|
|
620
|
-
# Fix html5lib whitespace handling to match other parsers
|
|
621
621
|
if parser == "html5lib":
|
|
622
622
|
body = source.find("body")
|
|
623
623
|
if body and isinstance(body, Tag):
|
|
@@ -632,7 +632,6 @@ def convert_to_markdown(
|
|
|
632
632
|
first_child = children[0]
|
|
633
633
|
original_text = str(first_child)
|
|
634
634
|
|
|
635
|
-
# Preserve leading whitespace from original if html5lib stripped it
|
|
636
635
|
leading_ws = ""
|
|
637
636
|
for char in original_source:
|
|
638
637
|
if char in " \t\n\r":
|
|
@@ -640,7 +639,6 @@ def convert_to_markdown(
|
|
|
640
639
|
else:
|
|
641
640
|
break
|
|
642
641
|
|
|
643
|
-
# Create normalized text: restore leading whitespace only
|
|
644
642
|
normalized_text = original_text
|
|
645
643
|
if leading_ws and not normalized_text.startswith(leading_ws):
|
|
646
644
|
normalized_text = leading_ws + normalized_text
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: html-to-markdown
|
|
3
|
-
Version: 1.14.0
|
|
3
|
+
Version: 1.14.1
|
|
4
4
|
Summary: A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options
|
|
5
5
|
Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -5,7 +5,7 @@ requires = [ "setuptools>=78.1" ]
|
|
|
5
5
|
|
|
6
6
|
[project]
|
|
7
7
|
name = "html-to-markdown"
|
|
8
|
-
version = "1.14.0"
|
|
8
|
+
version = "1.14.1"
|
|
9
9
|
description = "A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options"
|
|
10
10
|
readme = "README.md"
|
|
11
11
|
keywords = [
|
|
@@ -61,8 +61,10 @@ dev = [
|
|
|
61
61
|
"beautifulsoup4[html5lib]>=4.13.5",
|
|
62
62
|
"beautifulsoup4[lxml]>=4.13.5",
|
|
63
63
|
"covdefaults>=2.3",
|
|
64
|
+
"memray>=1.18; sys_platform!='win32'",
|
|
64
65
|
"mypy>=1.18.2",
|
|
65
66
|
"pre-commit>=4.3",
|
|
67
|
+
"psutil>=7.1; sys_platform!='win32'",
|
|
66
68
|
"pytest>=8.4.2",
|
|
67
69
|
"pytest-benchmark>=5.1",
|
|
68
70
|
"pytest-cov>=7",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|