PyPI - html-to-markdown - Versions diffs - 1.10.0__tar.gz → 1.11.0__tar.gz - Mend

html-to-markdown 1.10.0.tar.gz → 1.11.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of html-to-markdown might be problematic. Click here for more details.

Files changed (22)

{html_to_markdown-1.10.0 → html_to_markdown-1.11.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: html-to-markdown
-Version: 1.10.0
+Version: 1.11.0
 Summary: A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options
 Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
 License: MIT
@@ -33,7 +33,7 @@ License-File: LICENSE
 Requires-Dist: beautifulsoup4>=4.13.5
 Requires-Dist: nh3>=0.3
 Provides-Extra: lxml
-Requires-Dist: lxml>=6.0.1; extra == "lxml"
+Requires-Dist: beautifulsoup4[lxml]>=4.13.5; extra == "lxml"
 Dynamic: license-file
 # html-to-markdown

{html_to_markdown-1.10.0 → html_to_markdown-1.11.0}/html_to_markdown/converters.py RENAMED Viewed

@@ -578,8 +578,11 @@ def _convert_semantic_block(*, text: str, convert_as_inline: bool) -> str:
     return f"{text}\n\n" if text.strip() else ""
-def _convert_div(*, text: str, convert_as_inline: bool) -> str:  # noqa: ARG001
-    return text
+def _convert_div(*, text: str, convert_as_inline: bool) -> str:
+    if convert_as_inline:
+        return text
+    return _format_block_element(text)
 def _convert_details(*, text: str, convert_as_inline: bool) -> str:

{html_to_markdown-1.10.0 → html_to_markdown-1.11.0}/html_to_markdown/processing.py RENAMED Viewed

@@ -258,6 +258,18 @@ def _process_tag(
             if n_eol_to_add > 0:
                 prefix = "\n" * n_eol_to_add
                 return f"{prefix}{rendered}"
+        from html_to_markdown.whitespace import BLOCK_ELEMENTS  # noqa: PLC0415
+        is_block_element = tag.name.lower() in BLOCK_ELEMENTS
+        if (
+            is_block_element
+            and not convert_as_inline
+            and context_before
+            and not context_before.endswith("\n")
+            and rendered.strip()
+        ):
+            return f"\n\n{rendered}"
         return rendered
     return text
@@ -358,7 +370,7 @@ def _as_optional_set(value: str | Iterable[str] | None) -> set[str] | None:
     if value is None:
         return None
     if isinstance(value, str):
-        return set(",".split(value))
+        return set(value.split(","))
     return {*chain(*[v.split(",") for v in value])}
@@ -836,15 +848,6 @@ def _process_html_core(
     try:
         if isinstance(source, str):
-            if (
-                heading_style == UNDERLINED
-                and "Header" in source
-                and "\n------\n\n" in source
-                and "Next paragraph" in source
-            ):
-                sink.write(source)
-                return
             if strip_newlines:
                 source = source.replace("\n", " ").replace("\r", " ")

{html_to_markdown-1.10.0 → html_to_markdown-1.11.0}/html_to_markdown/whitespace.py RENAMED Viewed

@@ -171,13 +171,13 @@ class WhitespaceHandler:
         if not text:
             return ""
-        text = self.normalize_unicode_spaces(text)
         if in_pre or self.should_preserve_whitespace(element):
             return text
         if self.mode == "strict":
             return text
+        text = self.normalize_unicode_spaces(text)
         return self._process_normalized(text, element)
     def _process_normalized(self, text: str, element: NavigableString) -> str:
@@ -242,6 +242,14 @@ class WhitespaceHandler:
         prev_sibling = element.previous_sibling
         next_sibling = element.next_sibling
+        multiple_newlines_before_block = (
+            original
+            and original.count("\n") >= 2
+            and self.is_block_element(next_sibling)
+            and text.strip()
+            and (self.is_inline_element(prev_sibling) or prev_sibling is None)
+        )
         has_leading = (
             has_lead_space
             and original[0] == " "
@@ -268,6 +276,9 @@ class WhitespaceHandler:
         if has_trailing and not (original and original[-1] in "\n\t"):
             text = text + " "
+        if multiple_newlines_before_block:
+            text = text + "\n\n"
         return text
     def get_block_spacing(self, tag: Tag, next_sibling: PageElement | None = None) -> str:
@@ -286,7 +297,7 @@ class WhitespaceHandler:
             return "\n"
         if tag_name in single_newline_elements:
             return "\n"
-        if tag_name.startswith("h") and len(tag_name) == 2:
+        if tag_name.startswith("h") and len(tag_name) == 2 and tag_name[1].isdigit():
             return "\n\n"
         return ""

{html_to_markdown-1.10.0 → html_to_markdown-1.11.0}/html_to_markdown.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: html-to-markdown
-Version: 1.10.0
+Version: 1.11.0
 Summary: A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options
 Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
 License: MIT
@@ -33,7 +33,7 @@ License-File: LICENSE
 Requires-Dist: beautifulsoup4>=4.13.5
 Requires-Dist: nh3>=0.3
 Provides-Extra: lxml
-Requires-Dist: lxml>=6.0.1; extra == "lxml"
+Requires-Dist: beautifulsoup4[lxml]>=4.13.5; extra == "lxml"
 Dynamic: license-file
 # html-to-markdown

{html_to_markdown-1.10.0 → html_to_markdown-1.11.0}/html_to_markdown.egg-info/requires.txt RENAMED Viewed

@@ -2,4 +2,4 @@ beautifulsoup4>=4.13.5
 nh3>=0.3
 [lxml]
-lxml>=6.0.1
+beautifulsoup4[lxml]>=4.13.5

{html_to_markdown-1.10.0 → html_to_markdown-1.11.0}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ requires = [ "setuptools>=78.1" ]
 [project]
 name = "html-to-markdown"
-version = "1.10.0"
+version = "1.11.0"
 description = "A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options"
 readme = "README.md"
 keywords = [
@@ -43,8 +43,8 @@ classifiers = [
   "Typing :: Typed",
 ]
 dependencies = [ "beautifulsoup4>=4.13.5", "nh3>=0.3" ]
+optional-dependencies.lxml = [ "beautifulsoup4[lxml]>=4.13.5" ]
-optional-dependencies.lxml = [ "lxml>=6.0.1" ]
 urls.Changelog = "https://github.com/Goldziher/html-to-markdown/releases"
 urls.Homepage = "https://github.com/Goldziher/html-to-markdown"
 urls.Issues = "https://github.com/Goldziher/html-to-markdown/issues"
@@ -54,14 +54,13 @@ scripts.html_to_markdown = "html_to_markdown.__main__:cli"
 [dependency-groups]
 dev = [
-  "ai-rulez>=2.0.1",
   "covdefaults>=2.3",
-  "mypy>=1.17.1",
+  "mypy>=1.18.1",
   "pre-commit>=4.3",
   "pytest>=8.4.2",
-  "pytest-cov>=6.3",
+  "pytest-cov>=7",
   "pytest-mock>=3.15",
-  "ruff>=0.12.12",
+  "ruff>=0.13",
   "types-beautifulsoup4>=4.12.0.20250516",
   "types-psutil>=7.0.0.20250822",
   "uv-bump",