html-to-markdown 1.14.0__tar.gz → 1.14.1__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of html-to-markdown might be problematic. Click here for more details.

Files changed (22):
  1. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/PKG-INFO +1 -1
  2. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/processing.py +3 -5
  3. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/PKG-INFO +1 -1
  4. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/pyproject.toml +3 -1
  5. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/LICENSE +0 -0
  6. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/README.md +0 -0
  7. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/__init__.py +0 -0
  8. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/__main__.py +0 -0
  9. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/cli.py +0 -0
  10. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/constants.py +0 -0
  11. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/converters.py +0 -0
  12. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/exceptions.py +0 -0
  13. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/preprocessor.py +0 -0
  14. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/py.typed +0 -0
  15. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/utils.py +0 -0
  16. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown/whitespace.py +0 -0
  17. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/SOURCES.txt +0 -0
  18. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/dependency_links.txt +0 -0
  19. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/entry_points.txt +0 -0
  20. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/requires.txt +0 -0
  21. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/html_to_markdown.egg-info/top_level.txt +0 -0
  22. {html_to_markdown-1.14.0 → html_to_markdown-1.14.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 1.14.0
3
+ Version: 1.14.1
4
4
  Summary: A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options
5
5
  Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
6
6
  License: MIT
@@ -314,11 +314,12 @@ def _process_text(
314
314
  if len(ancestor_names) > 10:
315
315
  break
316
316
 
317
- in_pre = bool(ancestor_names.intersection({"pre"}))
317
+ in_pre = bool(ancestor_names.intersection({"pre"})) or parent_name == "pre"
318
318
 
319
319
  text = whitespace_handler.process_text_whitespace(text, el, in_pre=in_pre)
320
320
 
321
- if not ancestor_names.intersection({"pre", "code", "kbd", "samp"}):
321
+ code_like_tags = {"pre", "code", "kbd", "samp"}
322
+ if not (ancestor_names.intersection(code_like_tags) or parent_name in code_like_tags):
322
323
  text = escape(
323
324
  text=text,
324
325
  escape_misc=escape_misc,
@@ -617,7 +618,6 @@ def convert_to_markdown(
617
618
  first_child.replace_with(new_text)
618
619
  needs_leading_space_fix = False
619
620
 
620
- # Fix html5lib whitespace handling to match other parsers
621
621
  if parser == "html5lib":
622
622
  body = source.find("body")
623
623
  if body and isinstance(body, Tag):
@@ -632,7 +632,6 @@ def convert_to_markdown(
632
632
  first_child = children[0]
633
633
  original_text = str(first_child)
634
634
 
635
- # Preserve leading whitespace from original if html5lib stripped it
636
635
  leading_ws = ""
637
636
  for char in original_source:
638
637
  if char in " \t\n\r":
@@ -640,7 +639,6 @@ def convert_to_markdown(
640
639
  else:
641
640
  break
642
641
 
643
- # Create normalized text: restore leading whitespace only
644
642
  normalized_text = original_text
645
643
  if leading_ws and not normalized_text.startswith(leading_ws):
646
644
  normalized_text = leading_ws + normalized_text
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 1.14.0
3
+ Version: 1.14.1
4
4
  Summary: A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options
5
5
  Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
6
6
  License: MIT
@@ -5,7 +5,7 @@ requires = [ "setuptools>=78.1" ]
5
5
 
6
6
  [project]
7
7
  name = "html-to-markdown"
8
- version = "1.14.0"
8
+ version = "1.14.1"
9
9
  description = "A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options"
10
10
  readme = "README.md"
11
11
  keywords = [
@@ -61,8 +61,10 @@ dev = [
61
61
  "beautifulsoup4[html5lib]>=4.13.5",
62
62
  "beautifulsoup4[lxml]>=4.13.5",
63
63
  "covdefaults>=2.3",
64
+ "memray>=1.18; sys_platform!='win32'",
64
65
  "mypy>=1.18.2",
65
66
  "pre-commit>=4.3",
67
+ "psutil>=7.1; sys_platform!='win32'",
66
68
  "pytest>=8.4.2",
67
69
  "pytest-benchmark>=5.1",
68
70
  "pytest-cov>=7",