PyPI - html-to-markdown - Versions diffs - 1.9.1__py3-none-any.whl → 1.10.0__py3-none-any.whl - Mend

html-to-markdown 1.9.1__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of html-to-markdown might be problematic. Click here for more details.

Files changed (16)

html_to_markdown/__main__.py +0 -1
html_to_markdown/cli.py +101 -45
html_to_markdown/constants.py +3 -0
html_to_markdown/converters.py +31 -502
html_to_markdown/exceptions.py +1 -11
html_to_markdown/preprocessor.py +0 -37
html_to_markdown/processing.py +104 -181
html_to_markdown/utils.py +2 -42
html_to_markdown/whitespace.py +292 -0
{html_to_markdown-1.9.1.dist-info → html_to_markdown-1.10.0.dist-info}/METADATA +195 -203
html_to_markdown-1.10.0.dist-info/RECORD +17 -0
html_to_markdown-1.9.1.dist-info/RECORD +0 -16
{html_to_markdown-1.9.1.dist-info → html_to_markdown-1.10.0.dist-info}/WHEEL +0 -0
{html_to_markdown-1.9.1.dist-info → html_to_markdown-1.10.0.dist-info}/entry_points.txt +0 -0
{html_to_markdown-1.9.1.dist-info → html_to_markdown-1.10.0.dist-info}/licenses/LICENSE +0 -0
{html_to_markdown-1.9.1.dist-info → html_to_markdown-1.10.0.dist-info}/top_level.txt +0 -0

html_to_markdown/converters.py CHANGED Viewed

@@ -23,17 +23,14 @@ from html_to_markdown.utils import chomp, indent, underline
 def _format_block_element(text: str) -> str:
-    """Format text as a block element with trailing newlines."""
     return f"{text.strip()}\n\n" if text.strip() else ""
 def _format_inline_or_block(text: str, convert_as_inline: bool) -> str:
-    """Format text as inline or block element based on context."""
     return text.strip() if convert_as_inline else _format_block_element(text)
 def _format_wrapped_block(text: str, start_marker: str, end_marker: str = "") -> str:
-    """Format text wrapped in markers as a block element."""
     if not end_marker:
         end_marker = start_marker
     return f"{start_marker}{text.strip()}{end_marker}\n\n" if text.strip() else ""
@@ -63,6 +60,7 @@ SupportedElements = Literal[
     "details",
     "dfn",
     "dialog",
+    "div",
     "dl",
     "dt",
     "em",
@@ -145,15 +143,6 @@ T = TypeVar("T")
 def _create_inline_converter(markup_prefix: str) -> Callable[[Tag, str], str]:
-    """Create an inline converter for a markup pattern or tag.
-    Args:
-        markup_prefix: The markup prefix to insert.
-    Returns:
-        A function that can be used to convert HTML to Markdown.
-    """
     def implementation(*, tag: Tag, text: str) -> str:
         from html_to_markdown.processing import _has_ancestor  # noqa: PLC0415
@@ -200,7 +189,7 @@ def _convert_a(*, tag: Tag, text: str, autolinks: bool, default_title: bool) ->
     return f"{prefix}[{text}]({href}{title_part}){suffix}" if href else text
-def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool) -> str:
+def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool, list_indent_str: str) -> str:
     if convert_as_inline:
         return text
@@ -213,14 +202,14 @@ def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool) -> str:
     if _has_ancestor(tag, "li"):
         lines = text.strip().split("\n")
-        indented_lines = [f"    > {line}" if line.strip() else "" for line in lines]
+        indented_lines = [f"{list_indent_str}> {line}" if line.strip() else "" for line in lines]
         quote_text = "\n".join(indented_lines) + "\n\n"
     else:
         quote_text = f"\n{line_beginning_re.sub('> ', text.strip())}\n\n"
     if cite_url:
         if _has_ancestor(tag, "li"):
-            quote_text += f"    — <{cite_url}>\n\n"
+            quote_text += f"{list_indent_str}— <{cite_url}>\n\n"
         else:
             quote_text += f"— <{cite_url}>\n\n"
@@ -281,7 +270,7 @@ def _convert_img(*, tag: Tag, convert_as_inline: bool, keep_inline_images_in: It
     return f"![{alt}]({src}{title_part})"
-def _convert_list(*, tag: Tag, text: str) -> str:
+def _convert_list(*, tag: Tag, text: str, list_indent_str: str) -> str:
     from html_to_markdown.processing import _has_ancestor  # noqa: PLC0415
     before_paragraph = False
@@ -307,18 +296,18 @@ def _convert_list(*, tag: Tag, text: str) -> str:
                 indented_lines = []
                 for line in lines:
                     if line.strip():
-                        indented_lines.append(f"    {line}")
+                        indented_lines.append(f"{list_indent_str}{line}")
                     else:
                         indented_lines.append("")
                 return "\n" + "\n".join(indented_lines) + "\n"
-            return "\n" + indent(text=text, level=1).rstrip()
+            return "\n" + indent(text=text, level=1, indent_str=list_indent_str).rstrip()
     if tag.parent and tag.parent.name in {"ul", "ol"}:
         lines = text.strip().split("\n")
         indented_lines = []
         for line in lines:
             if line.strip():
-                indented_lines.append(f"    {line}")
+                indented_lines.append(f"{list_indent_str}{line}")
             else:
                 indented_lines.append("")
         result = "\n".join(indented_lines)
@@ -329,7 +318,7 @@ def _convert_list(*, tag: Tag, text: str) -> str:
     return text + ("\n" if before_paragraph else "")
-def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
+def _convert_li(*, tag: Tag, text: str, bullets: str, list_indent_str: str) -> str:
     checkbox = tag.find("input", {"type": "checkbox"})
     if checkbox and isinstance(checkbox, Tag):
         checked = checkbox.get("checked") is not None
@@ -375,14 +364,18 @@ def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
             for para in paragraphs[1:]:
                 if para.strip():
                     result_parts.append("\n")
-                    result_parts.extend(f"    {line}\n" for line in para.strip().split("\n") if line.strip())
+                    result_parts.extend(
+                        f"{list_indent_str}{line}\n" for line in para.strip().split("\n") if line.strip()
+                    )
             return "".join(result_parts)
     return "{} {}\n".format(bullet, (text or "").strip())
-def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int, tag: Tag) -> str:
+def _convert_p(
+    *, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int, tag: Tag, list_indent_str: str
+) -> str:
     if convert_as_inline:
         return text
@@ -408,7 +401,7 @@ def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: in
                 indented_lines = []
                 for line in text.split("\n"):
                     if line.strip():
-                        indented_lines.append(f"    {line}")
+                        indented_lines.append(f"{list_indent_str}{line}")
                     else:
                         indented_lines.append("")
                 text = "\n".join(indented_lines)
@@ -417,16 +410,6 @@ def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: in
 def _convert_mark(*, text: str, convert_as_inline: bool, highlight_style: str) -> str:
-    """Convert HTML mark element to Markdown highlighting.
-    Args:
-        text: The text content of the mark element.
-        convert_as_inline: Whether to convert as inline content.
-        highlight_style: The style to use for highlighting ("double-equal", "html", "bold").
-    Returns:
-        The converted markdown text.
-    """
     if convert_as_inline:
         return text
@@ -548,15 +531,6 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
 def _convert_caption(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML caption element to emphasized text.
-    Args:
-        text: The text content of the caption element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text with caption formatting.
-    """
     if convert_as_inline:
         return text
@@ -567,15 +541,6 @@ def _convert_caption(*, text: str, convert_as_inline: bool) -> str:
 def _convert_thead(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML thead element preserving table structure.
-    Args:
-        text: The text content of the thead element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text preserving table structure.
-    """
     if convert_as_inline:
         return text
@@ -583,15 +548,6 @@ def _convert_thead(*, text: str, convert_as_inline: bool) -> str:
 def _convert_tbody(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML tbody element preserving table structure.
-    Args:
-        text: The text content of the tbody element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text preserving table structure.
-    """
     if convert_as_inline:
         return text
@@ -599,15 +555,6 @@ def _convert_tbody(*, text: str, convert_as_inline: bool) -> str:
 def _convert_tfoot(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML tfoot element preserving table structure.
-    Args:
-        text: The text content of the tfoot element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text preserving table structure.
-    """
     if convert_as_inline:
         return text
@@ -615,66 +562,27 @@ def _convert_tfoot(*, text: str, convert_as_inline: bool) -> str:
 def _convert_colgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML colgroup element - removes it entirely from Markdown output.
-    Colgroup is a table column grouping element that defines styling for columns.
-    It has no representation in Markdown and should be removed.
-    Args:
-        tag: The colgroup tag element.
-        text: The text content of the colgroup element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        Empty string as colgroup has no Markdown representation.
-    """
     _ = tag, text, convert_as_inline
     return ""
 def _convert_col(*, tag: Tag, convert_as_inline: bool) -> str:
-    """Convert HTML col element - removes it entirely from Markdown output.
-    Col elements define column properties (width, style) in HTML tables.
-    They have no representation in Markdown and should be removed.
-    Args:
-        tag: The col tag element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        Empty string as col has no Markdown representation.
-    """
     _ = tag, convert_as_inline
     return ""
 def _convert_semantic_block(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML5 semantic elements to block-level Markdown.
-    Args:
-        text: The text content of the semantic element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text with proper block spacing.
-    """
     if convert_as_inline:
         return text
     return f"{text}\n\n" if text.strip() else ""
-def _convert_details(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML details element to semantic Markdown.
+def _convert_div(*, text: str, convert_as_inline: bool) -> str:  # noqa: ARG001
+    return text
-    Args:
-        text: The text content of the details element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (only content, no HTML tags).
-    """
+def _convert_details(*, text: str, convert_as_inline: bool) -> str:
     if convert_as_inline:
         return text
@@ -682,15 +590,6 @@ def _convert_details(*, text: str, convert_as_inline: bool) -> str:
 def _convert_summary(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML summary element to emphasized text.
-    Args:
-        text: The text content of the summary element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text as bold heading.
-    """
     if convert_as_inline:
         return text
@@ -698,15 +597,6 @@ def _convert_summary(*, text: str, convert_as_inline: bool) -> str:
 def _convert_dl(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML definition list element.
-    Args:
-        text: The text content of the definition list.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text with proper spacing.
-    """
     if convert_as_inline:
         return text
@@ -714,15 +604,6 @@ def _convert_dl(*, text: str, convert_as_inline: bool) -> str:
 def _convert_dt(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML definition term element.
-    Args:
-        text: The text content of the definition term.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text as a definition term.
-    """
     if convert_as_inline:
         return text
@@ -733,15 +614,6 @@ def _convert_dt(*, text: str, convert_as_inline: bool) -> str:
 def _convert_dd(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML definition description element.
-    Args:
-        text: The text content of the definition description.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text as a definition description.
-    """
     if convert_as_inline:
         return text
@@ -752,15 +624,6 @@ def _convert_dd(*, text: str, convert_as_inline: bool) -> str:
 def _convert_cite(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML cite element to italic text.
-    Args:
-        text: The text content of the cite element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text in italic format.
-    """
     if convert_as_inline:
         return text
@@ -771,15 +634,6 @@ def _convert_cite(*, text: str, convert_as_inline: bool) -> str:
 def _convert_q(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML q element to quoted text.
-    Args:
-        text: The text content of the q element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text with quotes.
-    """
     if convert_as_inline:
         return text
@@ -791,16 +645,6 @@ def _convert_q(*, text: str, convert_as_inline: bool) -> str:
 def _convert_media_element(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML media elements (audio/video) to semantic Markdown.
-    Args:
-        tag: The media tag element.
-        text: The text content of the media element (fallback content).
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (link if src exists, otherwise fallback content).
-    """
     src = tag.get("src", "")
     if not src and (source_tag := tag.find("source")) and isinstance(source_tag, Tag):
@@ -822,16 +666,6 @@ def _convert_media_element(*, tag: Tag, text: str, convert_as_inline: bool) -> s
 def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML iframe element to semantic Markdown.
-    Args:
-        tag: The iframe tag element.
-        text: The text content of the iframe element (usually empty).
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (link if src exists).
-    """
     _ = text
     src = tag.get("src", "")
@@ -845,16 +679,6 @@ def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML abbr element to text with optional title.
-    Args:
-        tag: The abbr tag element.
-        text: The text content of the abbr element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text with optional title annotation.
-    """
     _ = convert_as_inline
     if not text.strip():
         return ""
@@ -867,16 +691,6 @@ def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML time element to semantic Markdown.
-    Args:
-        tag: The time tag element.
-        text: The text content of the time element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (content only, no HTML tags).
-    """
     _ = tag
     _ = convert_as_inline
     if not text.strip():
@@ -886,16 +700,6 @@ def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML data element to semantic Markdown.
-    Args:
-        tag: The data tag element.
-        text: The text content of the data element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (content only, no HTML tags).
-    """
     _ = tag
     _ = convert_as_inline
     if not text.strip():
@@ -905,29 +709,11 @@ def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_wbr(*, convert_as_inline: bool) -> str:
-    """Convert HTML wbr (word break opportunity) element.
-    Args:
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        Empty string as wbr is just a break opportunity.
-    """
     _ = convert_as_inline
     return ""
 def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML form element to semantic Markdown.
-    Args:
-        tag: The form tag element.
-        text: The text content of the form element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (only content, no HTML tags).
-    """
     _ = tag
     if convert_as_inline:
         return text
@@ -939,15 +725,6 @@ def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML fieldset element to semantic Markdown.
-    Args:
-        text: The text content of the fieldset element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (only content, no HTML tags).
-    """
     if convert_as_inline:
         return text
@@ -958,15 +735,6 @@ def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
 def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML legend element to emphasized text.
-    Args:
-        text: The text content of the legend element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text as emphasized legend.
-    """
     if convert_as_inline:
         return text
@@ -977,16 +745,6 @@ def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
 def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML label element to Markdown.
-    Args:
-        tag: The label tag element.
-        text: The text content of the label element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The label text content.
-    """
     _ = tag
     if not text.strip():
         return ""
@@ -995,30 +753,11 @@ def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_input_enhanced(*, tag: Tag, convert_as_inline: bool) -> str:
-    """Convert HTML input element to Markdown.
-    Args:
-        tag: The input tag element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        Empty string since input elements have no Markdown representation.
-    """
     _ = tag, convert_as_inline
     return ""
 def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML textarea element to Markdown.
-    Args:
-        tag: The textarea tag element.
-        text: The text content of the textarea element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The text content of the textarea.
-    """
     _ = tag
     if not text.strip():
         return ""
@@ -1027,16 +766,6 @@ def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML select element to Markdown.
-    Args:
-        tag: The select tag element.
-        text: The text content of the select element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The text content (options) as a comma-separated list.
-    """
     _ = tag
     if not text.strip():
         return ""
@@ -1049,16 +778,6 @@ def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML option element to Markdown.
-    Args:
-        tag: The option tag element.
-        text: The text content of the option element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The option text, potentially with a marker if selected.
-    """
     if not text.strip():
         return ""
@@ -1074,16 +793,6 @@ def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML optgroup element to semantic Markdown.
-    Args:
-        tag: The optgroup tag element.
-        text: The text content of the optgroup element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text with label as heading.
-    """
     if convert_as_inline:
         return text
@@ -1100,16 +809,6 @@ def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML button element to Markdown.
-    Args:
-        tag: The button tag element.
-        text: The text content of the button element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The button text content.
-    """
     _ = tag
     if not text.strip():
         return ""
@@ -1118,16 +817,6 @@ def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML progress element to semantic text.
-    Args:
-        tag: The progress tag element.
-        text: The text content of the progress element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (only content, no HTML tags).
-    """
     _ = tag
     if convert_as_inline:
         return text
@@ -1139,16 +828,6 @@ def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML meter element to semantic text.
-    Args:
-        tag: The meter tag element.
-        text: The text content of the meter element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (only content, no HTML tags).
-    """
     _ = tag
     if convert_as_inline:
         return text
@@ -1160,16 +839,6 @@ def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML output element to semantic text.
-    Args:
-        tag: The output tag element.
-        text: The text content of the output element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (only content, no HTML tags).
-    """
     _ = tag
     if convert_as_inline:
         return text
@@ -1181,16 +850,6 @@ def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML datalist element to semantic Markdown.
-    Args:
-        tag: The datalist tag element.
-        text: The text content of the datalist element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (only content, no HTML tags).
-    """
     _ = tag
     if convert_as_inline:
         return text
@@ -1202,15 +861,6 @@ def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
 def _convert_ruby(*, text: str, convert_as_inline: bool) -> str:  # noqa: ARG001
-    """Convert HTML ruby element providing pronunciation annotation.
-    Args:
-        text: The text content of the ruby element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text with ruby annotation as fallback text.
-    """
     if not text.strip():
         return ""
@@ -1218,15 +868,6 @@ def _convert_ruby(*, text: str, convert_as_inline: bool) -> str:  # noqa: ARG001
 def _convert_rb(*, text: str, convert_as_inline: bool) -> str:  # noqa: ARG001
-    """Convert HTML rb (ruby base) element.
-    Args:
-        text: The text content of the rb element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (ruby base text).
-    """
     if not text.strip():
         return ""
@@ -1234,16 +875,6 @@ def _convert_rb(*, text: str, convert_as_inline: bool) -> str:  # noqa: ARG001
 def _convert_rt(*, text: str, convert_as_inline: bool, tag: Tag) -> str:  # noqa: ARG001
-    """Convert HTML rt (ruby text) element for pronunciation.
-    Args:
-        text: The text content of the rt element.
-        convert_as_inline: Whether to convert as inline content.
-        tag: The rt tag element.
-    Returns:
-        The converted markdown text with pronunciation in parentheses.
-    """
     content = text.strip()
     prev_sibling = tag.previous_sibling
@@ -1259,15 +890,6 @@ def _convert_rt(*, text: str, convert_as_inline: bool, tag: Tag) -> str:  # noqa
 def _convert_rp(*, text: str, convert_as_inline: bool) -> str:  # noqa: ARG001
-    """Convert HTML rp (ruby parentheses) element for fallback.
-    Args:
-        text: The text content of the rp element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (parentheses for ruby fallback).
-    """
     if not text.strip():
         return ""
@@ -1275,15 +897,6 @@ def _convert_rp(*, text: str, convert_as_inline: bool) -> str:  # noqa: ARG001
 def _convert_rtc(*, text: str, convert_as_inline: bool) -> str:  # noqa: ARG001
-    """Convert HTML rtc (ruby text container) element.
-    Args:
-        text: The text content of the rtc element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (ruby text container).
-    """
     if not text.strip():
         return ""
@@ -1291,16 +904,6 @@ def _convert_rtc(*, text: str, convert_as_inline: bool) -> str:  # noqa: ARG001
 def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
-    """Convert HTML dialog element to semantic Markdown.
-    Args:
-        text: The text content of the dialog element.
-        convert_as_inline: Whether to convert as inline content.
-        tag: The dialog tag element.
-    Returns:
-        The converted markdown text (only content, no HTML tags).
-    """
     _ = tag
     if convert_as_inline:
         return text
@@ -1312,16 +915,6 @@ def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
 def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
-    """Convert HTML menu element to semantic Markdown.
-    Args:
-        text: The text content of the menu element.
-        convert_as_inline: Whether to convert as inline content.
-        tag: The menu tag element.
-    Returns:
-        The converted markdown text (only content, no HTML tags).
-    """
     _ = tag
     if convert_as_inline:
         return text
@@ -1333,16 +926,6 @@ def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
 def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
-    """Convert HTML figure element to semantic Markdown.
-    Args:
-        text: The text content of the figure element.
-        convert_as_inline: Whether to convert as inline content.
-        tag: The figure tag element.
-    Returns:
-        The converted markdown text (only content, no HTML tags).
-    """
     _ = tag
     if not text.strip():
         return ""
@@ -1360,15 +943,6 @@ def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
 def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
-    """Convert HTML hgroup element to semantic Markdown.
-    Args:
-        text: The text content of the hgroup element.
-        convert_as_inline: Whether to convert as inline content.
-    Returns:
-        The converted markdown text (only content, no HTML tags).
-    """
     if convert_as_inline:
         return text
@@ -1379,16 +953,6 @@ def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
 def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
-    """Convert HTML picture element to semantic Markdown.
-    Args:
-        text: The text content of the picture element.
-        convert_as_inline: Whether to convert as inline content.
-        tag: The picture tag element.
-    Returns:
-        The converted markdown text (only the img element).
-    """
     _ = tag, convert_as_inline
     if not text.strip():
         return ""
@@ -1397,16 +961,6 @@ def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
 def _convert_svg(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
-    """Convert SVG element to Markdown image reference.
-    Args:
-        text: The text content of the SVG element.
-        convert_as_inline: Whether to convert as inline content.
-        tag: The SVG tag element.
-    Returns:
-        The converted markdown text as an image reference.
-    """
     if convert_as_inline:
         return text.strip()
@@ -1425,16 +979,6 @@ def _convert_svg(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
 def _convert_math(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
-    """Convert MathML math element preserving mathematical notation.
-    Args:
-        text: The text content of the math element.
-        convert_as_inline: Whether to convert as inline content.
-        tag: The math tag element.
-    Returns:
-        The converted markdown text preserving math structure.
-    """
     if not text.strip():
         return ""
@@ -1457,6 +1001,8 @@ def create_converters_map(
     heading_style: Literal["atx", "atx_closed", "underlined"],
     highlight_style: Literal["double-equal", "html", "bold"],
     keep_inline_images_in: Iterable[str] | None,
+    list_indent_type: str,
+    list_indent_width: int,
     newline_style: str,
     strong_em_symbol: str,
     sub_symbol: str,
@@ -1464,27 +1010,7 @@ def create_converters_map(
     wrap: bool,
     wrap_width: int,
 ) -> ConvertersMap:
-    """Create a mapping of HTML elements to their corresponding conversion functions.
-    Args:
-        autolinks: Whether to convert URLs into links.
-        bullets: The bullet characters to use for unordered lists.
-        code_language: The default code language to use.
-        code_language_callback: A callback to get the code language.
-        default_title: Whether to use the URL as the title for links.
-        heading_style: The style of headings.
-        highlight_style: The style to use for highlighted text (mark elements).
-        keep_inline_images_in: The tags to keep inline images in.
-        newline_style: The style of newlines.
-        strong_em_symbol: The symbol to use for strong and emphasis text.
-        sub_symbol: The symbol to use for subscript text.
-        sup_symbol: The symbol to use for superscript text.
-        wrap: Whether to wrap text.
-        wrap_width: The width to wrap text at.
-    Returns:
-        A mapping of HTML elements to their corresponding conversion functions
-    """
+    list_indent_str = "\t" if list_indent_type == "tabs" else " " * list_indent_width
     def _wrapper(func: Callable[..., T]) -> Callable[[str, Tag], T]:
         spec = getfullargspec(func)
@@ -1498,6 +1024,8 @@ def create_converters_map(
                     kwargs["text"] = text
                 if "convert_as_inline" in spec.kwonlyargs:
                     kwargs["convert_as_inline"] = convert_as_inline
+                if "list_indent_str" in spec.kwonlyargs:
+                    kwargs["list_indent_str"] = list_indent_str
                 return func(**kwargs)
             return func(text)
@@ -1512,7 +1040,7 @@ def create_converters_map(
         "b": _wrapper(partial(_create_inline_converter(2 * strong_em_symbol))),
         "bdi": _wrapper(_create_inline_converter("")),
         "bdo": _wrapper(_create_inline_converter("")),
-        "blockquote": _wrapper(partial(_convert_blockquote)),
+        "blockquote": _wrapper(partial(_convert_blockquote, list_indent_str=list_indent_str)),
         "br": _wrapper(partial(_convert_br, newline_style=newline_style)),
         "button": _wrapper(_convert_button),
         "caption": _wrapper(_convert_caption),
@@ -1527,6 +1055,7 @@ def create_converters_map(
         "details": _wrapper(_convert_details),
         "dfn": _wrapper(_create_inline_converter("*")),
         "dialog": _wrapper(_convert_dialog),
+        "div": _wrapper(_convert_div),
         "dl": _wrapper(_convert_dl),
         "dt": _wrapper(_convert_dt),
         "em": _wrapper(_create_inline_converter(strong_em_symbol)),
@@ -1552,19 +1081,19 @@ def create_converters_map(
         "kbd": _wrapper(_create_inline_converter("`")),
         "label": _wrapper(_convert_label),
         "legend": _wrapper(_convert_legend),
-        "li": _wrapper(partial(_convert_li, bullets=bullets)),
-        "list": _wrapper(_convert_list),
+        "li": _wrapper(partial(_convert_li, bullets=bullets, list_indent_str=list_indent_str)),
+        "list": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
         "main": _wrapper(_convert_semantic_block),
         "mark": _wrapper(partial(_convert_mark, highlight_style=highlight_style)),
         "math": _wrapper(_convert_math),
         "menu": _wrapper(_convert_menu),
         "meter": _wrapper(_convert_meter),
         "nav": _wrapper(_convert_semantic_block),
-        "ol": _wrapper(_convert_list),
+        "ol": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
         "optgroup": _wrapper(_convert_optgroup),
         "option": _wrapper(_convert_option),
         "output": _wrapper(_convert_output),
-        "p": _wrapper(partial(_convert_p, wrap=wrap, wrap_width=wrap_width)),
+        "p": _wrapper(partial(_convert_p, wrap=wrap, wrap_width=wrap_width, list_indent_str=list_indent_str)),
         "picture": _wrapper(_convert_picture),
         "pre": _wrapper(
             partial(
@@ -1602,7 +1131,7 @@ def create_converters_map(
         "time": _wrapper(_convert_time),
         "tr": _wrapper(_convert_tr),
         "u": _wrapper(_create_inline_converter("")),
-        "ul": _wrapper(_convert_list),
+        "ul": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
         "var": _wrapper(_create_inline_converter("*")),
         "video": _wrapper(_convert_media_element),
         "wbr": _wrapper(_convert_wbr),

html-to-markdown 1.9.1__py3-none-any.whl → 1.10.0__py3-none-any.whl

Potentially problematic release.

html-to-markdown 1.9.1py3-none-any.whl → 1.10.0py3-none-any.whl