html-to-markdown 1.9.1__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of html-to-markdown might be problematic. Click here for more details.

@@ -23,17 +23,14 @@ from html_to_markdown.utils import chomp, indent, underline
23
23
 
24
24
 
25
25
  def _format_block_element(text: str) -> str:
26
- """Format text as a block element with trailing newlines."""
27
26
  return f"{text.strip()}\n\n" if text.strip() else ""
28
27
 
29
28
 
30
29
  def _format_inline_or_block(text: str, convert_as_inline: bool) -> str:
31
- """Format text as inline or block element based on context."""
32
30
  return text.strip() if convert_as_inline else _format_block_element(text)
33
31
 
34
32
 
35
33
  def _format_wrapped_block(text: str, start_marker: str, end_marker: str = "") -> str:
36
- """Format text wrapped in markers as a block element."""
37
34
  if not end_marker:
38
35
  end_marker = start_marker
39
36
  return f"{start_marker}{text.strip()}{end_marker}\n\n" if text.strip() else ""
@@ -63,6 +60,7 @@ SupportedElements = Literal[
63
60
  "details",
64
61
  "dfn",
65
62
  "dialog",
63
+ "div",
66
64
  "dl",
67
65
  "dt",
68
66
  "em",
@@ -145,15 +143,6 @@ T = TypeVar("T")
145
143
 
146
144
 
147
145
  def _create_inline_converter(markup_prefix: str) -> Callable[[Tag, str], str]:
148
- """Create an inline converter for a markup pattern or tag.
149
-
150
- Args:
151
- markup_prefix: The markup prefix to insert.
152
-
153
- Returns:
154
- A function that can be used to convert HTML to Markdown.
155
- """
156
-
157
146
  def implementation(*, tag: Tag, text: str) -> str:
158
147
  from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
159
148
 
@@ -200,7 +189,7 @@ def _convert_a(*, tag: Tag, text: str, autolinks: bool, default_title: bool) ->
200
189
  return f"{prefix}[{text}]({href}{title_part}){suffix}" if href else text
201
190
 
202
191
 
203
- def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool) -> str:
192
+ def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool, list_indent_str: str) -> str:
204
193
  if convert_as_inline:
205
194
  return text
206
195
 
@@ -213,14 +202,14 @@ def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool) -> str:
213
202
 
214
203
  if _has_ancestor(tag, "li"):
215
204
  lines = text.strip().split("\n")
216
- indented_lines = [f" > {line}" if line.strip() else "" for line in lines]
205
+ indented_lines = [f"{list_indent_str}> {line}" if line.strip() else "" for line in lines]
217
206
  quote_text = "\n".join(indented_lines) + "\n\n"
218
207
  else:
219
208
  quote_text = f"\n{line_beginning_re.sub('> ', text.strip())}\n\n"
220
209
 
221
210
  if cite_url:
222
211
  if _has_ancestor(tag, "li"):
223
- quote_text += f" — <{cite_url}>\n\n"
212
+ quote_text += f"{list_indent_str}— <{cite_url}>\n\n"
224
213
  else:
225
214
  quote_text += f"— <{cite_url}>\n\n"
226
215
 
@@ -281,7 +270,7 @@ def _convert_img(*, tag: Tag, convert_as_inline: bool, keep_inline_images_in: It
281
270
  return f"![{alt}]({src}{title_part})"
282
271
 
283
272
 
284
- def _convert_list(*, tag: Tag, text: str) -> str:
273
+ def _convert_list(*, tag: Tag, text: str, list_indent_str: str) -> str:
285
274
  from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
286
275
 
287
276
  before_paragraph = False
@@ -307,18 +296,18 @@ def _convert_list(*, tag: Tag, text: str) -> str:
307
296
  indented_lines = []
308
297
  for line in lines:
309
298
  if line.strip():
310
- indented_lines.append(f" {line}")
299
+ indented_lines.append(f"{list_indent_str}{line}")
311
300
  else:
312
301
  indented_lines.append("")
313
302
  return "\n" + "\n".join(indented_lines) + "\n"
314
- return "\n" + indent(text=text, level=1).rstrip()
303
+ return "\n" + indent(text=text, level=1, indent_str=list_indent_str).rstrip()
315
304
 
316
305
  if tag.parent and tag.parent.name in {"ul", "ol"}:
317
306
  lines = text.strip().split("\n")
318
307
  indented_lines = []
319
308
  for line in lines:
320
309
  if line.strip():
321
- indented_lines.append(f" {line}")
310
+ indented_lines.append(f"{list_indent_str}{line}")
322
311
  else:
323
312
  indented_lines.append("")
324
313
  result = "\n".join(indented_lines)
@@ -329,7 +318,7 @@ def _convert_list(*, tag: Tag, text: str) -> str:
329
318
  return text + ("\n" if before_paragraph else "")
330
319
 
331
320
 
332
- def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
321
+ def _convert_li(*, tag: Tag, text: str, bullets: str, list_indent_str: str) -> str:
333
322
  checkbox = tag.find("input", {"type": "checkbox"})
334
323
  if checkbox and isinstance(checkbox, Tag):
335
324
  checked = checkbox.get("checked") is not None
@@ -375,14 +364,18 @@ def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
375
364
  for para in paragraphs[1:]:
376
365
  if para.strip():
377
366
  result_parts.append("\n")
378
- result_parts.extend(f" {line}\n" for line in para.strip().split("\n") if line.strip())
367
+ result_parts.extend(
368
+ f"{list_indent_str}{line}\n" for line in para.strip().split("\n") if line.strip()
369
+ )
379
370
 
380
371
  return "".join(result_parts)
381
372
 
382
373
  return "{} {}\n".format(bullet, (text or "").strip())
383
374
 
384
375
 
385
- def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int, tag: Tag) -> str:
376
+ def _convert_p(
377
+ *, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int, tag: Tag, list_indent_str: str
378
+ ) -> str:
386
379
  if convert_as_inline:
387
380
  return text
388
381
 
@@ -408,7 +401,7 @@ def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: in
408
401
  indented_lines = []
409
402
  for line in text.split("\n"):
410
403
  if line.strip():
411
- indented_lines.append(f" {line}")
404
+ indented_lines.append(f"{list_indent_str}{line}")
412
405
  else:
413
406
  indented_lines.append("")
414
407
  text = "\n".join(indented_lines)
@@ -417,16 +410,6 @@ def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: in
417
410
 
418
411
 
419
412
  def _convert_mark(*, text: str, convert_as_inline: bool, highlight_style: str) -> str:
420
- """Convert HTML mark element to Markdown highlighting.
421
-
422
- Args:
423
- text: The text content of the mark element.
424
- convert_as_inline: Whether to convert as inline content.
425
- highlight_style: The style to use for highlighting ("double-equal", "html", "bold").
426
-
427
- Returns:
428
- The converted markdown text.
429
- """
430
413
  if convert_as_inline:
431
414
  return text
432
415
 
@@ -548,15 +531,6 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
548
531
 
549
532
 
550
533
  def _convert_caption(*, text: str, convert_as_inline: bool) -> str:
551
- """Convert HTML caption element to emphasized text.
552
-
553
- Args:
554
- text: The text content of the caption element.
555
- convert_as_inline: Whether to convert as inline content.
556
-
557
- Returns:
558
- The converted markdown text with caption formatting.
559
- """
560
534
  if convert_as_inline:
561
535
  return text
562
536
 
@@ -567,15 +541,6 @@ def _convert_caption(*, text: str, convert_as_inline: bool) -> str:
567
541
 
568
542
 
569
543
  def _convert_thead(*, text: str, convert_as_inline: bool) -> str:
570
- """Convert HTML thead element preserving table structure.
571
-
572
- Args:
573
- text: The text content of the thead element.
574
- convert_as_inline: Whether to convert as inline content.
575
-
576
- Returns:
577
- The converted markdown text preserving table structure.
578
- """
579
544
  if convert_as_inline:
580
545
  return text
581
546
 
@@ -583,15 +548,6 @@ def _convert_thead(*, text: str, convert_as_inline: bool) -> str:
583
548
 
584
549
 
585
550
  def _convert_tbody(*, text: str, convert_as_inline: bool) -> str:
586
- """Convert HTML tbody element preserving table structure.
587
-
588
- Args:
589
- text: The text content of the tbody element.
590
- convert_as_inline: Whether to convert as inline content.
591
-
592
- Returns:
593
- The converted markdown text preserving table structure.
594
- """
595
551
  if convert_as_inline:
596
552
  return text
597
553
 
@@ -599,15 +555,6 @@ def _convert_tbody(*, text: str, convert_as_inline: bool) -> str:
599
555
 
600
556
 
601
557
  def _convert_tfoot(*, text: str, convert_as_inline: bool) -> str:
602
- """Convert HTML tfoot element preserving table structure.
603
-
604
- Args:
605
- text: The text content of the tfoot element.
606
- convert_as_inline: Whether to convert as inline content.
607
-
608
- Returns:
609
- The converted markdown text preserving table structure.
610
- """
611
558
  if convert_as_inline:
612
559
  return text
613
560
 
@@ -615,66 +562,30 @@ def _convert_tfoot(*, text: str, convert_as_inline: bool) -> str:
615
562
 
616
563
 
617
564
  def _convert_colgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
618
- """Convert HTML colgroup element - removes it entirely from Markdown output.
619
-
620
- Colgroup is a table column grouping element that defines styling for columns.
621
- It has no representation in Markdown and should be removed.
622
-
623
- Args:
624
- tag: The colgroup tag element.
625
- text: The text content of the colgroup element.
626
- convert_as_inline: Whether to convert as inline content.
627
-
628
- Returns:
629
- Empty string as colgroup has no Markdown representation.
630
- """
631
565
  _ = tag, text, convert_as_inline
632
566
  return ""
633
567
 
634
568
 
635
569
  def _convert_col(*, tag: Tag, convert_as_inline: bool) -> str:
636
- """Convert HTML col element - removes it entirely from Markdown output.
637
-
638
- Col elements define column properties (width, style) in HTML tables.
639
- They have no representation in Markdown and should be removed.
640
-
641
- Args:
642
- tag: The col tag element.
643
- convert_as_inline: Whether to convert as inline content.
644
-
645
- Returns:
646
- Empty string as col has no Markdown representation.
647
- """
648
570
  _ = tag, convert_as_inline
649
571
  return ""
650
572
 
651
573
 
652
574
  def _convert_semantic_block(*, text: str, convert_as_inline: bool) -> str:
653
- """Convert HTML5 semantic elements to block-level Markdown.
654
-
655
- Args:
656
- text: The text content of the semantic element.
657
- convert_as_inline: Whether to convert as inline content.
658
-
659
- Returns:
660
- The converted markdown text with proper block spacing.
661
- """
662
575
  if convert_as_inline:
663
576
  return text
664
577
 
665
578
  return f"{text}\n\n" if text.strip() else ""
666
579
 
667
580
 
668
- def _convert_details(*, text: str, convert_as_inline: bool) -> str:
669
- """Convert HTML details element to semantic Markdown.
581
+ def _convert_div(*, text: str, convert_as_inline: bool) -> str:
582
+ if convert_as_inline:
583
+ return text
584
+
585
+ return _format_block_element(text)
670
586
 
671
- Args:
672
- text: The text content of the details element.
673
- convert_as_inline: Whether to convert as inline content.
674
587
 
675
- Returns:
676
- The converted markdown text (only content, no HTML tags).
677
- """
588
+ def _convert_details(*, text: str, convert_as_inline: bool) -> str:
678
589
  if convert_as_inline:
679
590
  return text
680
591
 
@@ -682,15 +593,6 @@ def _convert_details(*, text: str, convert_as_inline: bool) -> str:
682
593
 
683
594
 
684
595
  def _convert_summary(*, text: str, convert_as_inline: bool) -> str:
685
- """Convert HTML summary element to emphasized text.
686
-
687
- Args:
688
- text: The text content of the summary element.
689
- convert_as_inline: Whether to convert as inline content.
690
-
691
- Returns:
692
- The converted markdown text as bold heading.
693
- """
694
596
  if convert_as_inline:
695
597
  return text
696
598
 
@@ -698,15 +600,6 @@ def _convert_summary(*, text: str, convert_as_inline: bool) -> str:
698
600
 
699
601
 
700
602
  def _convert_dl(*, text: str, convert_as_inline: bool) -> str:
701
- """Convert HTML definition list element.
702
-
703
- Args:
704
- text: The text content of the definition list.
705
- convert_as_inline: Whether to convert as inline content.
706
-
707
- Returns:
708
- The converted markdown text with proper spacing.
709
- """
710
603
  if convert_as_inline:
711
604
  return text
712
605
 
@@ -714,15 +607,6 @@ def _convert_dl(*, text: str, convert_as_inline: bool) -> str:
714
607
 
715
608
 
716
609
  def _convert_dt(*, text: str, convert_as_inline: bool) -> str:
717
- """Convert HTML definition term element.
718
-
719
- Args:
720
- text: The text content of the definition term.
721
- convert_as_inline: Whether to convert as inline content.
722
-
723
- Returns:
724
- The converted markdown text as a definition term.
725
- """
726
610
  if convert_as_inline:
727
611
  return text
728
612
 
@@ -733,15 +617,6 @@ def _convert_dt(*, text: str, convert_as_inline: bool) -> str:
733
617
 
734
618
 
735
619
  def _convert_dd(*, text: str, convert_as_inline: bool) -> str:
736
- """Convert HTML definition description element.
737
-
738
- Args:
739
- text: The text content of the definition description.
740
- convert_as_inline: Whether to convert as inline content.
741
-
742
- Returns:
743
- The converted markdown text as a definition description.
744
- """
745
620
  if convert_as_inline:
746
621
  return text
747
622
 
@@ -752,15 +627,6 @@ def _convert_dd(*, text: str, convert_as_inline: bool) -> str:
752
627
 
753
628
 
754
629
  def _convert_cite(*, text: str, convert_as_inline: bool) -> str:
755
- """Convert HTML cite element to italic text.
756
-
757
- Args:
758
- text: The text content of the cite element.
759
- convert_as_inline: Whether to convert as inline content.
760
-
761
- Returns:
762
- The converted markdown text in italic format.
763
- """
764
630
  if convert_as_inline:
765
631
  return text
766
632
 
@@ -771,15 +637,6 @@ def _convert_cite(*, text: str, convert_as_inline: bool) -> str:
771
637
 
772
638
 
773
639
  def _convert_q(*, text: str, convert_as_inline: bool) -> str:
774
- """Convert HTML q element to quoted text.
775
-
776
- Args:
777
- text: The text content of the q element.
778
- convert_as_inline: Whether to convert as inline content.
779
-
780
- Returns:
781
- The converted markdown text with quotes.
782
- """
783
640
  if convert_as_inline:
784
641
  return text
785
642
 
@@ -791,16 +648,6 @@ def _convert_q(*, text: str, convert_as_inline: bool) -> str:
791
648
 
792
649
 
793
650
  def _convert_media_element(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
794
- """Convert HTML media elements (audio/video) to semantic Markdown.
795
-
796
- Args:
797
- tag: The media tag element.
798
- text: The text content of the media element (fallback content).
799
- convert_as_inline: Whether to convert as inline content.
800
-
801
- Returns:
802
- The converted markdown text (link if src exists, otherwise fallback content).
803
- """
804
651
  src = tag.get("src", "")
805
652
 
806
653
  if not src and (source_tag := tag.find("source")) and isinstance(source_tag, Tag):
@@ -822,16 +669,6 @@ def _convert_media_element(*, tag: Tag, text: str, convert_as_inline: bool) -> s
822
669
 
823
670
 
824
671
  def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
825
- """Convert HTML iframe element to semantic Markdown.
826
-
827
- Args:
828
- tag: The iframe tag element.
829
- text: The text content of the iframe element (usually empty).
830
- convert_as_inline: Whether to convert as inline content.
831
-
832
- Returns:
833
- The converted markdown text (link if src exists).
834
- """
835
672
  _ = text
836
673
  src = tag.get("src", "")
837
674
 
@@ -845,16 +682,6 @@ def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
845
682
 
846
683
 
847
684
  def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
848
- """Convert HTML abbr element to text with optional title.
849
-
850
- Args:
851
- tag: The abbr tag element.
852
- text: The text content of the abbr element.
853
- convert_as_inline: Whether to convert as inline content.
854
-
855
- Returns:
856
- The converted markdown text with optional title annotation.
857
- """
858
685
  _ = convert_as_inline
859
686
  if not text.strip():
860
687
  return ""
@@ -867,16 +694,6 @@ def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
867
694
 
868
695
 
869
696
  def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
870
- """Convert HTML time element to semantic Markdown.
871
-
872
- Args:
873
- tag: The time tag element.
874
- text: The text content of the time element.
875
- convert_as_inline: Whether to convert as inline content.
876
-
877
- Returns:
878
- The converted markdown text (content only, no HTML tags).
879
- """
880
697
  _ = tag
881
698
  _ = convert_as_inline
882
699
  if not text.strip():
@@ -886,16 +703,6 @@ def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
886
703
 
887
704
 
888
705
  def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
889
- """Convert HTML data element to semantic Markdown.
890
-
891
- Args:
892
- tag: The data tag element.
893
- text: The text content of the data element.
894
- convert_as_inline: Whether to convert as inline content.
895
-
896
- Returns:
897
- The converted markdown text (content only, no HTML tags).
898
- """
899
706
  _ = tag
900
707
  _ = convert_as_inline
901
708
  if not text.strip():
@@ -905,29 +712,11 @@ def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
905
712
 
906
713
 
907
714
  def _convert_wbr(*, convert_as_inline: bool) -> str:
908
- """Convert HTML wbr (word break opportunity) element.
909
-
910
- Args:
911
- convert_as_inline: Whether to convert as inline content.
912
-
913
- Returns:
914
- Empty string as wbr is just a break opportunity.
915
- """
916
715
  _ = convert_as_inline
917
716
  return ""
918
717
 
919
718
 
920
719
  def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
921
- """Convert HTML form element to semantic Markdown.
922
-
923
- Args:
924
- tag: The form tag element.
925
- text: The text content of the form element.
926
- convert_as_inline: Whether to convert as inline content.
927
-
928
- Returns:
929
- The converted markdown text (only content, no HTML tags).
930
- """
931
720
  _ = tag
932
721
  if convert_as_inline:
933
722
  return text
@@ -939,15 +728,6 @@ def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
939
728
 
940
729
 
941
730
  def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
942
- """Convert HTML fieldset element to semantic Markdown.
943
-
944
- Args:
945
- text: The text content of the fieldset element.
946
- convert_as_inline: Whether to convert as inline content.
947
-
948
- Returns:
949
- The converted markdown text (only content, no HTML tags).
950
- """
951
731
  if convert_as_inline:
952
732
  return text
953
733
 
@@ -958,15 +738,6 @@ def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
958
738
 
959
739
 
960
740
  def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
961
- """Convert HTML legend element to emphasized text.
962
-
963
- Args:
964
- text: The text content of the legend element.
965
- convert_as_inline: Whether to convert as inline content.
966
-
967
- Returns:
968
- The converted markdown text as emphasized legend.
969
- """
970
741
  if convert_as_inline:
971
742
  return text
972
743
 
@@ -977,16 +748,6 @@ def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
977
748
 
978
749
 
979
750
  def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
980
- """Convert HTML label element to Markdown.
981
-
982
- Args:
983
- tag: The label tag element.
984
- text: The text content of the label element.
985
- convert_as_inline: Whether to convert as inline content.
986
-
987
- Returns:
988
- The label text content.
989
- """
990
751
  _ = tag
991
752
  if not text.strip():
992
753
  return ""
@@ -995,30 +756,11 @@ def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
995
756
 
996
757
 
997
758
  def _convert_input_enhanced(*, tag: Tag, convert_as_inline: bool) -> str:
998
- """Convert HTML input element to Markdown.
999
-
1000
- Args:
1001
- tag: The input tag element.
1002
- convert_as_inline: Whether to convert as inline content.
1003
-
1004
- Returns:
1005
- Empty string since input elements have no Markdown representation.
1006
- """
1007
759
  _ = tag, convert_as_inline
1008
760
  return ""
1009
761
 
1010
762
 
1011
763
  def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1012
- """Convert HTML textarea element to Markdown.
1013
-
1014
- Args:
1015
- tag: The textarea tag element.
1016
- text: The text content of the textarea element.
1017
- convert_as_inline: Whether to convert as inline content.
1018
-
1019
- Returns:
1020
- The text content of the textarea.
1021
- """
1022
764
  _ = tag
1023
765
  if not text.strip():
1024
766
  return ""
@@ -1027,16 +769,6 @@ def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1027
769
 
1028
770
 
1029
771
  def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1030
- """Convert HTML select element to Markdown.
1031
-
1032
- Args:
1033
- tag: The select tag element.
1034
- text: The text content of the select element.
1035
- convert_as_inline: Whether to convert as inline content.
1036
-
1037
- Returns:
1038
- The text content (options) as a comma-separated list.
1039
- """
1040
772
  _ = tag
1041
773
  if not text.strip():
1042
774
  return ""
@@ -1049,16 +781,6 @@ def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1049
781
 
1050
782
 
1051
783
  def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1052
- """Convert HTML option element to Markdown.
1053
-
1054
- Args:
1055
- tag: The option tag element.
1056
- text: The text content of the option element.
1057
- convert_as_inline: Whether to convert as inline content.
1058
-
1059
- Returns:
1060
- The option text, potentially with a marker if selected.
1061
- """
1062
784
  if not text.strip():
1063
785
  return ""
1064
786
 
@@ -1074,16 +796,6 @@ def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1074
796
 
1075
797
 
1076
798
  def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1077
- """Convert HTML optgroup element to semantic Markdown.
1078
-
1079
- Args:
1080
- tag: The optgroup tag element.
1081
- text: The text content of the optgroup element.
1082
- convert_as_inline: Whether to convert as inline content.
1083
-
1084
- Returns:
1085
- The converted markdown text with label as heading.
1086
- """
1087
799
  if convert_as_inline:
1088
800
  return text
1089
801
 
@@ -1100,16 +812,6 @@ def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1100
812
 
1101
813
 
1102
814
  def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1103
- """Convert HTML button element to Markdown.
1104
-
1105
- Args:
1106
- tag: The button tag element.
1107
- text: The text content of the button element.
1108
- convert_as_inline: Whether to convert as inline content.
1109
-
1110
- Returns:
1111
- The button text content.
1112
- """
1113
815
  _ = tag
1114
816
  if not text.strip():
1115
817
  return ""
@@ -1118,16 +820,6 @@ def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1118
820
 
1119
821
 
1120
822
  def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1121
- """Convert HTML progress element to semantic text.
1122
-
1123
- Args:
1124
- tag: The progress tag element.
1125
- text: The text content of the progress element.
1126
- convert_as_inline: Whether to convert as inline content.
1127
-
1128
- Returns:
1129
- The converted markdown text (only content, no HTML tags).
1130
- """
1131
823
  _ = tag
1132
824
  if convert_as_inline:
1133
825
  return text
@@ -1139,16 +831,6 @@ def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1139
831
 
1140
832
 
1141
833
  def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1142
- """Convert HTML meter element to semantic text.
1143
-
1144
- Args:
1145
- tag: The meter tag element.
1146
- text: The text content of the meter element.
1147
- convert_as_inline: Whether to convert as inline content.
1148
-
1149
- Returns:
1150
- The converted markdown text (only content, no HTML tags).
1151
- """
1152
834
  _ = tag
1153
835
  if convert_as_inline:
1154
836
  return text
@@ -1160,16 +842,6 @@ def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1160
842
 
1161
843
 
1162
844
  def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1163
- """Convert HTML output element to semantic text.
1164
-
1165
- Args:
1166
- tag: The output tag element.
1167
- text: The text content of the output element.
1168
- convert_as_inline: Whether to convert as inline content.
1169
-
1170
- Returns:
1171
- The converted markdown text (only content, no HTML tags).
1172
- """
1173
845
  _ = tag
1174
846
  if convert_as_inline:
1175
847
  return text
@@ -1181,16 +853,6 @@ def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1181
853
 
1182
854
 
1183
855
  def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1184
- """Convert HTML datalist element to semantic Markdown.
1185
-
1186
- Args:
1187
- tag: The datalist tag element.
1188
- text: The text content of the datalist element.
1189
- convert_as_inline: Whether to convert as inline content.
1190
-
1191
- Returns:
1192
- The converted markdown text (only content, no HTML tags).
1193
- """
1194
856
  _ = tag
1195
857
  if convert_as_inline:
1196
858
  return text
@@ -1202,15 +864,6 @@ def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1202
864
 
1203
865
 
1204
866
  def _convert_ruby(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1205
- """Convert HTML ruby element providing pronunciation annotation.
1206
-
1207
- Args:
1208
- text: The text content of the ruby element.
1209
- convert_as_inline: Whether to convert as inline content.
1210
-
1211
- Returns:
1212
- The converted markdown text with ruby annotation as fallback text.
1213
- """
1214
867
  if not text.strip():
1215
868
  return ""
1216
869
 
@@ -1218,15 +871,6 @@ def _convert_ruby(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1218
871
 
1219
872
 
1220
873
  def _convert_rb(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1221
- """Convert HTML rb (ruby base) element.
1222
-
1223
- Args:
1224
- text: The text content of the rb element.
1225
- convert_as_inline: Whether to convert as inline content.
1226
-
1227
- Returns:
1228
- The converted markdown text (ruby base text).
1229
- """
1230
874
  if not text.strip():
1231
875
  return ""
1232
876
 
@@ -1234,16 +878,6 @@ def _convert_rb(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1234
878
 
1235
879
 
1236
880
  def _convert_rt(*, text: str, convert_as_inline: bool, tag: Tag) -> str: # noqa: ARG001
1237
- """Convert HTML rt (ruby text) element for pronunciation.
1238
-
1239
- Args:
1240
- text: The text content of the rt element.
1241
- convert_as_inline: Whether to convert as inline content.
1242
- tag: The rt tag element.
1243
-
1244
- Returns:
1245
- The converted markdown text with pronunciation in parentheses.
1246
- """
1247
881
  content = text.strip()
1248
882
 
1249
883
  prev_sibling = tag.previous_sibling
@@ -1259,15 +893,6 @@ def _convert_rt(*, text: str, convert_as_inline: bool, tag: Tag) -> str: # noqa
1259
893
 
1260
894
 
1261
895
  def _convert_rp(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1262
- """Convert HTML rp (ruby parentheses) element for fallback.
1263
-
1264
- Args:
1265
- text: The text content of the rp element.
1266
- convert_as_inline: Whether to convert as inline content.
1267
-
1268
- Returns:
1269
- The converted markdown text (parentheses for ruby fallback).
1270
- """
1271
896
  if not text.strip():
1272
897
  return ""
1273
898
 
@@ -1275,15 +900,6 @@ def _convert_rp(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1275
900
 
1276
901
 
1277
902
  def _convert_rtc(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1278
- """Convert HTML rtc (ruby text container) element.
1279
-
1280
- Args:
1281
- text: The text content of the rtc element.
1282
- convert_as_inline: Whether to convert as inline content.
1283
-
1284
- Returns:
1285
- The converted markdown text (ruby text container).
1286
- """
1287
903
  if not text.strip():
1288
904
  return ""
1289
905
 
@@ -1291,16 +907,6 @@ def _convert_rtc(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1291
907
 
1292
908
 
1293
909
  def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1294
- """Convert HTML dialog element to semantic Markdown.
1295
-
1296
- Args:
1297
- text: The text content of the dialog element.
1298
- convert_as_inline: Whether to convert as inline content.
1299
- tag: The dialog tag element.
1300
-
1301
- Returns:
1302
- The converted markdown text (only content, no HTML tags).
1303
- """
1304
910
  _ = tag
1305
911
  if convert_as_inline:
1306
912
  return text
@@ -1312,16 +918,6 @@ def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1312
918
 
1313
919
 
1314
920
  def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1315
- """Convert HTML menu element to semantic Markdown.
1316
-
1317
- Args:
1318
- text: The text content of the menu element.
1319
- convert_as_inline: Whether to convert as inline content.
1320
- tag: The menu tag element.
1321
-
1322
- Returns:
1323
- The converted markdown text (only content, no HTML tags).
1324
- """
1325
921
  _ = tag
1326
922
  if convert_as_inline:
1327
923
  return text
@@ -1333,16 +929,6 @@ def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1333
929
 
1334
930
 
1335
931
  def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1336
- """Convert HTML figure element to semantic Markdown.
1337
-
1338
- Args:
1339
- text: The text content of the figure element.
1340
- convert_as_inline: Whether to convert as inline content.
1341
- tag: The figure tag element.
1342
-
1343
- Returns:
1344
- The converted markdown text (only content, no HTML tags).
1345
- """
1346
932
  _ = tag
1347
933
  if not text.strip():
1348
934
  return ""
@@ -1360,15 +946,6 @@ def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1360
946
 
1361
947
 
1362
948
  def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
1363
- """Convert HTML hgroup element to semantic Markdown.
1364
-
1365
- Args:
1366
- text: The text content of the hgroup element.
1367
- convert_as_inline: Whether to convert as inline content.
1368
-
1369
- Returns:
1370
- The converted markdown text (only content, no HTML tags).
1371
- """
1372
949
  if convert_as_inline:
1373
950
  return text
1374
951
 
@@ -1379,16 +956,6 @@ def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
1379
956
 
1380
957
 
1381
958
  def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1382
- """Convert HTML picture element to semantic Markdown.
1383
-
1384
- Args:
1385
- text: The text content of the picture element.
1386
- convert_as_inline: Whether to convert as inline content.
1387
- tag: The picture tag element.
1388
-
1389
- Returns:
1390
- The converted markdown text (only the img element).
1391
- """
1392
959
  _ = tag, convert_as_inline
1393
960
  if not text.strip():
1394
961
  return ""
@@ -1397,16 +964,6 @@ def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1397
964
 
1398
965
 
1399
966
  def _convert_svg(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1400
- """Convert SVG element to Markdown image reference.
1401
-
1402
- Args:
1403
- text: The text content of the SVG element.
1404
- convert_as_inline: Whether to convert as inline content.
1405
- tag: The SVG tag element.
1406
-
1407
- Returns:
1408
- The converted markdown text as an image reference.
1409
- """
1410
967
  if convert_as_inline:
1411
968
  return text.strip()
1412
969
 
@@ -1425,16 +982,6 @@ def _convert_svg(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1425
982
 
1426
983
 
1427
984
  def _convert_math(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1428
- """Convert MathML math element preserving mathematical notation.
1429
-
1430
- Args:
1431
- text: The text content of the math element.
1432
- convert_as_inline: Whether to convert as inline content.
1433
- tag: The math tag element.
1434
-
1435
- Returns:
1436
- The converted markdown text preserving math structure.
1437
- """
1438
985
  if not text.strip():
1439
986
  return ""
1440
987
 
@@ -1457,6 +1004,8 @@ def create_converters_map(
1457
1004
  heading_style: Literal["atx", "atx_closed", "underlined"],
1458
1005
  highlight_style: Literal["double-equal", "html", "bold"],
1459
1006
  keep_inline_images_in: Iterable[str] | None,
1007
+ list_indent_type: str,
1008
+ list_indent_width: int,
1460
1009
  newline_style: str,
1461
1010
  strong_em_symbol: str,
1462
1011
  sub_symbol: str,
@@ -1464,27 +1013,7 @@ def create_converters_map(
1464
1013
  wrap: bool,
1465
1014
  wrap_width: int,
1466
1015
  ) -> ConvertersMap:
1467
- """Create a mapping of HTML elements to their corresponding conversion functions.
1468
-
1469
- Args:
1470
- autolinks: Whether to convert URLs into links.
1471
- bullets: The bullet characters to use for unordered lists.
1472
- code_language: The default code language to use.
1473
- code_language_callback: A callback to get the code language.
1474
- default_title: Whether to use the URL as the title for links.
1475
- heading_style: The style of headings.
1476
- highlight_style: The style to use for highlighted text (mark elements).
1477
- keep_inline_images_in: The tags to keep inline images in.
1478
- newline_style: The style of newlines.
1479
- strong_em_symbol: The symbol to use for strong and emphasis text.
1480
- sub_symbol: The symbol to use for subscript text.
1481
- sup_symbol: The symbol to use for superscript text.
1482
- wrap: Whether to wrap text.
1483
- wrap_width: The width to wrap text at.
1484
-
1485
- Returns:
1486
- A mapping of HTML elements to their corresponding conversion functions
1487
- """
1016
+ list_indent_str = "\t" if list_indent_type == "tabs" else " " * list_indent_width
1488
1017
 
1489
1018
  def _wrapper(func: Callable[..., T]) -> Callable[[str, Tag], T]:
1490
1019
  spec = getfullargspec(func)
@@ -1498,6 +1027,8 @@ def create_converters_map(
1498
1027
  kwargs["text"] = text
1499
1028
  if "convert_as_inline" in spec.kwonlyargs:
1500
1029
  kwargs["convert_as_inline"] = convert_as_inline
1030
+ if "list_indent_str" in spec.kwonlyargs:
1031
+ kwargs["list_indent_str"] = list_indent_str
1501
1032
  return func(**kwargs)
1502
1033
  return func(text)
1503
1034
 
@@ -1512,7 +1043,7 @@ def create_converters_map(
1512
1043
  "b": _wrapper(partial(_create_inline_converter(2 * strong_em_symbol))),
1513
1044
  "bdi": _wrapper(_create_inline_converter("")),
1514
1045
  "bdo": _wrapper(_create_inline_converter("")),
1515
- "blockquote": _wrapper(partial(_convert_blockquote)),
1046
+ "blockquote": _wrapper(partial(_convert_blockquote, list_indent_str=list_indent_str)),
1516
1047
  "br": _wrapper(partial(_convert_br, newline_style=newline_style)),
1517
1048
  "button": _wrapper(_convert_button),
1518
1049
  "caption": _wrapper(_convert_caption),
@@ -1527,6 +1058,7 @@ def create_converters_map(
1527
1058
  "details": _wrapper(_convert_details),
1528
1059
  "dfn": _wrapper(_create_inline_converter("*")),
1529
1060
  "dialog": _wrapper(_convert_dialog),
1061
+ "div": _wrapper(_convert_div),
1530
1062
  "dl": _wrapper(_convert_dl),
1531
1063
  "dt": _wrapper(_convert_dt),
1532
1064
  "em": _wrapper(_create_inline_converter(strong_em_symbol)),
@@ -1552,19 +1084,19 @@ def create_converters_map(
1552
1084
  "kbd": _wrapper(_create_inline_converter("`")),
1553
1085
  "label": _wrapper(_convert_label),
1554
1086
  "legend": _wrapper(_convert_legend),
1555
- "li": _wrapper(partial(_convert_li, bullets=bullets)),
1556
- "list": _wrapper(_convert_list),
1087
+ "li": _wrapper(partial(_convert_li, bullets=bullets, list_indent_str=list_indent_str)),
1088
+ "list": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
1557
1089
  "main": _wrapper(_convert_semantic_block),
1558
1090
  "mark": _wrapper(partial(_convert_mark, highlight_style=highlight_style)),
1559
1091
  "math": _wrapper(_convert_math),
1560
1092
  "menu": _wrapper(_convert_menu),
1561
1093
  "meter": _wrapper(_convert_meter),
1562
1094
  "nav": _wrapper(_convert_semantic_block),
1563
- "ol": _wrapper(_convert_list),
1095
+ "ol": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
1564
1096
  "optgroup": _wrapper(_convert_optgroup),
1565
1097
  "option": _wrapper(_convert_option),
1566
1098
  "output": _wrapper(_convert_output),
1567
- "p": _wrapper(partial(_convert_p, wrap=wrap, wrap_width=wrap_width)),
1099
+ "p": _wrapper(partial(_convert_p, wrap=wrap, wrap_width=wrap_width, list_indent_str=list_indent_str)),
1568
1100
  "picture": _wrapper(_convert_picture),
1569
1101
  "pre": _wrapper(
1570
1102
  partial(
@@ -1602,7 +1134,7 @@ def create_converters_map(
1602
1134
  "time": _wrapper(_convert_time),
1603
1135
  "tr": _wrapper(_convert_tr),
1604
1136
  "u": _wrapper(_create_inline_converter("")),
1605
- "ul": _wrapper(_convert_list),
1137
+ "ul": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
1606
1138
  "var": _wrapper(_create_inline_converter("*")),
1607
1139
  "video": _wrapper(_convert_media_element),
1608
1140
  "wbr": _wrapper(_convert_wbr),