html-to-markdown 1.9.0__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of html-to-markdown might be problematic. Click here for more details.

@@ -23,17 +23,14 @@ from html_to_markdown.utils import chomp, indent, underline
23
23
 
24
24
 
25
25
  def _format_block_element(text: str) -> str:
26
- """Format text as a block element with trailing newlines."""
27
26
  return f"{text.strip()}\n\n" if text.strip() else ""
28
27
 
29
28
 
30
29
  def _format_inline_or_block(text: str, convert_as_inline: bool) -> str:
31
- """Format text as inline or block element based on context."""
32
30
  return text.strip() if convert_as_inline else _format_block_element(text)
33
31
 
34
32
 
35
33
  def _format_wrapped_block(text: str, start_marker: str, end_marker: str = "") -> str:
36
- """Format text wrapped in markers as a block element."""
37
34
  if not end_marker:
38
35
  end_marker = start_marker
39
36
  return f"{start_marker}{text.strip()}{end_marker}\n\n" if text.strip() else ""
@@ -63,6 +60,7 @@ SupportedElements = Literal[
63
60
  "details",
64
61
  "dfn",
65
62
  "dialog",
63
+ "div",
66
64
  "dl",
67
65
  "dt",
68
66
  "em",
@@ -145,15 +143,6 @@ T = TypeVar("T")
145
143
 
146
144
 
147
145
  def _create_inline_converter(markup_prefix: str) -> Callable[[Tag, str], str]:
148
- """Create an inline converter for a markup pattern or tag.
149
-
150
- Args:
151
- markup_prefix: The markup prefix to insert.
152
-
153
- Returns:
154
- A function that can be used to convert HTML to Markdown.
155
- """
156
-
157
146
  def implementation(*, tag: Tag, text: str) -> str:
158
147
  from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
159
148
 
@@ -200,7 +189,7 @@ def _convert_a(*, tag: Tag, text: str, autolinks: bool, default_title: bool) ->
200
189
  return f"{prefix}[{text}]({href}{title_part}){suffix}" if href else text
201
190
 
202
191
 
203
- def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool) -> str:
192
+ def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool, list_indent_str: str) -> str:
204
193
  if convert_as_inline:
205
194
  return text
206
195
 
@@ -211,18 +200,16 @@ def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool) -> str:
211
200
 
212
201
  cite_url = tag.get("cite")
213
202
 
214
- # Check if this blockquote is inside a list item
215
203
  if _has_ancestor(tag, "li"):
216
- # Indent the blockquote by 4 spaces
217
204
  lines = text.strip().split("\n")
218
- indented_lines = [f" > {line}" if line.strip() else "" for line in lines]
205
+ indented_lines = [f"{list_indent_str}> {line}" if line.strip() else "" for line in lines]
219
206
  quote_text = "\n".join(indented_lines) + "\n\n"
220
207
  else:
221
208
  quote_text = f"\n{line_beginning_re.sub('> ', text.strip())}\n\n"
222
209
 
223
210
  if cite_url:
224
211
  if _has_ancestor(tag, "li"):
225
- quote_text += f" — <{cite_url}>\n\n"
212
+ quote_text += f"{list_indent_str}— <{cite_url}>\n\n"
226
213
  else:
227
214
  quote_text += f"— <{cite_url}>\n\n"
228
215
 
@@ -283,23 +270,19 @@ def _convert_img(*, tag: Tag, convert_as_inline: bool, keep_inline_images_in: It
283
270
  return f"![{alt}]({src}{title_part})"
284
271
 
285
272
 
286
- def _convert_list(*, tag: Tag, text: str) -> str:
273
+ def _convert_list(*, tag: Tag, text: str, list_indent_str: str) -> str:
287
274
  from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
288
275
 
289
276
  before_paragraph = False
290
277
  if tag.next_sibling and getattr(tag.next_sibling, "name", None) not in {"ul", "ol"}:
291
278
  before_paragraph = True
292
279
 
293
- # Check if this list is inside a list item
294
280
  if _has_ancestor(tag, "li"):
295
- # This is a nested list - needs indentation
296
- # But we need to check if it's the first element after a paragraph
297
281
  parent = tag.parent
298
282
  while parent and parent.name != "li":
299
283
  parent = parent.parent
300
284
 
301
285
  if parent:
302
- # Check if there's a paragraph before this list
303
286
  prev_p = None
304
287
  for child in parent.children:
305
288
  if hasattr(child, "name"):
@@ -309,22 +292,33 @@ def _convert_list(*, tag: Tag, text: str) -> str:
309
292
  prev_p = child
310
293
 
311
294
  if prev_p:
312
- # If there's a paragraph before, we need proper indentation
313
295
  lines = text.strip().split("\n")
314
296
  indented_lines = []
315
297
  for line in lines:
316
298
  if line.strip():
317
- indented_lines.append(f" {line}")
299
+ indented_lines.append(f"{list_indent_str}{line}")
318
300
  else:
319
301
  indented_lines.append("")
320
302
  return "\n" + "\n".join(indented_lines) + "\n"
321
- # Otherwise use the original tab indentation
322
- return "\n" + indent(text=text, level=1).rstrip()
303
+ return "\n" + indent(text=text, level=1, indent_str=list_indent_str).rstrip()
304
+
305
+ if tag.parent and tag.parent.name in {"ul", "ol"}:
306
+ lines = text.strip().split("\n")
307
+ indented_lines = []
308
+ for line in lines:
309
+ if line.strip():
310
+ indented_lines.append(f"{list_indent_str}{line}")
311
+ else:
312
+ indented_lines.append("")
313
+ result = "\n".join(indented_lines)
314
+ if not result.endswith("\n"):
315
+ result += "\n"
316
+ return result
323
317
 
324
318
  return text + ("\n" if before_paragraph else "")
325
319
 
326
320
 
327
- def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
321
+ def _convert_li(*, tag: Tag, text: str, bullets: str, list_indent_str: str) -> str:
328
322
  checkbox = tag.find("input", {"type": "checkbox"})
329
323
  if checkbox and isinstance(checkbox, Tag):
330
324
  checked = checkbox.get("checked") is not None
@@ -355,7 +349,6 @@ def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
355
349
 
356
350
  bullet = bullets[depth % len(bullets)]
357
351
 
358
- # Check if the list item contains block-level elements (like <p>, <blockquote>, etc.)
359
352
  has_block_children = any(
360
353
  child.name in {"p", "blockquote", "pre", "ul", "ol", "div", "h1", "h2", "h3", "h4", "h5", "h6"}
361
354
  for child in tag.children
@@ -363,29 +356,26 @@ def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
363
356
  )
364
357
 
365
358
  if has_block_children:
366
- # Handle multi-paragraph list items
367
- # Split by double newlines (paragraph separators)
368
359
  paragraphs = text.strip().split("\n\n")
369
360
 
370
361
  if paragraphs:
371
- # First paragraph goes directly after the bullet
372
362
  result_parts = [f"{bullet} {paragraphs[0].strip()}\n"]
373
363
 
374
- # Subsequent paragraphs need to be indented and separated by blank lines
375
364
  for para in paragraphs[1:]:
376
365
  if para.strip():
377
- # Add blank line before the paragraph
378
366
  result_parts.append("\n")
379
- # Indent each line of the paragraph by 4 spaces
380
- result_parts.extend(f" {line}\n" for line in para.strip().split("\n") if line.strip())
367
+ result_parts.extend(
368
+ f"{list_indent_str}{line}\n" for line in para.strip().split("\n") if line.strip()
369
+ )
381
370
 
382
371
  return "".join(result_parts)
383
372
 
384
- # Simple case: no block elements, just inline content
385
373
  return "{} {}\n".format(bullet, (text or "").strip())
386
374
 
387
375
 
388
- def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int, tag: Tag) -> str:
376
+ def _convert_p(
377
+ *, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int, tag: Tag, list_indent_str: str
378
+ ) -> str:
389
379
  if convert_as_inline:
390
380
  return text
391
381
 
@@ -399,24 +389,19 @@ def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: in
399
389
 
400
390
  from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
401
391
 
402
- # Check if this paragraph is inside a list item
403
392
  if _has_ancestor(tag, "li"):
404
- # Check if this is the first paragraph in the list item
405
393
  parent = tag.parent
406
394
  while parent and parent.name != "li":
407
395
  parent = parent.parent
408
396
 
409
397
  if parent:
410
- # Get all direct children that are paragraphs
411
398
  p_children = [child for child in parent.children if hasattr(child, "name") and child.name == "p"]
412
399
 
413
- # If this is not the first paragraph, indent it
414
400
  if p_children and tag != p_children[0]:
415
- # Indent all lines by 4 spaces
416
401
  indented_lines = []
417
402
  for line in text.split("\n"):
418
403
  if line.strip():
419
- indented_lines.append(f" {line}")
404
+ indented_lines.append(f"{list_indent_str}{line}")
420
405
  else:
421
406
  indented_lines.append("")
422
407
  text = "\n".join(indented_lines)
@@ -425,16 +410,6 @@ def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: in
425
410
 
426
411
 
427
412
  def _convert_mark(*, text: str, convert_as_inline: bool, highlight_style: str) -> str:
428
- """Convert HTML mark element to Markdown highlighting.
429
-
430
- Args:
431
- text: The text content of the mark element.
432
- convert_as_inline: Whether to convert as inline content.
433
- highlight_style: The style to use for highlighting ("double-equal", "html", "bold").
434
-
435
- Returns:
436
- The converted markdown text.
437
- """
438
413
  if convert_as_inline:
439
414
  return text
440
415
 
@@ -480,13 +455,11 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
480
455
  parent_name = tag.parent.name if tag.parent and hasattr(tag.parent, "name") else ""
481
456
  tag_grand_parent = tag.parent.parent if tag.parent else None
482
457
 
483
- # Simple rowspan handling: if previous row had cells with rowspan, add empty cells
484
458
  if tag.previous_sibling and hasattr(tag.previous_sibling, "name") and tag.previous_sibling.name == "tr":
485
459
  prev_cells = cast("Tag", tag.previous_sibling).find_all(["td", "th"])
486
460
  rowspan_positions = []
487
461
  col_pos = 0
488
462
 
489
- # Check which cells in previous row have rowspan > 1
490
463
  for prev_cell in prev_cells:
491
464
  rowspan = 1
492
465
  if (
@@ -497,10 +470,8 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
497
470
  rowspan = int(prev_cell["rowspan"])
498
471
 
499
472
  if rowspan > 1:
500
- # This cell spans into current row
501
473
  rowspan_positions.append(col_pos)
502
474
 
503
- # Account for colspan
504
475
  colspan = 1
505
476
  if (
506
477
  "colspan" in prev_cell.attrs
@@ -510,25 +481,22 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
510
481
  colspan = int(prev_cell["colspan"])
511
482
  col_pos += colspan
512
483
 
513
- # If there are rowspan cells from previous row, add empty cells
514
484
  if rowspan_positions:
515
- # Build new text with empty cells inserted
516
- new_cells = []
485
+ converted_cells: list[str] = []
486
+ if text.strip():
487
+ parts = text.split("|")
488
+ converted_cells.extend(part.rstrip() + " |" for part in parts[:-1] if part)
489
+
490
+ new_cells: list[str] = []
517
491
  cell_index = 0
518
492
 
519
- for pos in range(col_pos): # Total columns
493
+ for pos in range(col_pos):
520
494
  if pos in rowspan_positions:
521
- # Add empty cell for rowspan
522
495
  new_cells.append(" |")
523
- elif cell_index < len(cells):
524
- # Add actual cell content
525
- cell = cells[cell_index]
526
- cell_text = cell.get_text().strip().replace("\n", " ")
527
- colspan = _get_colspan(cell)
528
- new_cells.append(f" {cell_text} |" * colspan)
496
+ elif cell_index < len(converted_cells):
497
+ new_cells.append(converted_cells[cell_index])
529
498
  cell_index += 1
530
499
 
531
- # Override text with new cell arrangement
532
500
  text = "".join(new_cells)
533
501
 
534
502
  is_headrow = (
@@ -563,15 +531,6 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
563
531
 
564
532
 
565
533
  def _convert_caption(*, text: str, convert_as_inline: bool) -> str:
566
- """Convert HTML caption element to emphasized text.
567
-
568
- Args:
569
- text: The text content of the caption element.
570
- convert_as_inline: Whether to convert as inline content.
571
-
572
- Returns:
573
- The converted markdown text with caption formatting.
574
- """
575
534
  if convert_as_inline:
576
535
  return text
577
536
 
@@ -582,15 +541,6 @@ def _convert_caption(*, text: str, convert_as_inline: bool) -> str:
582
541
 
583
542
 
584
543
  def _convert_thead(*, text: str, convert_as_inline: bool) -> str:
585
- """Convert HTML thead element preserving table structure.
586
-
587
- Args:
588
- text: The text content of the thead element.
589
- convert_as_inline: Whether to convert as inline content.
590
-
591
- Returns:
592
- The converted markdown text preserving table structure.
593
- """
594
544
  if convert_as_inline:
595
545
  return text
596
546
 
@@ -598,15 +548,6 @@ def _convert_thead(*, text: str, convert_as_inline: bool) -> str:
598
548
 
599
549
 
600
550
  def _convert_tbody(*, text: str, convert_as_inline: bool) -> str:
601
- """Convert HTML tbody element preserving table structure.
602
-
603
- Args:
604
- text: The text content of the tbody element.
605
- convert_as_inline: Whether to convert as inline content.
606
-
607
- Returns:
608
- The converted markdown text preserving table structure.
609
- """
610
551
  if convert_as_inline:
611
552
  return text
612
553
 
@@ -614,15 +555,6 @@ def _convert_tbody(*, text: str, convert_as_inline: bool) -> str:
614
555
 
615
556
 
616
557
  def _convert_tfoot(*, text: str, convert_as_inline: bool) -> str:
617
- """Convert HTML tfoot element preserving table structure.
618
-
619
- Args:
620
- text: The text content of the tfoot element.
621
- convert_as_inline: Whether to convert as inline content.
622
-
623
- Returns:
624
- The converted markdown text preserving table structure.
625
- """
626
558
  if convert_as_inline:
627
559
  return text
628
560
 
@@ -630,103 +562,41 @@ def _convert_tfoot(*, text: str, convert_as_inline: bool) -> str:
630
562
 
631
563
 
632
564
  def _convert_colgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
633
- """Convert HTML colgroup element - removes it entirely from Markdown output.
634
-
635
- Colgroup is a table column grouping element that defines styling for columns.
636
- It has no representation in Markdown and should be removed.
637
-
638
- Args:
639
- tag: The colgroup tag element.
640
- text: The text content of the colgroup element.
641
- convert_as_inline: Whether to convert as inline content.
642
-
643
- Returns:
644
- Empty string as colgroup has no Markdown representation.
645
- """
646
565
  _ = tag, text, convert_as_inline
647
- # Colgroup and its contents (col elements) are purely presentational
648
- # and have no equivalent in Markdown tables
649
566
  return ""
650
567
 
651
568
 
652
569
  def _convert_col(*, tag: Tag, convert_as_inline: bool) -> str:
653
- """Convert HTML col element - removes it entirely from Markdown output.
654
-
655
- Col elements define column properties (width, style) in HTML tables.
656
- They have no representation in Markdown and should be removed.
657
-
658
- Args:
659
- tag: The col tag element.
660
- convert_as_inline: Whether to convert as inline content.
661
-
662
- Returns:
663
- Empty string as col has no Markdown representation.
664
- """
665
570
  _ = tag, convert_as_inline
666
- # Col elements are self-closing and purely presentational
667
571
  return ""
668
572
 
669
573
 
670
574
  def _convert_semantic_block(*, text: str, convert_as_inline: bool) -> str:
671
- """Convert HTML5 semantic elements to block-level Markdown.
672
-
673
- Args:
674
- text: The text content of the semantic element.
675
- convert_as_inline: Whether to convert as inline content.
676
-
677
- Returns:
678
- The converted markdown text with proper block spacing.
679
- """
680
575
  if convert_as_inline:
681
576
  return text
682
577
 
683
578
  return f"{text}\n\n" if text.strip() else ""
684
579
 
685
580
 
686
- def _convert_details(*, text: str, convert_as_inline: bool) -> str:
687
- """Convert HTML details element to semantic Markdown.
581
+ def _convert_div(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
582
+ return text
688
583
 
689
- Args:
690
- text: The text content of the details element.
691
- convert_as_inline: Whether to convert as inline content.
692
584
 
693
- Returns:
694
- The converted markdown text (only content, no HTML tags).
695
- """
585
+ def _convert_details(*, text: str, convert_as_inline: bool) -> str:
696
586
  if convert_as_inline:
697
587
  return text
698
588
 
699
- # Details is a semantic container, return its content
700
589
  return _format_block_element(text)
701
590
 
702
591
 
703
592
  def _convert_summary(*, text: str, convert_as_inline: bool) -> str:
704
- """Convert HTML summary element to emphasized text.
705
-
706
- Args:
707
- text: The text content of the summary element.
708
- convert_as_inline: Whether to convert as inline content.
709
-
710
- Returns:
711
- The converted markdown text as bold heading.
712
- """
713
593
  if convert_as_inline:
714
594
  return text
715
595
 
716
- # Summary is like a heading/title
717
596
  return _format_wrapped_block(text, "**")
718
597
 
719
598
 
720
599
  def _convert_dl(*, text: str, convert_as_inline: bool) -> str:
721
- """Convert HTML definition list element.
722
-
723
- Args:
724
- text: The text content of the definition list.
725
- convert_as_inline: Whether to convert as inline content.
726
-
727
- Returns:
728
- The converted markdown text with proper spacing.
729
- """
730
600
  if convert_as_inline:
731
601
  return text
732
602
 
@@ -734,15 +604,6 @@ def _convert_dl(*, text: str, convert_as_inline: bool) -> str:
734
604
 
735
605
 
736
606
  def _convert_dt(*, text: str, convert_as_inline: bool) -> str:
737
- """Convert HTML definition term element.
738
-
739
- Args:
740
- text: The text content of the definition term.
741
- convert_as_inline: Whether to convert as inline content.
742
-
743
- Returns:
744
- The converted markdown text as a definition term.
745
- """
746
607
  if convert_as_inline:
747
608
  return text
748
609
 
@@ -753,15 +614,6 @@ def _convert_dt(*, text: str, convert_as_inline: bool) -> str:
753
614
 
754
615
 
755
616
  def _convert_dd(*, text: str, convert_as_inline: bool) -> str:
756
- """Convert HTML definition description element.
757
-
758
- Args:
759
- text: The text content of the definition description.
760
- convert_as_inline: Whether to convert as inline content.
761
-
762
- Returns:
763
- The converted markdown text as a definition description.
764
- """
765
617
  if convert_as_inline:
766
618
  return text
767
619
 
@@ -772,15 +624,6 @@ def _convert_dd(*, text: str, convert_as_inline: bool) -> str:
772
624
 
773
625
 
774
626
  def _convert_cite(*, text: str, convert_as_inline: bool) -> str:
775
- """Convert HTML cite element to italic text.
776
-
777
- Args:
778
- text: The text content of the cite element.
779
- convert_as_inline: Whether to convert as inline content.
780
-
781
- Returns:
782
- The converted markdown text in italic format.
783
- """
784
627
  if convert_as_inline:
785
628
  return text
786
629
 
@@ -791,15 +634,6 @@ def _convert_cite(*, text: str, convert_as_inline: bool) -> str:
791
634
 
792
635
 
793
636
  def _convert_q(*, text: str, convert_as_inline: bool) -> str:
794
- """Convert HTML q element to quoted text.
795
-
796
- Args:
797
- text: The text content of the q element.
798
- convert_as_inline: Whether to convert as inline content.
799
-
800
- Returns:
801
- The converted markdown text with quotes.
802
- """
803
637
  if convert_as_inline:
804
638
  return text
805
639
 
@@ -811,33 +645,20 @@ def _convert_q(*, text: str, convert_as_inline: bool) -> str:
811
645
 
812
646
 
813
647
  def _convert_media_element(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
814
- """Convert HTML media elements (audio/video) to semantic Markdown.
815
-
816
- Args:
817
- tag: The media tag element.
818
- text: The text content of the media element (fallback content).
819
- convert_as_inline: Whether to convert as inline content.
820
-
821
- Returns:
822
- The converted markdown text (link if src exists, otherwise fallback content).
823
- """
824
648
  src = tag.get("src", "")
825
649
 
826
650
  if not src and (source_tag := tag.find("source")) and isinstance(source_tag, Tag):
827
651
  src = source_tag.get("src", "")
828
652
 
829
- # If we have a src, convert to a link
830
653
  if src and isinstance(src, str) and src.strip():
831
654
  link = f"[{src}]({src})"
832
655
  if convert_as_inline:
833
656
  return link
834
657
  result = f"{link}\n\n"
835
- # Add fallback content if present
836
658
  if text.strip():
837
659
  result += f"{text.strip()}\n\n"
838
660
  return result
839
661
 
840
- # No src, just return fallback content
841
662
  if text.strip():
842
663
  return _format_inline_or_block(text, convert_as_inline)
843
664
 
@@ -845,20 +666,9 @@ def _convert_media_element(*, tag: Tag, text: str, convert_as_inline: bool) -> s
845
666
 
846
667
 
847
668
  def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
848
- """Convert HTML iframe element to semantic Markdown.
849
-
850
- Args:
851
- tag: The iframe tag element.
852
- text: The text content of the iframe element (usually empty).
853
- convert_as_inline: Whether to convert as inline content.
854
-
855
- Returns:
856
- The converted markdown text (link if src exists).
857
- """
858
669
  _ = text
859
670
  src = tag.get("src", "")
860
671
 
861
- # If we have a src, convert to a link
862
672
  if src and isinstance(src, str) and src.strip():
863
673
  link = f"[{src}]({src})"
864
674
  if convert_as_inline:
@@ -869,16 +679,6 @@ def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
869
679
 
870
680
 
871
681
  def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
872
- """Convert HTML abbr element to text with optional title.
873
-
874
- Args:
875
- tag: The abbr tag element.
876
- text: The text content of the abbr element.
877
- convert_as_inline: Whether to convert as inline content.
878
-
879
- Returns:
880
- The converted markdown text with optional title annotation.
881
- """
882
682
  _ = convert_as_inline
883
683
  if not text.strip():
884
684
  return ""
@@ -891,69 +691,29 @@ def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
891
691
 
892
692
 
893
693
  def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
894
- """Convert HTML time element to semantic Markdown.
895
-
896
- Args:
897
- tag: The time tag element.
898
- text: The text content of the time element.
899
- convert_as_inline: Whether to convert as inline content.
900
-
901
- Returns:
902
- The converted markdown text (content only, no HTML tags).
903
- """
904
694
  _ = tag
905
695
  _ = convert_as_inline
906
696
  if not text.strip():
907
697
  return ""
908
698
 
909
- # Time elements are semantic - just return the content
910
699
  return text.strip()
911
700
 
912
701
 
913
702
  def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
914
- """Convert HTML data element to semantic Markdown.
915
-
916
- Args:
917
- tag: The data tag element.
918
- text: The text content of the data element.
919
- convert_as_inline: Whether to convert as inline content.
920
-
921
- Returns:
922
- The converted markdown text (content only, no HTML tags).
923
- """
924
703
  _ = tag
925
704
  _ = convert_as_inline
926
705
  if not text.strip():
927
706
  return ""
928
707
 
929
- # Data elements are semantic - just return the content
930
708
  return text.strip()
931
709
 
932
710
 
933
711
  def _convert_wbr(*, convert_as_inline: bool) -> str:
934
- """Convert HTML wbr (word break opportunity) element.
935
-
936
- Args:
937
- convert_as_inline: Whether to convert as inline content.
938
-
939
- Returns:
940
- Empty string as wbr is just a break opportunity.
941
- """
942
712
  _ = convert_as_inline
943
713
  return ""
944
714
 
945
715
 
946
716
  def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
947
- """Convert HTML form element to semantic Markdown.
948
-
949
- Args:
950
- tag: The form tag element.
951
- text: The text content of the form element.
952
- convert_as_inline: Whether to convert as inline content.
953
-
954
- Returns:
955
- The converted markdown text (only content, no HTML tags).
956
- """
957
717
  _ = tag
958
718
  if convert_as_inline:
959
719
  return text
@@ -961,63 +721,31 @@ def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
961
721
  if not text.strip():
962
722
  return ""
963
723
 
964
- # Forms are just containers, return their content
965
724
  return text
966
725
 
967
726
 
968
727
  def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
969
- """Convert HTML fieldset element to semantic Markdown.
970
-
971
- Args:
972
- text: The text content of the fieldset element.
973
- convert_as_inline: Whether to convert as inline content.
974
-
975
- Returns:
976
- The converted markdown text (only content, no HTML tags).
977
- """
978
728
  if convert_as_inline:
979
729
  return text
980
730
 
981
731
  if not text.strip():
982
732
  return ""
983
733
 
984
- # Fieldsets are semantic groupings, return their content
985
734
  return text
986
735
 
987
736
 
988
737
  def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
989
- """Convert HTML legend element to emphasized text.
990
-
991
- Args:
992
- text: The text content of the legend element.
993
- convert_as_inline: Whether to convert as inline content.
994
-
995
- Returns:
996
- The converted markdown text as emphasized legend.
997
- """
998
738
  if convert_as_inline:
999
739
  return text
1000
740
 
1001
741
  if not text.strip():
1002
742
  return ""
1003
743
 
1004
- # Legend is like a heading/title for fieldsets
1005
744
  return _format_wrapped_block(text, "**")
1006
745
 
1007
746
 
1008
747
  def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1009
- """Convert HTML label element to Markdown.
1010
-
1011
- Args:
1012
- tag: The label tag element.
1013
- text: The text content of the label element.
1014
- convert_as_inline: Whether to convert as inline content.
1015
-
1016
- Returns:
1017
- The label text content.
1018
- """
1019
748
  _ = tag
1020
- # Labels are just text, return the content
1021
749
  if not text.strip():
1022
750
  return ""
1023
751
 
@@ -1025,33 +753,12 @@ def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1025
753
 
1026
754
 
1027
755
  def _convert_input_enhanced(*, tag: Tag, convert_as_inline: bool) -> str:
1028
- """Convert HTML input element to Markdown.
1029
-
1030
- Args:
1031
- tag: The input tag element.
1032
- convert_as_inline: Whether to convert as inline content.
1033
-
1034
- Returns:
1035
- Empty string since input elements have no Markdown representation.
1036
- """
1037
756
  _ = tag, convert_as_inline
1038
- # Input elements have no content and no Markdown equivalent
1039
757
  return ""
1040
758
 
1041
759
 
1042
760
  def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1043
- """Convert HTML textarea element to Markdown.
1044
-
1045
- Args:
1046
- tag: The textarea tag element.
1047
- text: The text content of the textarea element.
1048
- convert_as_inline: Whether to convert as inline content.
1049
-
1050
- Returns:
1051
- The text content of the textarea.
1052
- """
1053
761
  _ = tag
1054
- # Return the text content, which is what the user entered
1055
762
  if not text.strip():
1056
763
  return ""
1057
764
 
@@ -1059,69 +766,33 @@ def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1059
766
 
1060
767
 
1061
768
  def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1062
- """Convert HTML select element to Markdown.
1063
-
1064
- Args:
1065
- tag: The select tag element.
1066
- text: The text content of the select element.
1067
- convert_as_inline: Whether to convert as inline content.
1068
-
1069
- Returns:
1070
- The text content (options) as a comma-separated list.
1071
- """
1072
769
  _ = tag
1073
- # Return the options as text
1074
770
  if not text.strip():
1075
771
  return ""
1076
772
 
1077
- # In inline mode, show options separated by commas
1078
773
  if convert_as_inline:
1079
- # Remove extra whitespace and join options
1080
774
  options = [opt.strip() for opt in text.strip().split("\n") if opt.strip()]
1081
775
  return ", ".join(options)
1082
776
 
1083
- # In block mode, show as a list
1084
777
  return _format_block_element(text)
1085
778
 
1086
779
 
1087
780
  def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1088
- """Convert HTML option element to Markdown.
1089
-
1090
- Args:
1091
- tag: The option tag element.
1092
- text: The text content of the option element.
1093
- convert_as_inline: Whether to convert as inline content.
1094
-
1095
- Returns:
1096
- The option text, potentially with a marker if selected.
1097
- """
1098
781
  if not text.strip():
1099
782
  return ""
1100
783
 
1101
- # Check if this option is selected
1102
784
  selected = tag.get("selected") is not None
1103
785
  content = text.strip()
1104
786
 
1105
787
  if convert_as_inline:
1106
788
  return content
1107
789
 
1108
- # In block mode, mark selected options
1109
790
  if selected:
1110
791
  return f"* {content}\n"
1111
792
  return f"{content}\n"
1112
793
 
1113
794
 
1114
795
  def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1115
- """Convert HTML optgroup element to semantic Markdown.
1116
-
1117
- Args:
1118
- tag: The optgroup tag element.
1119
- text: The text content of the optgroup element.
1120
- convert_as_inline: Whether to convert as inline content.
1121
-
1122
- Returns:
1123
- The converted markdown text with label as heading.
1124
- """
1125
796
  if convert_as_inline:
1126
797
  return text
1127
798
 
@@ -1131,7 +802,6 @@ def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1131
802
  label = tag.get("label", "")
1132
803
  content = text.strip()
1133
804
 
1134
- # If there's a label, show it as a heading
1135
805
  if label and isinstance(label, str) and label.strip():
1136
806
  return f"**{label.strip()}**\n{content}\n"
1137
807
 
@@ -1139,18 +809,7 @@ def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1139
809
 
1140
810
 
1141
811
  def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1142
- """Convert HTML button element to Markdown.
1143
-
1144
- Args:
1145
- tag: The button tag element.
1146
- text: The text content of the button element.
1147
- convert_as_inline: Whether to convert as inline content.
1148
-
1149
- Returns:
1150
- The button text content.
1151
- """
1152
812
  _ = tag
1153
- # Buttons are just interactive text, return the text content
1154
813
  if not text.strip():
1155
814
  return ""
1156
815
 
@@ -1158,16 +817,6 @@ def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1158
817
 
1159
818
 
1160
819
  def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1161
- """Convert HTML progress element to semantic text.
1162
-
1163
- Args:
1164
- tag: The progress tag element.
1165
- text: The text content of the progress element.
1166
- convert_as_inline: Whether to convert as inline content.
1167
-
1168
- Returns:
1169
- The converted markdown text (only content, no HTML tags).
1170
- """
1171
820
  _ = tag
1172
821
  if convert_as_inline:
1173
822
  return text
@@ -1175,21 +824,10 @@ def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1175
824
  if not text.strip():
1176
825
  return ""
1177
826
 
1178
- # Progress elements convert to their text content
1179
827
  return _format_block_element(text)
1180
828
 
1181
829
 
1182
830
  def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1183
- """Convert HTML meter element to semantic text.
1184
-
1185
- Args:
1186
- tag: The meter tag element.
1187
- text: The text content of the meter element.
1188
- convert_as_inline: Whether to convert as inline content.
1189
-
1190
- Returns:
1191
- The converted markdown text (only content, no HTML tags).
1192
- """
1193
831
  _ = tag
1194
832
  if convert_as_inline:
1195
833
  return text
@@ -1197,21 +835,10 @@ def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1197
835
  if not text.strip():
1198
836
  return ""
1199
837
 
1200
- # Meter elements convert to their text content
1201
838
  return _format_block_element(text)
1202
839
 
1203
840
 
1204
841
  def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1205
- """Convert HTML output element to semantic text.
1206
-
1207
- Args:
1208
- tag: The output tag element.
1209
- text: The text content of the output element.
1210
- convert_as_inline: Whether to convert as inline content.
1211
-
1212
- Returns:
1213
- The converted markdown text (only content, no HTML tags).
1214
- """
1215
842
  _ = tag
1216
843
  if convert_as_inline:
1217
844
  return text
@@ -1219,21 +846,10 @@ def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1219
846
  if not text.strip():
1220
847
  return ""
1221
848
 
1222
- # Output elements convert to their text content
1223
849
  return _format_block_element(text)
1224
850
 
1225
851
 
1226
852
  def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1227
- """Convert HTML datalist element to semantic Markdown.
1228
-
1229
- Args:
1230
- tag: The datalist tag element.
1231
- text: The text content of the datalist element.
1232
- convert_as_inline: Whether to convert as inline content.
1233
-
1234
- Returns:
1235
- The converted markdown text (only content, no HTML tags).
1236
- """
1237
853
  _ = tag
1238
854
  if convert_as_inline:
1239
855
  return text
@@ -1241,20 +857,10 @@ def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1241
857
  if not text.strip():
1242
858
  return ""
1243
859
 
1244
- # Datalist shows options as a list
1245
860
  return _format_block_element(text)
1246
861
 
1247
862
 
1248
863
  def _convert_ruby(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1249
- """Convert HTML ruby element providing pronunciation annotation.
1250
-
1251
- Args:
1252
- text: The text content of the ruby element.
1253
- convert_as_inline: Whether to convert as inline content.
1254
-
1255
- Returns:
1256
- The converted markdown text with ruby annotation as fallback text.
1257
- """
1258
864
  if not text.strip():
1259
865
  return ""
1260
866
 
@@ -1262,15 +868,6 @@ def _convert_ruby(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1262
868
 
1263
869
 
1264
870
  def _convert_rb(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1265
- """Convert HTML rb (ruby base) element.
1266
-
1267
- Args:
1268
- text: The text content of the rb element.
1269
- convert_as_inline: Whether to convert as inline content.
1270
-
1271
- Returns:
1272
- The converted markdown text (ruby base text).
1273
- """
1274
871
  if not text.strip():
1275
872
  return ""
1276
873
 
@@ -1278,16 +875,6 @@ def _convert_rb(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1278
875
 
1279
876
 
1280
877
  def _convert_rt(*, text: str, convert_as_inline: bool, tag: Tag) -> str: # noqa: ARG001
1281
- """Convert HTML rt (ruby text) element for pronunciation.
1282
-
1283
- Args:
1284
- text: The text content of the rt element.
1285
- convert_as_inline: Whether to convert as inline content.
1286
- tag: The rt tag element.
1287
-
1288
- Returns:
1289
- The converted markdown text with pronunciation in parentheses.
1290
- """
1291
878
  content = text.strip()
1292
879
 
1293
880
  prev_sibling = tag.previous_sibling
@@ -1303,15 +890,6 @@ def _convert_rt(*, text: str, convert_as_inline: bool, tag: Tag) -> str: # noqa
1303
890
 
1304
891
 
1305
892
  def _convert_rp(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1306
- """Convert HTML rp (ruby parentheses) element for fallback.
1307
-
1308
- Args:
1309
- text: The text content of the rp element.
1310
- convert_as_inline: Whether to convert as inline content.
1311
-
1312
- Returns:
1313
- The converted markdown text (parentheses for ruby fallback).
1314
- """
1315
893
  if not text.strip():
1316
894
  return ""
1317
895
 
@@ -1319,15 +897,6 @@ def _convert_rp(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1319
897
 
1320
898
 
1321
899
  def _convert_rtc(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1322
- """Convert HTML rtc (ruby text container) element.
1323
-
1324
- Args:
1325
- text: The text content of the rtc element.
1326
- convert_as_inline: Whether to convert as inline content.
1327
-
1328
- Returns:
1329
- The converted markdown text (ruby text container).
1330
- """
1331
900
  if not text.strip():
1332
901
  return ""
1333
902
 
@@ -1335,16 +904,6 @@ def _convert_rtc(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
1335
904
 
1336
905
 
1337
906
  def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1338
- """Convert HTML dialog element to semantic Markdown.
1339
-
1340
- Args:
1341
- text: The text content of the dialog element.
1342
- convert_as_inline: Whether to convert as inline content.
1343
- tag: The dialog tag element.
1344
-
1345
- Returns:
1346
- The converted markdown text (only content, no HTML tags).
1347
- """
1348
907
  _ = tag
1349
908
  if convert_as_inline:
1350
909
  return text
@@ -1352,21 +911,10 @@ def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1352
911
  if not text.strip():
1353
912
  return ""
1354
913
 
1355
- # Dialog is a semantic container, return its content
1356
914
  return _format_block_element(text)
1357
915
 
1358
916
 
1359
917
  def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1360
- """Convert HTML menu element to semantic Markdown.
1361
-
1362
- Args:
1363
- text: The text content of the menu element.
1364
- convert_as_inline: Whether to convert as inline content.
1365
- tag: The menu tag element.
1366
-
1367
- Returns:
1368
- The converted markdown text (only content, no HTML tags).
1369
- """
1370
918
  _ = tag
1371
919
  if convert_as_inline:
1372
920
  return text
@@ -1374,21 +922,10 @@ def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1374
922
  if not text.strip():
1375
923
  return ""
1376
924
 
1377
- # Menu is converted as a list
1378
925
  return _format_block_element(text)
1379
926
 
1380
927
 
1381
928
  def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1382
- """Convert HTML figure element to semantic Markdown.
1383
-
1384
- Args:
1385
- text: The text content of the figure element.
1386
- convert_as_inline: Whether to convert as inline content.
1387
- tag: The figure tag element.
1388
-
1389
- Returns:
1390
- The converted markdown text (only content, no HTML tags).
1391
- """
1392
929
  _ = tag
1393
930
  if not text.strip():
1394
931
  return ""
@@ -1396,8 +933,6 @@ def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1396
933
  if convert_as_inline:
1397
934
  return text
1398
935
 
1399
- # Figure is a semantic container, return its content
1400
- # Make sure there's proper spacing after the figure content
1401
936
  content = text.strip()
1402
937
  if content and not content.endswith("\n\n"):
1403
938
  if content.endswith("\n"):
@@ -1408,55 +943,24 @@ def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1408
943
 
1409
944
 
1410
945
  def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
1411
- """Convert HTML hgroup element to semantic Markdown.
1412
-
1413
- Args:
1414
- text: The text content of the hgroup element.
1415
- convert_as_inline: Whether to convert as inline content.
1416
-
1417
- Returns:
1418
- The converted markdown text (only content, no HTML tags).
1419
- """
1420
946
  if convert_as_inline:
1421
947
  return text
1422
948
 
1423
949
  if not text.strip():
1424
950
  return ""
1425
951
 
1426
- # Hgroup is a semantic container for headings, return its content
1427
952
  return text
1428
953
 
1429
954
 
1430
955
  def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1431
- """Convert HTML picture element to semantic Markdown.
1432
-
1433
- Args:
1434
- text: The text content of the picture element.
1435
- convert_as_inline: Whether to convert as inline content.
1436
- tag: The picture tag element.
1437
-
1438
- Returns:
1439
- The converted markdown text (only the img element).
1440
- """
1441
956
  _ = tag, convert_as_inline
1442
957
  if not text.strip():
1443
958
  return ""
1444
959
 
1445
- # Picture is a container for responsive images, only the img matters for Markdown
1446
960
  return text.strip()
1447
961
 
1448
962
 
1449
963
  def _convert_svg(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1450
- """Convert SVG element to Markdown image reference.
1451
-
1452
- Args:
1453
- text: The text content of the SVG element.
1454
- convert_as_inline: Whether to convert as inline content.
1455
- tag: The SVG tag element.
1456
-
1457
- Returns:
1458
- The converted markdown text as an image reference.
1459
- """
1460
964
  if convert_as_inline:
1461
965
  return text.strip()
1462
966
 
@@ -1475,16 +979,6 @@ def _convert_svg(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1475
979
 
1476
980
 
1477
981
  def _convert_math(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1478
- """Convert MathML math element preserving mathematical notation.
1479
-
1480
- Args:
1481
- text: The text content of the math element.
1482
- convert_as_inline: Whether to convert as inline content.
1483
- tag: The math tag element.
1484
-
1485
- Returns:
1486
- The converted markdown text preserving math structure.
1487
- """
1488
982
  if not text.strip():
1489
983
  return ""
1490
984
 
@@ -1507,6 +1001,8 @@ def create_converters_map(
1507
1001
  heading_style: Literal["atx", "atx_closed", "underlined"],
1508
1002
  highlight_style: Literal["double-equal", "html", "bold"],
1509
1003
  keep_inline_images_in: Iterable[str] | None,
1004
+ list_indent_type: str,
1005
+ list_indent_width: int,
1510
1006
  newline_style: str,
1511
1007
  strong_em_symbol: str,
1512
1008
  sub_symbol: str,
@@ -1514,27 +1010,7 @@ def create_converters_map(
1514
1010
  wrap: bool,
1515
1011
  wrap_width: int,
1516
1012
  ) -> ConvertersMap:
1517
- """Create a mapping of HTML elements to their corresponding conversion functions.
1518
-
1519
- Args:
1520
- autolinks: Whether to convert URLs into links.
1521
- bullets: The bullet characters to use for unordered lists.
1522
- code_language: The default code language to use.
1523
- code_language_callback: A callback to get the code language.
1524
- default_title: Whether to use the URL as the title for links.
1525
- heading_style: The style of headings.
1526
- highlight_style: The style to use for highlighted text (mark elements).
1527
- keep_inline_images_in: The tags to keep inline images in.
1528
- newline_style: The style of newlines.
1529
- strong_em_symbol: The symbol to use for strong and emphasis text.
1530
- sub_symbol: The symbol to use for subscript text.
1531
- sup_symbol: The symbol to use for superscript text.
1532
- wrap: Whether to wrap text.
1533
- wrap_width: The width to wrap text at.
1534
-
1535
- Returns:
1536
- A mapping of HTML elements to their corresponding conversion functions
1537
- """
1013
+ list_indent_str = "\t" if list_indent_type == "tabs" else " " * list_indent_width
1538
1014
 
1539
1015
  def _wrapper(func: Callable[..., T]) -> Callable[[str, Tag], T]:
1540
1016
  spec = getfullargspec(func)
@@ -1548,6 +1024,8 @@ def create_converters_map(
1548
1024
  kwargs["text"] = text
1549
1025
  if "convert_as_inline" in spec.kwonlyargs:
1550
1026
  kwargs["convert_as_inline"] = convert_as_inline
1027
+ if "list_indent_str" in spec.kwonlyargs:
1028
+ kwargs["list_indent_str"] = list_indent_str
1551
1029
  return func(**kwargs)
1552
1030
  return func(text)
1553
1031
 
@@ -1562,7 +1040,7 @@ def create_converters_map(
1562
1040
  "b": _wrapper(partial(_create_inline_converter(2 * strong_em_symbol))),
1563
1041
  "bdi": _wrapper(_create_inline_converter("")),
1564
1042
  "bdo": _wrapper(_create_inline_converter("")),
1565
- "blockquote": _wrapper(partial(_convert_blockquote)),
1043
+ "blockquote": _wrapper(partial(_convert_blockquote, list_indent_str=list_indent_str)),
1566
1044
  "br": _wrapper(partial(_convert_br, newline_style=newline_style)),
1567
1045
  "button": _wrapper(_convert_button),
1568
1046
  "caption": _wrapper(_convert_caption),
@@ -1577,6 +1055,7 @@ def create_converters_map(
1577
1055
  "details": _wrapper(_convert_details),
1578
1056
  "dfn": _wrapper(_create_inline_converter("*")),
1579
1057
  "dialog": _wrapper(_convert_dialog),
1058
+ "div": _wrapper(_convert_div),
1580
1059
  "dl": _wrapper(_convert_dl),
1581
1060
  "dt": _wrapper(_convert_dt),
1582
1061
  "em": _wrapper(_create_inline_converter(strong_em_symbol)),
@@ -1602,19 +1081,19 @@ def create_converters_map(
1602
1081
  "kbd": _wrapper(_create_inline_converter("`")),
1603
1082
  "label": _wrapper(_convert_label),
1604
1083
  "legend": _wrapper(_convert_legend),
1605
- "li": _wrapper(partial(_convert_li, bullets=bullets)),
1606
- "list": _wrapper(_convert_list),
1084
+ "li": _wrapper(partial(_convert_li, bullets=bullets, list_indent_str=list_indent_str)),
1085
+ "list": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
1607
1086
  "main": _wrapper(_convert_semantic_block),
1608
1087
  "mark": _wrapper(partial(_convert_mark, highlight_style=highlight_style)),
1609
1088
  "math": _wrapper(_convert_math),
1610
1089
  "menu": _wrapper(_convert_menu),
1611
1090
  "meter": _wrapper(_convert_meter),
1612
1091
  "nav": _wrapper(_convert_semantic_block),
1613
- "ol": _wrapper(_convert_list),
1092
+ "ol": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
1614
1093
  "optgroup": _wrapper(_convert_optgroup),
1615
1094
  "option": _wrapper(_convert_option),
1616
1095
  "output": _wrapper(_convert_output),
1617
- "p": _wrapper(partial(_convert_p, wrap=wrap, wrap_width=wrap_width)),
1096
+ "p": _wrapper(partial(_convert_p, wrap=wrap, wrap_width=wrap_width, list_indent_str=list_indent_str)),
1618
1097
  "picture": _wrapper(_convert_picture),
1619
1098
  "pre": _wrapper(
1620
1099
  partial(
@@ -1652,7 +1131,7 @@ def create_converters_map(
1652
1131
  "time": _wrapper(_convert_time),
1653
1132
  "tr": _wrapper(_convert_tr),
1654
1133
  "u": _wrapper(_create_inline_converter("")),
1655
- "ul": _wrapper(_convert_list),
1134
+ "ul": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
1656
1135
  "var": _wrapper(_create_inline_converter("*")),
1657
1136
  "video": _wrapper(_convert_media_element),
1658
1137
  "wbr": _wrapper(_convert_wbr),