html-to-markdown 1.9.1__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of html-to-markdown might be problematic. Click here for more details.
- html_to_markdown/__main__.py +0 -1
- html_to_markdown/cli.py +101 -45
- html_to_markdown/constants.py +3 -0
- html_to_markdown/converters.py +34 -502
- html_to_markdown/exceptions.py +1 -11
- html_to_markdown/preprocessor.py +0 -37
- html_to_markdown/processing.py +117 -191
- html_to_markdown/utils.py +2 -42
- html_to_markdown/whitespace.py +303 -0
- {html_to_markdown-1.9.1.dist-info → html_to_markdown-1.11.0.dist-info}/METADATA +196 -204
- html_to_markdown-1.11.0.dist-info/RECORD +17 -0
- html_to_markdown-1.9.1.dist-info/RECORD +0 -16
- {html_to_markdown-1.9.1.dist-info → html_to_markdown-1.11.0.dist-info}/WHEEL +0 -0
- {html_to_markdown-1.9.1.dist-info → html_to_markdown-1.11.0.dist-info}/entry_points.txt +0 -0
- {html_to_markdown-1.9.1.dist-info → html_to_markdown-1.11.0.dist-info}/licenses/LICENSE +0 -0
- {html_to_markdown-1.9.1.dist-info → html_to_markdown-1.11.0.dist-info}/top_level.txt +0 -0
html_to_markdown/converters.py
CHANGED
|
@@ -23,17 +23,14 @@ from html_to_markdown.utils import chomp, indent, underline
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
def _format_block_element(text: str) -> str:
|
|
26
|
-
"""Format text as a block element with trailing newlines."""
|
|
27
26
|
return f"{text.strip()}\n\n" if text.strip() else ""
|
|
28
27
|
|
|
29
28
|
|
|
30
29
|
def _format_inline_or_block(text: str, convert_as_inline: bool) -> str:
|
|
31
|
-
"""Format text as inline or block element based on context."""
|
|
32
30
|
return text.strip() if convert_as_inline else _format_block_element(text)
|
|
33
31
|
|
|
34
32
|
|
|
35
33
|
def _format_wrapped_block(text: str, start_marker: str, end_marker: str = "") -> str:
|
|
36
|
-
"""Format text wrapped in markers as a block element."""
|
|
37
34
|
if not end_marker:
|
|
38
35
|
end_marker = start_marker
|
|
39
36
|
return f"{start_marker}{text.strip()}{end_marker}\n\n" if text.strip() else ""
|
|
@@ -63,6 +60,7 @@ SupportedElements = Literal[
|
|
|
63
60
|
"details",
|
|
64
61
|
"dfn",
|
|
65
62
|
"dialog",
|
|
63
|
+
"div",
|
|
66
64
|
"dl",
|
|
67
65
|
"dt",
|
|
68
66
|
"em",
|
|
@@ -145,15 +143,6 @@ T = TypeVar("T")
|
|
|
145
143
|
|
|
146
144
|
|
|
147
145
|
def _create_inline_converter(markup_prefix: str) -> Callable[[Tag, str], str]:
|
|
148
|
-
"""Create an inline converter for a markup pattern or tag.
|
|
149
|
-
|
|
150
|
-
Args:
|
|
151
|
-
markup_prefix: The markup prefix to insert.
|
|
152
|
-
|
|
153
|
-
Returns:
|
|
154
|
-
A function that can be used to convert HTML to Markdown.
|
|
155
|
-
"""
|
|
156
|
-
|
|
157
146
|
def implementation(*, tag: Tag, text: str) -> str:
|
|
158
147
|
from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
|
|
159
148
|
|
|
@@ -200,7 +189,7 @@ def _convert_a(*, tag: Tag, text: str, autolinks: bool, default_title: bool) ->
|
|
|
200
189
|
return f"{prefix}[{text}]({href}{title_part}){suffix}" if href else text
|
|
201
190
|
|
|
202
191
|
|
|
203
|
-
def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool) -> str:
|
|
192
|
+
def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool, list_indent_str: str) -> str:
|
|
204
193
|
if convert_as_inline:
|
|
205
194
|
return text
|
|
206
195
|
|
|
@@ -213,14 +202,14 @@ def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool) -> str:
|
|
|
213
202
|
|
|
214
203
|
if _has_ancestor(tag, "li"):
|
|
215
204
|
lines = text.strip().split("\n")
|
|
216
|
-
indented_lines = [f"    > {line}" if line.strip() else "" for line in lines]
|
|
205
|
+
indented_lines = [f"{list_indent_str}> {line}" if line.strip() else "" for line in lines]
|
|
217
206
|
quote_text = "\n".join(indented_lines) + "\n\n"
|
|
218
207
|
else:
|
|
219
208
|
quote_text = f"\n{line_beginning_re.sub('> ', text.strip())}\n\n"
|
|
220
209
|
|
|
221
210
|
if cite_url:
|
|
222
211
|
if _has_ancestor(tag, "li"):
|
|
223
|
-
quote_text += f"    — <{cite_url}>\n\n"
|
|
212
|
+
quote_text += f"{list_indent_str}— <{cite_url}>\n\n"
|
|
224
213
|
else:
|
|
225
214
|
quote_text += f"— <{cite_url}>\n\n"
|
|
226
215
|
|
|
@@ -281,7 +270,7 @@ def _convert_img(*, tag: Tag, convert_as_inline: bool, keep_inline_images_in: It
|
|
|
281
270
|
return f""
|
|
282
271
|
|
|
283
272
|
|
|
284
|
-
def _convert_list(*, tag: Tag, text: str) -> str:
|
|
273
|
+
def _convert_list(*, tag: Tag, text: str, list_indent_str: str) -> str:
|
|
285
274
|
from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
|
|
286
275
|
|
|
287
276
|
before_paragraph = False
|
|
@@ -307,18 +296,18 @@ def _convert_list(*, tag: Tag, text: str) -> str:
|
|
|
307
296
|
indented_lines = []
|
|
308
297
|
for line in lines:
|
|
309
298
|
if line.strip():
|
|
310
|
-
indented_lines.append(f"    {line}")
|
|
299
|
+
indented_lines.append(f"{list_indent_str}{line}")
|
|
311
300
|
else:
|
|
312
301
|
indented_lines.append("")
|
|
313
302
|
return "\n" + "\n".join(indented_lines) + "\n"
|
|
314
|
-
return "\n" + indent(text=text, level=1).rstrip()
|
|
303
|
+
return "\n" + indent(text=text, level=1, indent_str=list_indent_str).rstrip()
|
|
315
304
|
|
|
316
305
|
if tag.parent and tag.parent.name in {"ul", "ol"}:
|
|
317
306
|
lines = text.strip().split("\n")
|
|
318
307
|
indented_lines = []
|
|
319
308
|
for line in lines:
|
|
320
309
|
if line.strip():
|
|
321
|
-
indented_lines.append(f"    {line}")
|
|
310
|
+
indented_lines.append(f"{list_indent_str}{line}")
|
|
322
311
|
else:
|
|
323
312
|
indented_lines.append("")
|
|
324
313
|
result = "\n".join(indented_lines)
|
|
@@ -329,7 +318,7 @@ def _convert_list(*, tag: Tag, text: str) -> str:
|
|
|
329
318
|
return text + ("\n" if before_paragraph else "")
|
|
330
319
|
|
|
331
320
|
|
|
332
|
-
def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
|
|
321
|
+
def _convert_li(*, tag: Tag, text: str, bullets: str, list_indent_str: str) -> str:
|
|
333
322
|
checkbox = tag.find("input", {"type": "checkbox"})
|
|
334
323
|
if checkbox and isinstance(checkbox, Tag):
|
|
335
324
|
checked = checkbox.get("checked") is not None
|
|
@@ -375,14 +364,18 @@ def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
|
|
|
375
364
|
for para in paragraphs[1:]:
|
|
376
365
|
if para.strip():
|
|
377
366
|
result_parts.append("\n")
|
|
378
|
-
result_parts.extend(f"    {line}\n" for line in para.strip().split("\n") if line.strip())
|
|
367
|
+
result_parts.extend(
|
|
368
|
+
f"{list_indent_str}{line}\n" for line in para.strip().split("\n") if line.strip()
|
|
369
|
+
)
|
|
379
370
|
|
|
380
371
|
return "".join(result_parts)
|
|
381
372
|
|
|
382
373
|
return "{} {}\n".format(bullet, (text or "").strip())
|
|
383
374
|
|
|
384
375
|
|
|
385
|
-
def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int, tag: Tag) -> str:
|
|
376
|
+
def _convert_p(
|
|
377
|
+
*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int, tag: Tag, list_indent_str: str
|
|
378
|
+
) -> str:
|
|
386
379
|
if convert_as_inline:
|
|
387
380
|
return text
|
|
388
381
|
|
|
@@ -408,7 +401,7 @@ def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: in
|
|
|
408
401
|
indented_lines = []
|
|
409
402
|
for line in text.split("\n"):
|
|
410
403
|
if line.strip():
|
|
411
|
-
indented_lines.append(f"    {line}")
|
|
404
|
+
indented_lines.append(f"{list_indent_str}{line}")
|
|
412
405
|
else:
|
|
413
406
|
indented_lines.append("")
|
|
414
407
|
text = "\n".join(indented_lines)
|
|
@@ -417,16 +410,6 @@ def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: in
|
|
|
417
410
|
|
|
418
411
|
|
|
419
412
|
def _convert_mark(*, text: str, convert_as_inline: bool, highlight_style: str) -> str:
|
|
420
|
-
"""Convert HTML mark element to Markdown highlighting.
|
|
421
|
-
|
|
422
|
-
Args:
|
|
423
|
-
text: The text content of the mark element.
|
|
424
|
-
convert_as_inline: Whether to convert as inline content.
|
|
425
|
-
highlight_style: The style to use for highlighting ("double-equal", "html", "bold").
|
|
426
|
-
|
|
427
|
-
Returns:
|
|
428
|
-
The converted markdown text.
|
|
429
|
-
"""
|
|
430
413
|
if convert_as_inline:
|
|
431
414
|
return text
|
|
432
415
|
|
|
@@ -548,15 +531,6 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
|
|
|
548
531
|
|
|
549
532
|
|
|
550
533
|
def _convert_caption(*, text: str, convert_as_inline: bool) -> str:
|
|
551
|
-
"""Convert HTML caption element to emphasized text.
|
|
552
|
-
|
|
553
|
-
Args:
|
|
554
|
-
text: The text content of the caption element.
|
|
555
|
-
convert_as_inline: Whether to convert as inline content.
|
|
556
|
-
|
|
557
|
-
Returns:
|
|
558
|
-
The converted markdown text with caption formatting.
|
|
559
|
-
"""
|
|
560
534
|
if convert_as_inline:
|
|
561
535
|
return text
|
|
562
536
|
|
|
@@ -567,15 +541,6 @@ def _convert_caption(*, text: str, convert_as_inline: bool) -> str:
|
|
|
567
541
|
|
|
568
542
|
|
|
569
543
|
def _convert_thead(*, text: str, convert_as_inline: bool) -> str:
|
|
570
|
-
"""Convert HTML thead element preserving table structure.
|
|
571
|
-
|
|
572
|
-
Args:
|
|
573
|
-
text: The text content of the thead element.
|
|
574
|
-
convert_as_inline: Whether to convert as inline content.
|
|
575
|
-
|
|
576
|
-
Returns:
|
|
577
|
-
The converted markdown text preserving table structure.
|
|
578
|
-
"""
|
|
579
544
|
if convert_as_inline:
|
|
580
545
|
return text
|
|
581
546
|
|
|
@@ -583,15 +548,6 @@ def _convert_thead(*, text: str, convert_as_inline: bool) -> str:
|
|
|
583
548
|
|
|
584
549
|
|
|
585
550
|
def _convert_tbody(*, text: str, convert_as_inline: bool) -> str:
|
|
586
|
-
"""Convert HTML tbody element preserving table structure.
|
|
587
|
-
|
|
588
|
-
Args:
|
|
589
|
-
text: The text content of the tbody element.
|
|
590
|
-
convert_as_inline: Whether to convert as inline content.
|
|
591
|
-
|
|
592
|
-
Returns:
|
|
593
|
-
The converted markdown text preserving table structure.
|
|
594
|
-
"""
|
|
595
551
|
if convert_as_inline:
|
|
596
552
|
return text
|
|
597
553
|
|
|
@@ -599,15 +555,6 @@ def _convert_tbody(*, text: str, convert_as_inline: bool) -> str:
|
|
|
599
555
|
|
|
600
556
|
|
|
601
557
|
def _convert_tfoot(*, text: str, convert_as_inline: bool) -> str:
|
|
602
|
-
"""Convert HTML tfoot element preserving table structure.
|
|
603
|
-
|
|
604
|
-
Args:
|
|
605
|
-
text: The text content of the tfoot element.
|
|
606
|
-
convert_as_inline: Whether to convert as inline content.
|
|
607
|
-
|
|
608
|
-
Returns:
|
|
609
|
-
The converted markdown text preserving table structure.
|
|
610
|
-
"""
|
|
611
558
|
if convert_as_inline:
|
|
612
559
|
return text
|
|
613
560
|
|
|
@@ -615,66 +562,30 @@ def _convert_tfoot(*, text: str, convert_as_inline: bool) -> str:
|
|
|
615
562
|
|
|
616
563
|
|
|
617
564
|
def _convert_colgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
618
|
-
"""Convert HTML colgroup element - removes it entirely from Markdown output.
|
|
619
|
-
|
|
620
|
-
Colgroup is a table column grouping element that defines styling for columns.
|
|
621
|
-
It has no representation in Markdown and should be removed.
|
|
622
|
-
|
|
623
|
-
Args:
|
|
624
|
-
tag: The colgroup tag element.
|
|
625
|
-
text: The text content of the colgroup element.
|
|
626
|
-
convert_as_inline: Whether to convert as inline content.
|
|
627
|
-
|
|
628
|
-
Returns:
|
|
629
|
-
Empty string as colgroup has no Markdown representation.
|
|
630
|
-
"""
|
|
631
565
|
_ = tag, text, convert_as_inline
|
|
632
566
|
return ""
|
|
633
567
|
|
|
634
568
|
|
|
635
569
|
def _convert_col(*, tag: Tag, convert_as_inline: bool) -> str:
|
|
636
|
-
"""Convert HTML col element - removes it entirely from Markdown output.
|
|
637
|
-
|
|
638
|
-
Col elements define column properties (width, style) in HTML tables.
|
|
639
|
-
They have no representation in Markdown and should be removed.
|
|
640
|
-
|
|
641
|
-
Args:
|
|
642
|
-
tag: The col tag element.
|
|
643
|
-
convert_as_inline: Whether to convert as inline content.
|
|
644
|
-
|
|
645
|
-
Returns:
|
|
646
|
-
Empty string as col has no Markdown representation.
|
|
647
|
-
"""
|
|
648
570
|
_ = tag, convert_as_inline
|
|
649
571
|
return ""
|
|
650
572
|
|
|
651
573
|
|
|
652
574
|
def _convert_semantic_block(*, text: str, convert_as_inline: bool) -> str:
|
|
653
|
-
"""Convert HTML5 semantic elements to block-level Markdown.
|
|
654
|
-
|
|
655
|
-
Args:
|
|
656
|
-
text: The text content of the semantic element.
|
|
657
|
-
convert_as_inline: Whether to convert as inline content.
|
|
658
|
-
|
|
659
|
-
Returns:
|
|
660
|
-
The converted markdown text with proper block spacing.
|
|
661
|
-
"""
|
|
662
575
|
if convert_as_inline:
|
|
663
576
|
return text
|
|
664
577
|
|
|
665
578
|
return f"{text}\n\n" if text.strip() else ""
|
|
666
579
|
|
|
667
580
|
|
|
668
|
-
def _convert_details(*, text: str, convert_as_inline: bool) -> str:
|
|
669
|
-
|
|
581
|
+
def _convert_div(*, text: str, convert_as_inline: bool) -> str:
|
|
582
|
+
if convert_as_inline:
|
|
583
|
+
return text
|
|
584
|
+
|
|
585
|
+
return _format_block_element(text)
|
|
670
586
|
|
|
671
|
-
Args:
|
|
672
|
-
text: The text content of the details element.
|
|
673
|
-
convert_as_inline: Whether to convert as inline content.
|
|
674
587
|
|
|
675
|
-
|
|
676
|
-
The converted markdown text (only content, no HTML tags).
|
|
677
|
-
"""
|
|
588
|
+
def _convert_details(*, text: str, convert_as_inline: bool) -> str:
|
|
678
589
|
if convert_as_inline:
|
|
679
590
|
return text
|
|
680
591
|
|
|
@@ -682,15 +593,6 @@ def _convert_details(*, text: str, convert_as_inline: bool) -> str:
|
|
|
682
593
|
|
|
683
594
|
|
|
684
595
|
def _convert_summary(*, text: str, convert_as_inline: bool) -> str:
|
|
685
|
-
"""Convert HTML summary element to emphasized text.
|
|
686
|
-
|
|
687
|
-
Args:
|
|
688
|
-
text: The text content of the summary element.
|
|
689
|
-
convert_as_inline: Whether to convert as inline content.
|
|
690
|
-
|
|
691
|
-
Returns:
|
|
692
|
-
The converted markdown text as bold heading.
|
|
693
|
-
"""
|
|
694
596
|
if convert_as_inline:
|
|
695
597
|
return text
|
|
696
598
|
|
|
@@ -698,15 +600,6 @@ def _convert_summary(*, text: str, convert_as_inline: bool) -> str:
|
|
|
698
600
|
|
|
699
601
|
|
|
700
602
|
def _convert_dl(*, text: str, convert_as_inline: bool) -> str:
|
|
701
|
-
"""Convert HTML definition list element.
|
|
702
|
-
|
|
703
|
-
Args:
|
|
704
|
-
text: The text content of the definition list.
|
|
705
|
-
convert_as_inline: Whether to convert as inline content.
|
|
706
|
-
|
|
707
|
-
Returns:
|
|
708
|
-
The converted markdown text with proper spacing.
|
|
709
|
-
"""
|
|
710
603
|
if convert_as_inline:
|
|
711
604
|
return text
|
|
712
605
|
|
|
@@ -714,15 +607,6 @@ def _convert_dl(*, text: str, convert_as_inline: bool) -> str:
|
|
|
714
607
|
|
|
715
608
|
|
|
716
609
|
def _convert_dt(*, text: str, convert_as_inline: bool) -> str:
|
|
717
|
-
"""Convert HTML definition term element.
|
|
718
|
-
|
|
719
|
-
Args:
|
|
720
|
-
text: The text content of the definition term.
|
|
721
|
-
convert_as_inline: Whether to convert as inline content.
|
|
722
|
-
|
|
723
|
-
Returns:
|
|
724
|
-
The converted markdown text as a definition term.
|
|
725
|
-
"""
|
|
726
610
|
if convert_as_inline:
|
|
727
611
|
return text
|
|
728
612
|
|
|
@@ -733,15 +617,6 @@ def _convert_dt(*, text: str, convert_as_inline: bool) -> str:
|
|
|
733
617
|
|
|
734
618
|
|
|
735
619
|
def _convert_dd(*, text: str, convert_as_inline: bool) -> str:
|
|
736
|
-
"""Convert HTML definition description element.
|
|
737
|
-
|
|
738
|
-
Args:
|
|
739
|
-
text: The text content of the definition description.
|
|
740
|
-
convert_as_inline: Whether to convert as inline content.
|
|
741
|
-
|
|
742
|
-
Returns:
|
|
743
|
-
The converted markdown text as a definition description.
|
|
744
|
-
"""
|
|
745
620
|
if convert_as_inline:
|
|
746
621
|
return text
|
|
747
622
|
|
|
@@ -752,15 +627,6 @@ def _convert_dd(*, text: str, convert_as_inline: bool) -> str:
|
|
|
752
627
|
|
|
753
628
|
|
|
754
629
|
def _convert_cite(*, text: str, convert_as_inline: bool) -> str:
|
|
755
|
-
"""Convert HTML cite element to italic text.
|
|
756
|
-
|
|
757
|
-
Args:
|
|
758
|
-
text: The text content of the cite element.
|
|
759
|
-
convert_as_inline: Whether to convert as inline content.
|
|
760
|
-
|
|
761
|
-
Returns:
|
|
762
|
-
The converted markdown text in italic format.
|
|
763
|
-
"""
|
|
764
630
|
if convert_as_inline:
|
|
765
631
|
return text
|
|
766
632
|
|
|
@@ -771,15 +637,6 @@ def _convert_cite(*, text: str, convert_as_inline: bool) -> str:
|
|
|
771
637
|
|
|
772
638
|
|
|
773
639
|
def _convert_q(*, text: str, convert_as_inline: bool) -> str:
|
|
774
|
-
"""Convert HTML q element to quoted text.
|
|
775
|
-
|
|
776
|
-
Args:
|
|
777
|
-
text: The text content of the q element.
|
|
778
|
-
convert_as_inline: Whether to convert as inline content.
|
|
779
|
-
|
|
780
|
-
Returns:
|
|
781
|
-
The converted markdown text with quotes.
|
|
782
|
-
"""
|
|
783
640
|
if convert_as_inline:
|
|
784
641
|
return text
|
|
785
642
|
|
|
@@ -791,16 +648,6 @@ def _convert_q(*, text: str, convert_as_inline: bool) -> str:
|
|
|
791
648
|
|
|
792
649
|
|
|
793
650
|
def _convert_media_element(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
794
|
-
"""Convert HTML media elements (audio/video) to semantic Markdown.
|
|
795
|
-
|
|
796
|
-
Args:
|
|
797
|
-
tag: The media tag element.
|
|
798
|
-
text: The text content of the media element (fallback content).
|
|
799
|
-
convert_as_inline: Whether to convert as inline content.
|
|
800
|
-
|
|
801
|
-
Returns:
|
|
802
|
-
The converted markdown text (link if src exists, otherwise fallback content).
|
|
803
|
-
"""
|
|
804
651
|
src = tag.get("src", "")
|
|
805
652
|
|
|
806
653
|
if not src and (source_tag := tag.find("source")) and isinstance(source_tag, Tag):
|
|
@@ -822,16 +669,6 @@ def _convert_media_element(*, tag: Tag, text: str, convert_as_inline: bool) -> s
|
|
|
822
669
|
|
|
823
670
|
|
|
824
671
|
def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
825
|
-
"""Convert HTML iframe element to semantic Markdown.
|
|
826
|
-
|
|
827
|
-
Args:
|
|
828
|
-
tag: The iframe tag element.
|
|
829
|
-
text: The text content of the iframe element (usually empty).
|
|
830
|
-
convert_as_inline: Whether to convert as inline content.
|
|
831
|
-
|
|
832
|
-
Returns:
|
|
833
|
-
The converted markdown text (link if src exists).
|
|
834
|
-
"""
|
|
835
672
|
_ = text
|
|
836
673
|
src = tag.get("src", "")
|
|
837
674
|
|
|
@@ -845,16 +682,6 @@ def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
845
682
|
|
|
846
683
|
|
|
847
684
|
def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
848
|
-
"""Convert HTML abbr element to text with optional title.
|
|
849
|
-
|
|
850
|
-
Args:
|
|
851
|
-
tag: The abbr tag element.
|
|
852
|
-
text: The text content of the abbr element.
|
|
853
|
-
convert_as_inline: Whether to convert as inline content.
|
|
854
|
-
|
|
855
|
-
Returns:
|
|
856
|
-
The converted markdown text with optional title annotation.
|
|
857
|
-
"""
|
|
858
685
|
_ = convert_as_inline
|
|
859
686
|
if not text.strip():
|
|
860
687
|
return ""
|
|
@@ -867,16 +694,6 @@ def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
867
694
|
|
|
868
695
|
|
|
869
696
|
def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
870
|
-
"""Convert HTML time element to semantic Markdown.
|
|
871
|
-
|
|
872
|
-
Args:
|
|
873
|
-
tag: The time tag element.
|
|
874
|
-
text: The text content of the time element.
|
|
875
|
-
convert_as_inline: Whether to convert as inline content.
|
|
876
|
-
|
|
877
|
-
Returns:
|
|
878
|
-
The converted markdown text (content only, no HTML tags).
|
|
879
|
-
"""
|
|
880
697
|
_ = tag
|
|
881
698
|
_ = convert_as_inline
|
|
882
699
|
if not text.strip():
|
|
@@ -886,16 +703,6 @@ def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
886
703
|
|
|
887
704
|
|
|
888
705
|
def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
889
|
-
"""Convert HTML data element to semantic Markdown.
|
|
890
|
-
|
|
891
|
-
Args:
|
|
892
|
-
tag: The data tag element.
|
|
893
|
-
text: The text content of the data element.
|
|
894
|
-
convert_as_inline: Whether to convert as inline content.
|
|
895
|
-
|
|
896
|
-
Returns:
|
|
897
|
-
The converted markdown text (content only, no HTML tags).
|
|
898
|
-
"""
|
|
899
706
|
_ = tag
|
|
900
707
|
_ = convert_as_inline
|
|
901
708
|
if not text.strip():
|
|
@@ -905,29 +712,11 @@ def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
905
712
|
|
|
906
713
|
|
|
907
714
|
def _convert_wbr(*, convert_as_inline: bool) -> str:
|
|
908
|
-
"""Convert HTML wbr (word break opportunity) element.
|
|
909
|
-
|
|
910
|
-
Args:
|
|
911
|
-
convert_as_inline: Whether to convert as inline content.
|
|
912
|
-
|
|
913
|
-
Returns:
|
|
914
|
-
Empty string as wbr is just a break opportunity.
|
|
915
|
-
"""
|
|
916
715
|
_ = convert_as_inline
|
|
917
716
|
return ""
|
|
918
717
|
|
|
919
718
|
|
|
920
719
|
def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
921
|
-
"""Convert HTML form element to semantic Markdown.
|
|
922
|
-
|
|
923
|
-
Args:
|
|
924
|
-
tag: The form tag element.
|
|
925
|
-
text: The text content of the form element.
|
|
926
|
-
convert_as_inline: Whether to convert as inline content.
|
|
927
|
-
|
|
928
|
-
Returns:
|
|
929
|
-
The converted markdown text (only content, no HTML tags).
|
|
930
|
-
"""
|
|
931
720
|
_ = tag
|
|
932
721
|
if convert_as_inline:
|
|
933
722
|
return text
|
|
@@ -939,15 +728,6 @@ def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
939
728
|
|
|
940
729
|
|
|
941
730
|
def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
|
|
942
|
-
"""Convert HTML fieldset element to semantic Markdown.
|
|
943
|
-
|
|
944
|
-
Args:
|
|
945
|
-
text: The text content of the fieldset element.
|
|
946
|
-
convert_as_inline: Whether to convert as inline content.
|
|
947
|
-
|
|
948
|
-
Returns:
|
|
949
|
-
The converted markdown text (only content, no HTML tags).
|
|
950
|
-
"""
|
|
951
731
|
if convert_as_inline:
|
|
952
732
|
return text
|
|
953
733
|
|
|
@@ -958,15 +738,6 @@ def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
|
|
|
958
738
|
|
|
959
739
|
|
|
960
740
|
def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
|
|
961
|
-
"""Convert HTML legend element to emphasized text.
|
|
962
|
-
|
|
963
|
-
Args:
|
|
964
|
-
text: The text content of the legend element.
|
|
965
|
-
convert_as_inline: Whether to convert as inline content.
|
|
966
|
-
|
|
967
|
-
Returns:
|
|
968
|
-
The converted markdown text as emphasized legend.
|
|
969
|
-
"""
|
|
970
741
|
if convert_as_inline:
|
|
971
742
|
return text
|
|
972
743
|
|
|
@@ -977,16 +748,6 @@ def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
|
|
|
977
748
|
|
|
978
749
|
|
|
979
750
|
def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
980
|
-
"""Convert HTML label element to Markdown.
|
|
981
|
-
|
|
982
|
-
Args:
|
|
983
|
-
tag: The label tag element.
|
|
984
|
-
text: The text content of the label element.
|
|
985
|
-
convert_as_inline: Whether to convert as inline content.
|
|
986
|
-
|
|
987
|
-
Returns:
|
|
988
|
-
The label text content.
|
|
989
|
-
"""
|
|
990
751
|
_ = tag
|
|
991
752
|
if not text.strip():
|
|
992
753
|
return ""
|
|
@@ -995,30 +756,11 @@ def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
995
756
|
|
|
996
757
|
|
|
997
758
|
def _convert_input_enhanced(*, tag: Tag, convert_as_inline: bool) -> str:
|
|
998
|
-
"""Convert HTML input element to Markdown.
|
|
999
|
-
|
|
1000
|
-
Args:
|
|
1001
|
-
tag: The input tag element.
|
|
1002
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1003
|
-
|
|
1004
|
-
Returns:
|
|
1005
|
-
Empty string since input elements have no Markdown representation.
|
|
1006
|
-
"""
|
|
1007
759
|
_ = tag, convert_as_inline
|
|
1008
760
|
return ""
|
|
1009
761
|
|
|
1010
762
|
|
|
1011
763
|
def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1012
|
-
"""Convert HTML textarea element to Markdown.
|
|
1013
|
-
|
|
1014
|
-
Args:
|
|
1015
|
-
tag: The textarea tag element.
|
|
1016
|
-
text: The text content of the textarea element.
|
|
1017
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1018
|
-
|
|
1019
|
-
Returns:
|
|
1020
|
-
The text content of the textarea.
|
|
1021
|
-
"""
|
|
1022
764
|
_ = tag
|
|
1023
765
|
if not text.strip():
|
|
1024
766
|
return ""
|
|
@@ -1027,16 +769,6 @@ def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1027
769
|
|
|
1028
770
|
|
|
1029
771
|
def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1030
|
-
"""Convert HTML select element to Markdown.
|
|
1031
|
-
|
|
1032
|
-
Args:
|
|
1033
|
-
tag: The select tag element.
|
|
1034
|
-
text: The text content of the select element.
|
|
1035
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1036
|
-
|
|
1037
|
-
Returns:
|
|
1038
|
-
The text content (options) as a comma-separated list.
|
|
1039
|
-
"""
|
|
1040
772
|
_ = tag
|
|
1041
773
|
if not text.strip():
|
|
1042
774
|
return ""
|
|
@@ -1049,16 +781,6 @@ def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1049
781
|
|
|
1050
782
|
|
|
1051
783
|
def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1052
|
-
"""Convert HTML option element to Markdown.
|
|
1053
|
-
|
|
1054
|
-
Args:
|
|
1055
|
-
tag: The option tag element.
|
|
1056
|
-
text: The text content of the option element.
|
|
1057
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1058
|
-
|
|
1059
|
-
Returns:
|
|
1060
|
-
The option text, potentially with a marker if selected.
|
|
1061
|
-
"""
|
|
1062
784
|
if not text.strip():
|
|
1063
785
|
return ""
|
|
1064
786
|
|
|
@@ -1074,16 +796,6 @@ def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1074
796
|
|
|
1075
797
|
|
|
1076
798
|
def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1077
|
-
"""Convert HTML optgroup element to semantic Markdown.
|
|
1078
|
-
|
|
1079
|
-
Args:
|
|
1080
|
-
tag: The optgroup tag element.
|
|
1081
|
-
text: The text content of the optgroup element.
|
|
1082
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1083
|
-
|
|
1084
|
-
Returns:
|
|
1085
|
-
The converted markdown text with label as heading.
|
|
1086
|
-
"""
|
|
1087
799
|
if convert_as_inline:
|
|
1088
800
|
return text
|
|
1089
801
|
|
|
@@ -1100,16 +812,6 @@ def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1100
812
|
|
|
1101
813
|
|
|
1102
814
|
def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1103
|
-
"""Convert HTML button element to Markdown.
|
|
1104
|
-
|
|
1105
|
-
Args:
|
|
1106
|
-
tag: The button tag element.
|
|
1107
|
-
text: The text content of the button element.
|
|
1108
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1109
|
-
|
|
1110
|
-
Returns:
|
|
1111
|
-
The button text content.
|
|
1112
|
-
"""
|
|
1113
815
|
_ = tag
|
|
1114
816
|
if not text.strip():
|
|
1115
817
|
return ""
|
|
@@ -1118,16 +820,6 @@ def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1118
820
|
|
|
1119
821
|
|
|
1120
822
|
def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1121
|
-
"""Convert HTML progress element to semantic text.
|
|
1122
|
-
|
|
1123
|
-
Args:
|
|
1124
|
-
tag: The progress tag element.
|
|
1125
|
-
text: The text content of the progress element.
|
|
1126
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1127
|
-
|
|
1128
|
-
Returns:
|
|
1129
|
-
The converted markdown text (only content, no HTML tags).
|
|
1130
|
-
"""
|
|
1131
823
|
_ = tag
|
|
1132
824
|
if convert_as_inline:
|
|
1133
825
|
return text
|
|
@@ -1139,16 +831,6 @@ def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1139
831
|
|
|
1140
832
|
|
|
1141
833
|
def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1142
|
-
"""Convert HTML meter element to semantic text.
|
|
1143
|
-
|
|
1144
|
-
Args:
|
|
1145
|
-
tag: The meter tag element.
|
|
1146
|
-
text: The text content of the meter element.
|
|
1147
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1148
|
-
|
|
1149
|
-
Returns:
|
|
1150
|
-
The converted markdown text (only content, no HTML tags).
|
|
1151
|
-
"""
|
|
1152
834
|
_ = tag
|
|
1153
835
|
if convert_as_inline:
|
|
1154
836
|
return text
|
|
@@ -1160,16 +842,6 @@ def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1160
842
|
|
|
1161
843
|
|
|
1162
844
|
def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1163
|
-
"""Convert HTML output element to semantic text.
|
|
1164
|
-
|
|
1165
|
-
Args:
|
|
1166
|
-
tag: The output tag element.
|
|
1167
|
-
text: The text content of the output element.
|
|
1168
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1169
|
-
|
|
1170
|
-
Returns:
|
|
1171
|
-
The converted markdown text (only content, no HTML tags).
|
|
1172
|
-
"""
|
|
1173
845
|
_ = tag
|
|
1174
846
|
if convert_as_inline:
|
|
1175
847
|
return text
|
|
@@ -1181,16 +853,6 @@ def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1181
853
|
|
|
1182
854
|
|
|
1183
855
|
def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1184
|
-
"""Convert HTML datalist element to semantic Markdown.
|
|
1185
|
-
|
|
1186
|
-
Args:
|
|
1187
|
-
tag: The datalist tag element.
|
|
1188
|
-
text: The text content of the datalist element.
|
|
1189
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1190
|
-
|
|
1191
|
-
Returns:
|
|
1192
|
-
The converted markdown text (only content, no HTML tags).
|
|
1193
|
-
"""
|
|
1194
856
|
_ = tag
|
|
1195
857
|
if convert_as_inline:
|
|
1196
858
|
return text
|
|
@@ -1202,15 +864,6 @@ def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1202
864
|
|
|
1203
865
|
|
|
1204
866
|
def _convert_ruby(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
1205
|
-
"""Convert HTML ruby element providing pronunciation annotation.
|
|
1206
|
-
|
|
1207
|
-
Args:
|
|
1208
|
-
text: The text content of the ruby element.
|
|
1209
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1210
|
-
|
|
1211
|
-
Returns:
|
|
1212
|
-
The converted markdown text with ruby annotation as fallback text.
|
|
1213
|
-
"""
|
|
1214
867
|
if not text.strip():
|
|
1215
868
|
return ""
|
|
1216
869
|
|
|
@@ -1218,15 +871,6 @@ def _convert_ruby(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
|
1218
871
|
|
|
1219
872
|
|
|
1220
873
|
def _convert_rb(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
1221
|
-
"""Convert HTML rb (ruby base) element.
|
|
1222
|
-
|
|
1223
|
-
Args:
|
|
1224
|
-
text: The text content of the rb element.
|
|
1225
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1226
|
-
|
|
1227
|
-
Returns:
|
|
1228
|
-
The converted markdown text (ruby base text).
|
|
1229
|
-
"""
|
|
1230
874
|
if not text.strip():
|
|
1231
875
|
return ""
|
|
1232
876
|
|
|
@@ -1234,16 +878,6 @@ def _convert_rb(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
|
1234
878
|
|
|
1235
879
|
|
|
1236
880
|
def _convert_rt(*, text: str, convert_as_inline: bool, tag: Tag) -> str: # noqa: ARG001
|
|
1237
|
-
"""Convert HTML rt (ruby text) element for pronunciation.
|
|
1238
|
-
|
|
1239
|
-
Args:
|
|
1240
|
-
text: The text content of the rt element.
|
|
1241
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1242
|
-
tag: The rt tag element.
|
|
1243
|
-
|
|
1244
|
-
Returns:
|
|
1245
|
-
The converted markdown text with pronunciation in parentheses.
|
|
1246
|
-
"""
|
|
1247
881
|
content = text.strip()
|
|
1248
882
|
|
|
1249
883
|
prev_sibling = tag.previous_sibling
|
|
@@ -1259,15 +893,6 @@ def _convert_rt(*, text: str, convert_as_inline: bool, tag: Tag) -> str: # noqa
|
|
|
1259
893
|
|
|
1260
894
|
|
|
1261
895
|
def _convert_rp(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
1262
|
-
"""Convert HTML rp (ruby parentheses) element for fallback.
|
|
1263
|
-
|
|
1264
|
-
Args:
|
|
1265
|
-
text: The text content of the rp element.
|
|
1266
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1267
|
-
|
|
1268
|
-
Returns:
|
|
1269
|
-
The converted markdown text (parentheses for ruby fallback).
|
|
1270
|
-
"""
|
|
1271
896
|
if not text.strip():
|
|
1272
897
|
return ""
|
|
1273
898
|
|
|
@@ -1275,15 +900,6 @@ def _convert_rp(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
|
1275
900
|
|
|
1276
901
|
|
|
1277
902
|
def _convert_rtc(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
1278
|
-
"""Convert HTML rtc (ruby text container) element.
|
|
1279
|
-
|
|
1280
|
-
Args:
|
|
1281
|
-
text: The text content of the rtc element.
|
|
1282
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1283
|
-
|
|
1284
|
-
Returns:
|
|
1285
|
-
The converted markdown text (ruby text container).
|
|
1286
|
-
"""
|
|
1287
903
|
if not text.strip():
|
|
1288
904
|
return ""
|
|
1289
905
|
|
|
@@ -1291,16 +907,6 @@ def _convert_rtc(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
|
1291
907
|
|
|
1292
908
|
|
|
1293
909
|
def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1294
|
-
"""Convert HTML dialog element to semantic Markdown.
|
|
1295
|
-
|
|
1296
|
-
Args:
|
|
1297
|
-
text: The text content of the dialog element.
|
|
1298
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1299
|
-
tag: The dialog tag element.
|
|
1300
|
-
|
|
1301
|
-
Returns:
|
|
1302
|
-
The converted markdown text (only content, no HTML tags).
|
|
1303
|
-
"""
|
|
1304
910
|
_ = tag
|
|
1305
911
|
if convert_as_inline:
|
|
1306
912
|
return text
|
|
@@ -1312,16 +918,6 @@ def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1312
918
|
|
|
1313
919
|
|
|
1314
920
|
def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1315
|
-
"""Convert HTML menu element to semantic Markdown.
|
|
1316
|
-
|
|
1317
|
-
Args:
|
|
1318
|
-
text: The text content of the menu element.
|
|
1319
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1320
|
-
tag: The menu tag element.
|
|
1321
|
-
|
|
1322
|
-
Returns:
|
|
1323
|
-
The converted markdown text (only content, no HTML tags).
|
|
1324
|
-
"""
|
|
1325
921
|
_ = tag
|
|
1326
922
|
if convert_as_inline:
|
|
1327
923
|
return text
|
|
@@ -1333,16 +929,6 @@ def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1333
929
|
|
|
1334
930
|
|
|
1335
931
|
def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1336
|
-
"""Convert HTML figure element to semantic Markdown.
|
|
1337
|
-
|
|
1338
|
-
Args:
|
|
1339
|
-
text: The text content of the figure element.
|
|
1340
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1341
|
-
tag: The figure tag element.
|
|
1342
|
-
|
|
1343
|
-
Returns:
|
|
1344
|
-
The converted markdown text (only content, no HTML tags).
|
|
1345
|
-
"""
|
|
1346
932
|
_ = tag
|
|
1347
933
|
if not text.strip():
|
|
1348
934
|
return ""
|
|
@@ -1360,15 +946,6 @@ def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1360
946
|
|
|
1361
947
|
|
|
1362
948
|
def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
|
|
1363
|
-
"""Convert HTML hgroup element to semantic Markdown.
|
|
1364
|
-
|
|
1365
|
-
Args:
|
|
1366
|
-
text: The text content of the hgroup element.
|
|
1367
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1368
|
-
|
|
1369
|
-
Returns:
|
|
1370
|
-
The converted markdown text (only content, no HTML tags).
|
|
1371
|
-
"""
|
|
1372
949
|
if convert_as_inline:
|
|
1373
950
|
return text
|
|
1374
951
|
|
|
@@ -1379,16 +956,6 @@ def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
|
|
|
1379
956
|
|
|
1380
957
|
|
|
1381
958
|
def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1382
|
-
"""Convert HTML picture element to semantic Markdown.
|
|
1383
|
-
|
|
1384
|
-
Args:
|
|
1385
|
-
text: The text content of the picture element.
|
|
1386
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1387
|
-
tag: The picture tag element.
|
|
1388
|
-
|
|
1389
|
-
Returns:
|
|
1390
|
-
The converted markdown text (only the img element).
|
|
1391
|
-
"""
|
|
1392
959
|
_ = tag, convert_as_inline
|
|
1393
960
|
if not text.strip():
|
|
1394
961
|
return ""
|
|
@@ -1397,16 +964,6 @@ def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1397
964
|
|
|
1398
965
|
|
|
1399
966
|
def _convert_svg(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1400
|
-
"""Convert SVG element to Markdown image reference.
|
|
1401
|
-
|
|
1402
|
-
Args:
|
|
1403
|
-
text: The text content of the SVG element.
|
|
1404
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1405
|
-
tag: The SVG tag element.
|
|
1406
|
-
|
|
1407
|
-
Returns:
|
|
1408
|
-
The converted markdown text as an image reference.
|
|
1409
|
-
"""
|
|
1410
967
|
if convert_as_inline:
|
|
1411
968
|
return text.strip()
|
|
1412
969
|
|
|
@@ -1425,16 +982,6 @@ def _convert_svg(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1425
982
|
|
|
1426
983
|
|
|
1427
984
|
def _convert_math(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1428
|
-
"""Convert MathML math element preserving mathematical notation.
|
|
1429
|
-
|
|
1430
|
-
Args:
|
|
1431
|
-
text: The text content of the math element.
|
|
1432
|
-
convert_as_inline: Whether to convert as inline content.
|
|
1433
|
-
tag: The math tag element.
|
|
1434
|
-
|
|
1435
|
-
Returns:
|
|
1436
|
-
The converted markdown text preserving math structure.
|
|
1437
|
-
"""
|
|
1438
985
|
if not text.strip():
|
|
1439
986
|
return ""
|
|
1440
987
|
|
|
@@ -1457,6 +1004,8 @@ def create_converters_map(
|
|
|
1457
1004
|
heading_style: Literal["atx", "atx_closed", "underlined"],
|
|
1458
1005
|
highlight_style: Literal["double-equal", "html", "bold"],
|
|
1459
1006
|
keep_inline_images_in: Iterable[str] | None,
|
|
1007
|
+
list_indent_type: str,
|
|
1008
|
+
list_indent_width: int,
|
|
1460
1009
|
newline_style: str,
|
|
1461
1010
|
strong_em_symbol: str,
|
|
1462
1011
|
sub_symbol: str,
|
|
@@ -1464,27 +1013,7 @@ def create_converters_map(
|
|
|
1464
1013
|
wrap: bool,
|
|
1465
1014
|
wrap_width: int,
|
|
1466
1015
|
) -> ConvertersMap:
|
|
1467
|
-
"""Create a mapping of HTML elements to their corresponding conversion functions.
|
|
1468
|
-
|
|
1469
|
-
Args:
|
|
1470
|
-
autolinks: Whether to convert URLs into links.
|
|
1471
|
-
bullets: The bullet characters to use for unordered lists.
|
|
1472
|
-
code_language: The default code language to use.
|
|
1473
|
-
code_language_callback: A callback to get the code language.
|
|
1474
|
-
default_title: Whether to use the URL as the title for links.
|
|
1475
|
-
heading_style: The style of headings.
|
|
1476
|
-
highlight_style: The style to use for highlighted text (mark elements).
|
|
1477
|
-
keep_inline_images_in: The tags to keep inline images in.
|
|
1478
|
-
newline_style: The style of newlines.
|
|
1479
|
-
strong_em_symbol: The symbol to use for strong and emphasis text.
|
|
1480
|
-
sub_symbol: The symbol to use for subscript text.
|
|
1481
|
-
sup_symbol: The symbol to use for superscript text.
|
|
1482
|
-
wrap: Whether to wrap text.
|
|
1483
|
-
wrap_width: The width to wrap text at.
|
|
1484
|
-
|
|
1485
|
-
Returns:
|
|
1486
|
-
A mapping of HTML elements to their corresponding conversion functions
|
|
1487
|
-
"""
|
|
1016
|
+
list_indent_str = "\t" if list_indent_type == "tabs" else " " * list_indent_width
|
|
1488
1017
|
|
|
1489
1018
|
def _wrapper(func: Callable[..., T]) -> Callable[[str, Tag], T]:
|
|
1490
1019
|
spec = getfullargspec(func)
|
|
@@ -1498,6 +1027,8 @@ def create_converters_map(
|
|
|
1498
1027
|
kwargs["text"] = text
|
|
1499
1028
|
if "convert_as_inline" in spec.kwonlyargs:
|
|
1500
1029
|
kwargs["convert_as_inline"] = convert_as_inline
|
|
1030
|
+
if "list_indent_str" in spec.kwonlyargs:
|
|
1031
|
+
kwargs["list_indent_str"] = list_indent_str
|
|
1501
1032
|
return func(**kwargs)
|
|
1502
1033
|
return func(text)
|
|
1503
1034
|
|
|
@@ -1512,7 +1043,7 @@ def create_converters_map(
|
|
|
1512
1043
|
"b": _wrapper(partial(_create_inline_converter(2 * strong_em_symbol))),
|
|
1513
1044
|
"bdi": _wrapper(_create_inline_converter("")),
|
|
1514
1045
|
"bdo": _wrapper(_create_inline_converter("")),
|
|
1515
|
-
"blockquote": _wrapper(partial(_convert_blockquote)),
|
|
1046
|
+
"blockquote": _wrapper(partial(_convert_blockquote, list_indent_str=list_indent_str)),
|
|
1516
1047
|
"br": _wrapper(partial(_convert_br, newline_style=newline_style)),
|
|
1517
1048
|
"button": _wrapper(_convert_button),
|
|
1518
1049
|
"caption": _wrapper(_convert_caption),
|
|
@@ -1527,6 +1058,7 @@ def create_converters_map(
|
|
|
1527
1058
|
"details": _wrapper(_convert_details),
|
|
1528
1059
|
"dfn": _wrapper(_create_inline_converter("*")),
|
|
1529
1060
|
"dialog": _wrapper(_convert_dialog),
|
|
1061
|
+
"div": _wrapper(_convert_div),
|
|
1530
1062
|
"dl": _wrapper(_convert_dl),
|
|
1531
1063
|
"dt": _wrapper(_convert_dt),
|
|
1532
1064
|
"em": _wrapper(_create_inline_converter(strong_em_symbol)),
|
|
@@ -1552,19 +1084,19 @@ def create_converters_map(
|
|
|
1552
1084
|
"kbd": _wrapper(_create_inline_converter("`")),
|
|
1553
1085
|
"label": _wrapper(_convert_label),
|
|
1554
1086
|
"legend": _wrapper(_convert_legend),
|
|
1555
|
-
"li": _wrapper(partial(_convert_li, bullets=bullets)),
|
|
1556
|
-
"list": _wrapper(_convert_list),
|
|
1087
|
+
"li": _wrapper(partial(_convert_li, bullets=bullets, list_indent_str=list_indent_str)),
|
|
1088
|
+
"list": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
|
|
1557
1089
|
"main": _wrapper(_convert_semantic_block),
|
|
1558
1090
|
"mark": _wrapper(partial(_convert_mark, highlight_style=highlight_style)),
|
|
1559
1091
|
"math": _wrapper(_convert_math),
|
|
1560
1092
|
"menu": _wrapper(_convert_menu),
|
|
1561
1093
|
"meter": _wrapper(_convert_meter),
|
|
1562
1094
|
"nav": _wrapper(_convert_semantic_block),
|
|
1563
|
-
"ol": _wrapper(_convert_list),
|
|
1095
|
+
"ol": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
|
|
1564
1096
|
"optgroup": _wrapper(_convert_optgroup),
|
|
1565
1097
|
"option": _wrapper(_convert_option),
|
|
1566
1098
|
"output": _wrapper(_convert_output),
|
|
1567
|
-
"p": _wrapper(partial(_convert_p, wrap=wrap, wrap_width=wrap_width)),
|
|
1099
|
+
"p": _wrapper(partial(_convert_p, wrap=wrap, wrap_width=wrap_width, list_indent_str=list_indent_str)),
|
|
1568
1100
|
"picture": _wrapper(_convert_picture),
|
|
1569
1101
|
"pre": _wrapper(
|
|
1570
1102
|
partial(
|
|
@@ -1602,7 +1134,7 @@ def create_converters_map(
|
|
|
1602
1134
|
"time": _wrapper(_convert_time),
|
|
1603
1135
|
"tr": _wrapper(_convert_tr),
|
|
1604
1136
|
"u": _wrapper(_create_inline_converter("")),
|
|
1605
|
-
"ul": _wrapper(_convert_list),
|
|
1137
|
+
"ul": _wrapper(partial(_convert_list, list_indent_str=list_indent_str)),
|
|
1606
1138
|
"var": _wrapper(_create_inline_converter("*")),
|
|
1607
1139
|
"video": _wrapper(_convert_media_element),
|
|
1608
1140
|
"wbr": _wrapper(_convert_wbr),
|