html-to-markdown 1.8.0__py3-none-any.whl → 1.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- html_to_markdown/converters.py +305 -562
- html_to_markdown/processing.py +120 -45
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.1.dist-info}/METADATA +96 -16
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.1.dist-info}/RECORD +8 -8
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.1.dist-info}/WHEEL +0 -0
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.1.dist-info}/entry_points.txt +0 -0
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.1.dist-info}/licenses/LICENSE +0 -0
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.1.dist-info}/top_level.txt +0 -0
html_to_markdown/converters.py
CHANGED
|
@@ -5,11 +5,11 @@ from typing import TYPE_CHECKING
|
|
|
5
5
|
if TYPE_CHECKING:
|
|
6
6
|
from collections.abc import Iterable
|
|
7
7
|
import base64
|
|
8
|
-
import
|
|
8
|
+
from collections.abc import Callable
|
|
9
9
|
from functools import partial
|
|
10
10
|
from inspect import getfullargspec
|
|
11
11
|
from textwrap import fill
|
|
12
|
-
from typing import Any,
|
|
12
|
+
from typing import Any, Literal, TypeVar, cast
|
|
13
13
|
|
|
14
14
|
from bs4.element import Tag
|
|
15
15
|
|
|
@@ -21,6 +21,24 @@ from html_to_markdown.constants import (
|
|
|
21
21
|
)
|
|
22
22
|
from html_to_markdown.utils import chomp, indent, underline
|
|
23
23
|
|
|
24
|
+
|
|
25
|
+
def _format_block_element(text: str) -> str:
|
|
26
|
+
"""Format text as a block element with trailing newlines."""
|
|
27
|
+
return f"{text.strip()}\n\n" if text.strip() else ""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _format_inline_or_block(text: str, convert_as_inline: bool) -> str:
|
|
31
|
+
"""Format text as inline or block element based on context."""
|
|
32
|
+
return text.strip() if convert_as_inline else _format_block_element(text)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _format_wrapped_block(text: str, start_marker: str, end_marker: str = "") -> str:
|
|
36
|
+
"""Format text wrapped in markers as a block element."""
|
|
37
|
+
if not end_marker:
|
|
38
|
+
end_marker = start_marker
|
|
39
|
+
return f"{start_marker}{text.strip()}{end_marker}\n\n" if text.strip() else ""
|
|
40
|
+
|
|
41
|
+
|
|
24
42
|
SupportedElements = Literal[
|
|
25
43
|
"a",
|
|
26
44
|
"abbr",
|
|
@@ -189,11 +207,22 @@ def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool) -> str:
|
|
|
189
207
|
if not text:
|
|
190
208
|
return ""
|
|
191
209
|
|
|
210
|
+
from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
|
|
211
|
+
|
|
192
212
|
cite_url = tag.get("cite")
|
|
193
|
-
|
|
213
|
+
|
|
214
|
+
if _has_ancestor(tag, "li"):
|
|
215
|
+
lines = text.strip().split("\n")
|
|
216
|
+
indented_lines = [f" > {line}" if line.strip() else "" for line in lines]
|
|
217
|
+
quote_text = "\n".join(indented_lines) + "\n\n"
|
|
218
|
+
else:
|
|
219
|
+
quote_text = f"\n{line_beginning_re.sub('> ', text.strip())}\n\n"
|
|
194
220
|
|
|
195
221
|
if cite_url:
|
|
196
|
-
|
|
222
|
+
if _has_ancestor(tag, "li"):
|
|
223
|
+
quote_text += f" — <{cite_url}>\n\n"
|
|
224
|
+
else:
|
|
225
|
+
quote_text += f"— <{cite_url}>\n\n"
|
|
197
226
|
|
|
198
227
|
return quote_text
|
|
199
228
|
|
|
@@ -243,8 +272,8 @@ def _convert_img(*, tag: Tag, convert_as_inline: bool, keep_inline_images_in: It
|
|
|
243
272
|
title_part = ' "{}"'.format(title.replace('"', r"\"")) if title else ""
|
|
244
273
|
parent_name = tag.parent.name if tag.parent else ""
|
|
245
274
|
|
|
246
|
-
default_preserve_in =
|
|
247
|
-
preserve_in = set(keep_inline_images_in or []) |
|
|
275
|
+
default_preserve_in = {"td", "th"}
|
|
276
|
+
preserve_in = set(keep_inline_images_in or []) | default_preserve_in
|
|
248
277
|
if convert_as_inline and parent_name not in preserve_in:
|
|
249
278
|
return alt
|
|
250
279
|
if width or height:
|
|
@@ -253,24 +282,49 @@ def _convert_img(*, tag: Tag, convert_as_inline: bool, keep_inline_images_in: It
|
|
|
253
282
|
|
|
254
283
|
|
|
255
284
|
def _convert_list(*, tag: Tag, text: str) -> str:
|
|
256
|
-
|
|
285
|
+
from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
|
|
257
286
|
|
|
258
287
|
before_paragraph = False
|
|
259
288
|
if tag.next_sibling and getattr(tag.next_sibling, "name", None) not in {"ul", "ol"}:
|
|
260
289
|
before_paragraph = True
|
|
261
290
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
if
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
291
|
+
if _has_ancestor(tag, "li"):
|
|
292
|
+
parent = tag.parent
|
|
293
|
+
while parent and parent.name != "li":
|
|
294
|
+
parent = parent.parent
|
|
295
|
+
|
|
296
|
+
if parent:
|
|
297
|
+
prev_p = None
|
|
298
|
+
for child in parent.children:
|
|
299
|
+
if hasattr(child, "name"):
|
|
300
|
+
if child == tag:
|
|
301
|
+
break
|
|
302
|
+
if child.name == "p":
|
|
303
|
+
prev_p = child
|
|
304
|
+
|
|
305
|
+
if prev_p:
|
|
306
|
+
lines = text.strip().split("\n")
|
|
307
|
+
indented_lines = []
|
|
308
|
+
for line in lines:
|
|
309
|
+
if line.strip():
|
|
310
|
+
indented_lines.append(f" {line}")
|
|
311
|
+
else:
|
|
312
|
+
indented_lines.append("")
|
|
313
|
+
return "\n" + "\n".join(indented_lines) + "\n"
|
|
314
|
+
return "\n" + indent(text=text, level=1).rstrip()
|
|
315
|
+
|
|
316
|
+
if tag.parent and tag.parent.name in {"ul", "ol"}:
|
|
317
|
+
lines = text.strip().split("\n")
|
|
318
|
+
indented_lines = []
|
|
319
|
+
for line in lines:
|
|
320
|
+
if line.strip():
|
|
321
|
+
indented_lines.append(f" {line}")
|
|
322
|
+
else:
|
|
323
|
+
indented_lines.append("")
|
|
324
|
+
result = "\n".join(indented_lines)
|
|
325
|
+
if not result.endswith("\n"):
|
|
326
|
+
result += "\n"
|
|
327
|
+
return result
|
|
274
328
|
|
|
275
329
|
return text + ("\n" if before_paragraph else "")
|
|
276
330
|
|
|
@@ -305,10 +359,30 @@ def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
|
|
|
305
359
|
tag = tag.parent
|
|
306
360
|
|
|
307
361
|
bullet = bullets[depth % len(bullets)]
|
|
362
|
+
|
|
363
|
+
has_block_children = any(
|
|
364
|
+
child.name in {"p", "blockquote", "pre", "ul", "ol", "div", "h1", "h2", "h3", "h4", "h5", "h6"}
|
|
365
|
+
for child in tag.children
|
|
366
|
+
if hasattr(child, "name")
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
if has_block_children:
|
|
370
|
+
paragraphs = text.strip().split("\n\n")
|
|
371
|
+
|
|
372
|
+
if paragraphs:
|
|
373
|
+
result_parts = [f"{bullet} {paragraphs[0].strip()}\n"]
|
|
374
|
+
|
|
375
|
+
for para in paragraphs[1:]:
|
|
376
|
+
if para.strip():
|
|
377
|
+
result_parts.append("\n")
|
|
378
|
+
result_parts.extend(f" {line}\n" for line in para.strip().split("\n") if line.strip())
|
|
379
|
+
|
|
380
|
+
return "".join(result_parts)
|
|
381
|
+
|
|
308
382
|
return "{} {}\n".format(bullet, (text or "").strip())
|
|
309
383
|
|
|
310
384
|
|
|
311
|
-
def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int) -> str:
|
|
385
|
+
def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int, tag: Tag) -> str:
|
|
312
386
|
if convert_as_inline:
|
|
313
387
|
return text
|
|
314
388
|
|
|
@@ -320,6 +394,25 @@ def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: in
|
|
|
320
394
|
break_on_hyphens=False,
|
|
321
395
|
)
|
|
322
396
|
|
|
397
|
+
from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
|
|
398
|
+
|
|
399
|
+
if _has_ancestor(tag, "li"):
|
|
400
|
+
parent = tag.parent
|
|
401
|
+
while parent and parent.name != "li":
|
|
402
|
+
parent = parent.parent
|
|
403
|
+
|
|
404
|
+
if parent:
|
|
405
|
+
p_children = [child for child in parent.children if hasattr(child, "name") and child.name == "p"]
|
|
406
|
+
|
|
407
|
+
if p_children and tag != p_children[0]:
|
|
408
|
+
indented_lines = []
|
|
409
|
+
for line in text.split("\n"):
|
|
410
|
+
if line.strip():
|
|
411
|
+
indented_lines.append(f" {line}")
|
|
412
|
+
else:
|
|
413
|
+
indented_lines.append("")
|
|
414
|
+
text = "\n".join(indented_lines)
|
|
415
|
+
|
|
323
416
|
return f"{text}\n\n" if text else ""
|
|
324
417
|
|
|
325
418
|
|
|
@@ -337,13 +430,15 @@ def _convert_mark(*, text: str, convert_as_inline: bool, highlight_style: str) -
|
|
|
337
430
|
if convert_as_inline:
|
|
338
431
|
return text
|
|
339
432
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
433
|
+
match highlight_style:
|
|
434
|
+
case "double-equal":
|
|
435
|
+
return f"=={text}=="
|
|
436
|
+
case "bold":
|
|
437
|
+
return f"**{text}**"
|
|
438
|
+
case "html":
|
|
439
|
+
return f"<mark>{text}</mark>"
|
|
440
|
+
case _:
|
|
441
|
+
return text
|
|
347
442
|
|
|
348
443
|
|
|
349
444
|
def _convert_pre(
|
|
@@ -376,6 +471,51 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
|
|
|
376
471
|
cells = tag.find_all(["td", "th"])
|
|
377
472
|
parent_name = tag.parent.name if tag.parent and hasattr(tag.parent, "name") else ""
|
|
378
473
|
tag_grand_parent = tag.parent.parent if tag.parent else None
|
|
474
|
+
|
|
475
|
+
if tag.previous_sibling and hasattr(tag.previous_sibling, "name") and tag.previous_sibling.name == "tr":
|
|
476
|
+
prev_cells = cast("Tag", tag.previous_sibling).find_all(["td", "th"])
|
|
477
|
+
rowspan_positions = []
|
|
478
|
+
col_pos = 0
|
|
479
|
+
|
|
480
|
+
for prev_cell in prev_cells:
|
|
481
|
+
rowspan = 1
|
|
482
|
+
if (
|
|
483
|
+
"rowspan" in prev_cell.attrs
|
|
484
|
+
and isinstance(prev_cell["rowspan"], str)
|
|
485
|
+
and prev_cell["rowspan"].isdigit()
|
|
486
|
+
):
|
|
487
|
+
rowspan = int(prev_cell["rowspan"])
|
|
488
|
+
|
|
489
|
+
if rowspan > 1:
|
|
490
|
+
rowspan_positions.append(col_pos)
|
|
491
|
+
|
|
492
|
+
colspan = 1
|
|
493
|
+
if (
|
|
494
|
+
"colspan" in prev_cell.attrs
|
|
495
|
+
and isinstance(prev_cell["colspan"], str)
|
|
496
|
+
and prev_cell["colspan"].isdigit()
|
|
497
|
+
):
|
|
498
|
+
colspan = int(prev_cell["colspan"])
|
|
499
|
+
col_pos += colspan
|
|
500
|
+
|
|
501
|
+
if rowspan_positions:
|
|
502
|
+
converted_cells: list[str] = []
|
|
503
|
+
if text.strip():
|
|
504
|
+
parts = text.split("|")
|
|
505
|
+
converted_cells.extend(part.rstrip() + " |" for part in parts[:-1] if part)
|
|
506
|
+
|
|
507
|
+
new_cells: list[str] = []
|
|
508
|
+
cell_index = 0
|
|
509
|
+
|
|
510
|
+
for pos in range(col_pos):
|
|
511
|
+
if pos in rowspan_positions:
|
|
512
|
+
new_cells.append(" |")
|
|
513
|
+
elif cell_index < len(converted_cells):
|
|
514
|
+
new_cells.append(converted_cells[cell_index])
|
|
515
|
+
cell_index += 1
|
|
516
|
+
|
|
517
|
+
text = "".join(new_cells)
|
|
518
|
+
|
|
379
519
|
is_headrow = (
|
|
380
520
|
all(hasattr(cell, "name") and cell.name == "th" for cell in cells)
|
|
381
521
|
or (not tag.previous_sibling and parent_name != "tbody")
|
|
@@ -423,7 +563,7 @@ def _convert_caption(*, text: str, convert_as_inline: bool) -> str:
|
|
|
423
563
|
if not text.strip():
|
|
424
564
|
return ""
|
|
425
565
|
|
|
426
|
-
return
|
|
566
|
+
return _format_wrapped_block(text, "*")
|
|
427
567
|
|
|
428
568
|
|
|
429
569
|
def _convert_thead(*, text: str, convert_as_inline: bool) -> str:
|
|
@@ -475,7 +615,10 @@ def _convert_tfoot(*, text: str, convert_as_inline: bool) -> str:
|
|
|
475
615
|
|
|
476
616
|
|
|
477
617
|
def _convert_colgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
478
|
-
"""Convert HTML colgroup element
|
|
618
|
+
"""Convert HTML colgroup element - removes it entirely from Markdown output.
|
|
619
|
+
|
|
620
|
+
Colgroup is a table column grouping element that defines styling for columns.
|
|
621
|
+
It has no representation in Markdown and should be removed.
|
|
479
622
|
|
|
480
623
|
Args:
|
|
481
624
|
tag: The colgroup tag element.
|
|
@@ -483,54 +626,27 @@ def _convert_colgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
483
626
|
convert_as_inline: Whether to convert as inline content.
|
|
484
627
|
|
|
485
628
|
Returns:
|
|
486
|
-
|
|
629
|
+
Empty string as colgroup has no Markdown representation.
|
|
487
630
|
"""
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
if not text.strip():
|
|
492
|
-
return ""
|
|
493
|
-
|
|
494
|
-
span = tag.get("span", "")
|
|
495
|
-
attrs = []
|
|
496
|
-
if span and isinstance(span, str) and span.strip():
|
|
497
|
-
attrs.append(f'span="{span}"')
|
|
498
|
-
|
|
499
|
-
attrs_str = " ".join(attrs)
|
|
500
|
-
if attrs_str:
|
|
501
|
-
return f"<colgroup {attrs_str}>\n{text.strip()}\n</colgroup>\n\n"
|
|
502
|
-
return f"<colgroup>\n{text.strip()}\n</colgroup>\n\n"
|
|
631
|
+
_ = tag, text, convert_as_inline
|
|
632
|
+
return ""
|
|
503
633
|
|
|
504
634
|
|
|
505
635
|
def _convert_col(*, tag: Tag, convert_as_inline: bool) -> str:
|
|
506
|
-
"""Convert HTML col element
|
|
636
|
+
"""Convert HTML col element - removes it entirely from Markdown output.
|
|
637
|
+
|
|
638
|
+
Col elements define column properties (width, style) in HTML tables.
|
|
639
|
+
They have no representation in Markdown and should be removed.
|
|
507
640
|
|
|
508
641
|
Args:
|
|
509
642
|
tag: The col tag element.
|
|
510
643
|
convert_as_inline: Whether to convert as inline content.
|
|
511
644
|
|
|
512
645
|
Returns:
|
|
513
|
-
|
|
646
|
+
Empty string as col has no Markdown representation.
|
|
514
647
|
"""
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
span = tag.get("span", "")
|
|
519
|
-
width = tag.get("width", "")
|
|
520
|
-
style = tag.get("style", "")
|
|
521
|
-
|
|
522
|
-
attrs = []
|
|
523
|
-
if width and isinstance(width, str) and width.strip():
|
|
524
|
-
attrs.append(f'width="{width}"')
|
|
525
|
-
if style and isinstance(style, str) and style.strip():
|
|
526
|
-
attrs.append(f'style="{style}"')
|
|
527
|
-
if span and isinstance(span, str) and span.strip():
|
|
528
|
-
attrs.append(f'span="{span}"')
|
|
529
|
-
|
|
530
|
-
attrs_str = " ".join(attrs)
|
|
531
|
-
if attrs_str:
|
|
532
|
-
return f"<col {attrs_str} />\n"
|
|
533
|
-
return "<col />\n"
|
|
648
|
+
_ = tag, convert_as_inline
|
|
649
|
+
return ""
|
|
534
650
|
|
|
535
651
|
|
|
536
652
|
def _convert_semantic_block(*, text: str, convert_as_inline: bool) -> str:
|
|
@@ -550,35 +666,35 @@ def _convert_semantic_block(*, text: str, convert_as_inline: bool) -> str:
|
|
|
550
666
|
|
|
551
667
|
|
|
552
668
|
def _convert_details(*, text: str, convert_as_inline: bool) -> str:
|
|
553
|
-
"""Convert HTML details element
|
|
669
|
+
"""Convert HTML details element to semantic Markdown.
|
|
554
670
|
|
|
555
671
|
Args:
|
|
556
672
|
text: The text content of the details element.
|
|
557
673
|
convert_as_inline: Whether to convert as inline content.
|
|
558
674
|
|
|
559
675
|
Returns:
|
|
560
|
-
The converted markdown text
|
|
676
|
+
The converted markdown text (only content, no HTML tags).
|
|
561
677
|
"""
|
|
562
678
|
if convert_as_inline:
|
|
563
679
|
return text
|
|
564
680
|
|
|
565
|
-
return
|
|
681
|
+
return _format_block_element(text)
|
|
566
682
|
|
|
567
683
|
|
|
568
684
|
def _convert_summary(*, text: str, convert_as_inline: bool) -> str:
|
|
569
|
-
"""Convert HTML summary element
|
|
685
|
+
"""Convert HTML summary element to emphasized text.
|
|
570
686
|
|
|
571
687
|
Args:
|
|
572
688
|
text: The text content of the summary element.
|
|
573
689
|
convert_as_inline: Whether to convert as inline content.
|
|
574
690
|
|
|
575
691
|
Returns:
|
|
576
|
-
The converted markdown text
|
|
692
|
+
The converted markdown text as bold heading.
|
|
577
693
|
"""
|
|
578
694
|
if convert_as_inline:
|
|
579
695
|
return text
|
|
580
696
|
|
|
581
|
-
return
|
|
697
|
+
return _format_wrapped_block(text, "**")
|
|
582
698
|
|
|
583
699
|
|
|
584
700
|
def _convert_dl(*, text: str, convert_as_inline: bool) -> str:
|
|
@@ -674,119 +790,39 @@ def _convert_q(*, text: str, convert_as_inline: bool) -> str:
|
|
|
674
790
|
return f'"{escaped_text}"'
|
|
675
791
|
|
|
676
792
|
|
|
677
|
-
def
|
|
678
|
-
"""Convert HTML
|
|
793
|
+
def _convert_media_element(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
794
|
+
"""Convert HTML media elements (audio/video) to semantic Markdown.
|
|
679
795
|
|
|
680
796
|
Args:
|
|
681
|
-
tag: The
|
|
682
|
-
text: The text content of the
|
|
797
|
+
tag: The media tag element.
|
|
798
|
+
text: The text content of the media element (fallback content).
|
|
683
799
|
convert_as_inline: Whether to convert as inline content.
|
|
684
800
|
|
|
685
801
|
Returns:
|
|
686
|
-
The converted markdown text
|
|
802
|
+
The converted markdown text (link if src exists, otherwise fallback content).
|
|
687
803
|
"""
|
|
688
|
-
_ = convert_as_inline
|
|
689
804
|
src = tag.get("src", "")
|
|
690
805
|
|
|
691
|
-
if not src:
|
|
692
|
-
|
|
693
|
-
if source_tag and isinstance(source_tag, Tag):
|
|
694
|
-
src = source_tag.get("src", "")
|
|
695
|
-
|
|
696
|
-
controls = "controls" if tag.get("controls") is not None else ""
|
|
697
|
-
autoplay = "autoplay" if tag.get("autoplay") is not None else ""
|
|
698
|
-
loop = "loop" if tag.get("loop") is not None else ""
|
|
699
|
-
muted = "muted" if tag.get("muted") is not None else ""
|
|
700
|
-
preload = tag.get("preload", "")
|
|
806
|
+
if not src and (source_tag := tag.find("source")) and isinstance(source_tag, Tag):
|
|
807
|
+
src = source_tag.get("src", "")
|
|
701
808
|
|
|
702
|
-
attrs = []
|
|
703
809
|
if src and isinstance(src, str) and src.strip():
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
if muted:
|
|
712
|
-
attrs.append(muted)
|
|
713
|
-
if preload and isinstance(preload, str) and preload.strip():
|
|
714
|
-
attrs.append(f'preload="{preload}"')
|
|
715
|
-
|
|
716
|
-
attrs_str = " ".join(attrs)
|
|
810
|
+
link = f"[{src}]({src})"
|
|
811
|
+
if convert_as_inline:
|
|
812
|
+
return link
|
|
813
|
+
result = f"{link}\n\n"
|
|
814
|
+
if text.strip():
|
|
815
|
+
result += f"{text.strip()}\n\n"
|
|
816
|
+
return result
|
|
717
817
|
|
|
718
818
|
if text.strip():
|
|
719
|
-
|
|
720
|
-
return f"<audio {attrs_str}>\n{text.strip()}\n</audio>\n\n"
|
|
721
|
-
return f"<audio>\n{text.strip()}\n</audio>\n\n"
|
|
722
|
-
|
|
723
|
-
if attrs_str:
|
|
724
|
-
return f"<audio {attrs_str} />\n\n"
|
|
725
|
-
return "<audio />\n\n"
|
|
726
|
-
|
|
819
|
+
return _format_inline_or_block(text, convert_as_inline)
|
|
727
820
|
|
|
728
|
-
|
|
729
|
-
"""Convert HTML video element preserving structure with fallback.
|
|
730
|
-
|
|
731
|
-
Args:
|
|
732
|
-
tag: The video tag element.
|
|
733
|
-
text: The text content of the video element (fallback content).
|
|
734
|
-
convert_as_inline: Whether to convert as inline content.
|
|
735
|
-
|
|
736
|
-
Returns:
|
|
737
|
-
The converted markdown text preserving video element.
|
|
738
|
-
"""
|
|
739
|
-
_ = convert_as_inline
|
|
740
|
-
src = tag.get("src", "")
|
|
741
|
-
|
|
742
|
-
if not src:
|
|
743
|
-
source_tag = tag.find("source")
|
|
744
|
-
if source_tag and isinstance(source_tag, Tag):
|
|
745
|
-
src = source_tag.get("src", "")
|
|
746
|
-
|
|
747
|
-
width = tag.get("width", "")
|
|
748
|
-
height = tag.get("height", "")
|
|
749
|
-
poster = tag.get("poster", "")
|
|
750
|
-
controls = "controls" if tag.get("controls") is not None else ""
|
|
751
|
-
autoplay = "autoplay" if tag.get("autoplay") is not None else ""
|
|
752
|
-
loop = "loop" if tag.get("loop") is not None else ""
|
|
753
|
-
muted = "muted" if tag.get("muted") is not None else ""
|
|
754
|
-
preload = tag.get("preload", "")
|
|
755
|
-
|
|
756
|
-
attrs = []
|
|
757
|
-
if src and isinstance(src, str) and src.strip():
|
|
758
|
-
attrs.append(f'src="{src}"')
|
|
759
|
-
if width and isinstance(width, str) and width.strip():
|
|
760
|
-
attrs.append(f'width="{width}"')
|
|
761
|
-
if height and isinstance(height, str) and height.strip():
|
|
762
|
-
attrs.append(f'height="{height}"')
|
|
763
|
-
if poster and isinstance(poster, str) and poster.strip():
|
|
764
|
-
attrs.append(f'poster="{poster}"')
|
|
765
|
-
if controls:
|
|
766
|
-
attrs.append(controls)
|
|
767
|
-
if autoplay:
|
|
768
|
-
attrs.append(autoplay)
|
|
769
|
-
if loop:
|
|
770
|
-
attrs.append(loop)
|
|
771
|
-
if muted:
|
|
772
|
-
attrs.append(muted)
|
|
773
|
-
if preload and isinstance(preload, str) and preload.strip():
|
|
774
|
-
attrs.append(f'preload="{preload}"')
|
|
775
|
-
|
|
776
|
-
attrs_str = " ".join(attrs)
|
|
777
|
-
|
|
778
|
-
if text.strip():
|
|
779
|
-
if attrs_str:
|
|
780
|
-
return f"<video {attrs_str}>\n{text.strip()}\n</video>\n\n"
|
|
781
|
-
return f"<video>\n{text.strip()}\n</video>\n\n"
|
|
782
|
-
|
|
783
|
-
if attrs_str:
|
|
784
|
-
return f"<video {attrs_str} />\n\n"
|
|
785
|
-
return "<video />\n\n"
|
|
821
|
+
return ""
|
|
786
822
|
|
|
787
823
|
|
|
788
824
|
def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
789
|
-
"""Convert HTML iframe element
|
|
825
|
+
"""Convert HTML iframe element to semantic Markdown.
|
|
790
826
|
|
|
791
827
|
Args:
|
|
792
828
|
tag: The iframe tag element.
|
|
@@ -794,47 +830,18 @@ def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
794
830
|
convert_as_inline: Whether to convert as inline content.
|
|
795
831
|
|
|
796
832
|
Returns:
|
|
797
|
-
The converted markdown text
|
|
833
|
+
The converted markdown text (link if src exists).
|
|
798
834
|
"""
|
|
799
835
|
_ = text
|
|
800
|
-
_ = convert_as_inline
|
|
801
836
|
src = tag.get("src", "")
|
|
802
|
-
width = tag.get("width", "")
|
|
803
|
-
height = tag.get("height", "")
|
|
804
|
-
title = tag.get("title", "")
|
|
805
|
-
allow = tag.get("allow", "")
|
|
806
|
-
sandbox = tag.get("sandbox")
|
|
807
|
-
loading = tag.get("loading", "")
|
|
808
|
-
|
|
809
|
-
attrs = []
|
|
810
|
-
if src and isinstance(src, str) and src.strip():
|
|
811
|
-
attrs.append(f'src="{src}"')
|
|
812
|
-
if width and isinstance(width, str) and width.strip():
|
|
813
|
-
attrs.append(f'width="{width}"')
|
|
814
|
-
if height and isinstance(height, str) and height.strip():
|
|
815
|
-
attrs.append(f'height="{height}"')
|
|
816
|
-
if title and isinstance(title, str) and title.strip():
|
|
817
|
-
attrs.append(f'title="{title}"')
|
|
818
|
-
if allow and isinstance(allow, str) and allow.strip():
|
|
819
|
-
attrs.append(f'allow="{allow}"')
|
|
820
|
-
if sandbox is not None:
|
|
821
|
-
if isinstance(sandbox, list):
|
|
822
|
-
if sandbox:
|
|
823
|
-
attrs.append(f'sandbox="{" ".join(sandbox)}"')
|
|
824
|
-
else:
|
|
825
|
-
attrs.append("sandbox")
|
|
826
|
-
elif isinstance(sandbox, str) and sandbox:
|
|
827
|
-
attrs.append(f'sandbox="{sandbox}"')
|
|
828
|
-
else:
|
|
829
|
-
attrs.append("sandbox")
|
|
830
|
-
if loading and isinstance(loading, str) and loading.strip():
|
|
831
|
-
attrs.append(f'loading="{loading}"')
|
|
832
837
|
|
|
833
|
-
|
|
838
|
+
if src and isinstance(src, str) and src.strip():
|
|
839
|
+
link = f"[{src}]({src})"
|
|
840
|
+
if convert_as_inline:
|
|
841
|
+
return link
|
|
842
|
+
return f"{link}\n\n"
|
|
834
843
|
|
|
835
|
-
|
|
836
|
-
return f"<iframe {attrs_str}></iframe>\n\n"
|
|
837
|
-
return "<iframe></iframe>\n\n"
|
|
844
|
+
return ""
|
|
838
845
|
|
|
839
846
|
|
|
840
847
|
def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
@@ -860,7 +867,7 @@ def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
860
867
|
|
|
861
868
|
|
|
862
869
|
def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
863
|
-
"""Convert HTML time element
|
|
870
|
+
"""Convert HTML time element to semantic Markdown.
|
|
864
871
|
|
|
865
872
|
Args:
|
|
866
873
|
tag: The time tag element.
|
|
@@ -868,21 +875,18 @@ def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
868
875
|
convert_as_inline: Whether to convert as inline content.
|
|
869
876
|
|
|
870
877
|
Returns:
|
|
871
|
-
The converted markdown text
|
|
878
|
+
The converted markdown text (content only, no HTML tags).
|
|
872
879
|
"""
|
|
880
|
+
_ = tag
|
|
873
881
|
_ = convert_as_inline
|
|
874
882
|
if not text.strip():
|
|
875
883
|
return ""
|
|
876
884
|
|
|
877
|
-
datetime_attr = tag.get("datetime")
|
|
878
|
-
if datetime_attr and isinstance(datetime_attr, str) and datetime_attr.strip():
|
|
879
|
-
return f'<time datetime="{datetime_attr.strip()}">{text.strip()}</time>'
|
|
880
|
-
|
|
881
885
|
return text.strip()
|
|
882
886
|
|
|
883
887
|
|
|
884
888
|
def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
885
|
-
"""Convert HTML data element
|
|
889
|
+
"""Convert HTML data element to semantic Markdown.
|
|
886
890
|
|
|
887
891
|
Args:
|
|
888
892
|
tag: The data tag element.
|
|
@@ -890,16 +894,13 @@ def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
890
894
|
convert_as_inline: Whether to convert as inline content.
|
|
891
895
|
|
|
892
896
|
Returns:
|
|
893
|
-
The converted markdown text
|
|
897
|
+
The converted markdown text (content only, no HTML tags).
|
|
894
898
|
"""
|
|
899
|
+
_ = tag
|
|
895
900
|
_ = convert_as_inline
|
|
896
901
|
if not text.strip():
|
|
897
902
|
return ""
|
|
898
903
|
|
|
899
|
-
value_attr = tag.get("value")
|
|
900
|
-
if value_attr and isinstance(value_attr, str) and value_attr.strip():
|
|
901
|
-
return f'<data value="{value_attr.strip()}">{text.strip()}</data>'
|
|
902
|
-
|
|
903
904
|
return text.strip()
|
|
904
905
|
|
|
905
906
|
|
|
@@ -917,7 +918,7 @@ def _convert_wbr(*, convert_as_inline: bool) -> str:
|
|
|
917
918
|
|
|
918
919
|
|
|
919
920
|
def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
920
|
-
"""Convert HTML form element
|
|
921
|
+
"""Convert HTML form element to semantic Markdown.
|
|
921
922
|
|
|
922
923
|
Args:
|
|
923
924
|
tag: The form tag element.
|
|
@@ -925,38 +926,27 @@ def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
925
926
|
convert_as_inline: Whether to convert as inline content.
|
|
926
927
|
|
|
927
928
|
Returns:
|
|
928
|
-
The converted markdown text
|
|
929
|
+
The converted markdown text (only content, no HTML tags).
|
|
929
930
|
"""
|
|
931
|
+
_ = tag
|
|
930
932
|
if convert_as_inline:
|
|
931
933
|
return text
|
|
932
934
|
|
|
933
935
|
if not text.strip():
|
|
934
936
|
return ""
|
|
935
937
|
|
|
936
|
-
|
|
937
|
-
method = tag.get("method", "")
|
|
938
|
-
attrs = []
|
|
939
|
-
|
|
940
|
-
if action and isinstance(action, str) and action.strip():
|
|
941
|
-
attrs.append(f'action="{action.strip()}"')
|
|
942
|
-
if method and isinstance(method, str) and method.strip():
|
|
943
|
-
attrs.append(f'method="{method.strip()}"')
|
|
944
|
-
|
|
945
|
-
attrs_str = " ".join(attrs)
|
|
946
|
-
if attrs_str:
|
|
947
|
-
return f"<form {attrs_str}>\n{text.strip()}\n</form>\n\n"
|
|
948
|
-
return f"<form>\n{text.strip()}\n</form>\n\n"
|
|
938
|
+
return text
|
|
949
939
|
|
|
950
940
|
|
|
951
941
|
def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
|
|
952
|
-
"""Convert HTML fieldset element
|
|
942
|
+
"""Convert HTML fieldset element to semantic Markdown.
|
|
953
943
|
|
|
954
944
|
Args:
|
|
955
945
|
text: The text content of the fieldset element.
|
|
956
946
|
convert_as_inline: Whether to convert as inline content.
|
|
957
947
|
|
|
958
948
|
Returns:
|
|
959
|
-
The converted markdown text
|
|
949
|
+
The converted markdown text (only content, no HTML tags).
|
|
960
950
|
"""
|
|
961
951
|
if convert_as_inline:
|
|
962
952
|
return text
|
|
@@ -964,7 +954,7 @@ def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
|
|
|
964
954
|
if not text.strip():
|
|
965
955
|
return ""
|
|
966
956
|
|
|
967
|
-
return
|
|
957
|
+
return text
|
|
968
958
|
|
|
969
959
|
|
|
970
960
|
def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
|
|
@@ -983,11 +973,11 @@ def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
|
|
|
983
973
|
if not text.strip():
|
|
984
974
|
return ""
|
|
985
975
|
|
|
986
|
-
return
|
|
976
|
+
return _format_wrapped_block(text, "**")
|
|
987
977
|
|
|
988
978
|
|
|
989
979
|
def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
990
|
-
"""Convert HTML label element
|
|
980
|
+
"""Convert HTML label element to Markdown.
|
|
991
981
|
|
|
992
982
|
Args:
|
|
993
983
|
tag: The label tag element.
|
|
@@ -995,78 +985,31 @@ def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
995
985
|
convert_as_inline: Whether to convert as inline content.
|
|
996
986
|
|
|
997
987
|
Returns:
|
|
998
|
-
The
|
|
988
|
+
The label text content.
|
|
999
989
|
"""
|
|
1000
|
-
|
|
1001
|
-
return text
|
|
1002
|
-
|
|
990
|
+
_ = tag
|
|
1003
991
|
if not text.strip():
|
|
1004
992
|
return ""
|
|
1005
993
|
|
|
1006
|
-
|
|
1007
|
-
if for_attr and isinstance(for_attr, str) and for_attr.strip():
|
|
1008
|
-
return f'<label for="{for_attr.strip()}">{text.strip()}</label>\n\n'
|
|
1009
|
-
|
|
1010
|
-
return f"<label>{text.strip()}</label>\n\n"
|
|
994
|
+
return _format_inline_or_block(text, convert_as_inline)
|
|
1011
995
|
|
|
1012
996
|
|
|
1013
997
|
def _convert_input_enhanced(*, tag: Tag, convert_as_inline: bool) -> str:
|
|
1014
|
-
"""Convert HTML input element
|
|
998
|
+
"""Convert HTML input element to Markdown.
|
|
1015
999
|
|
|
1016
1000
|
Args:
|
|
1017
1001
|
tag: The input tag element.
|
|
1018
1002
|
convert_as_inline: Whether to convert as inline content.
|
|
1019
1003
|
|
|
1020
1004
|
Returns:
|
|
1021
|
-
|
|
1005
|
+
Empty string since input elements have no Markdown representation.
|
|
1022
1006
|
"""
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
|
|
1026
|
-
|
|
1027
|
-
if _has_ancestor(tag, "li"):
|
|
1028
|
-
return ""
|
|
1029
|
-
|
|
1030
|
-
id_attr = tag.get("id", "")
|
|
1031
|
-
name = tag.get("name", "")
|
|
1032
|
-
value = tag.get("value", "")
|
|
1033
|
-
placeholder = tag.get("placeholder", "")
|
|
1034
|
-
required = tag.get("required") is not None
|
|
1035
|
-
disabled = tag.get("disabled") is not None
|
|
1036
|
-
readonly = tag.get("readonly") is not None
|
|
1037
|
-
checked = tag.get("checked") is not None
|
|
1038
|
-
accept = tag.get("accept", "")
|
|
1039
|
-
|
|
1040
|
-
attrs = []
|
|
1041
|
-
if input_type and isinstance(input_type, str):
|
|
1042
|
-
attrs.append(f'type="{input_type}"')
|
|
1043
|
-
if id_attr and isinstance(id_attr, str) and id_attr.strip():
|
|
1044
|
-
attrs.append(f'id="{id_attr}"')
|
|
1045
|
-
if name and isinstance(name, str) and name.strip():
|
|
1046
|
-
attrs.append(f'name="{name}"')
|
|
1047
|
-
if value and isinstance(value, str) and value.strip():
|
|
1048
|
-
attrs.append(f'value="{value}"')
|
|
1049
|
-
if placeholder and isinstance(placeholder, str) and placeholder.strip():
|
|
1050
|
-
attrs.append(f'placeholder="{placeholder}"')
|
|
1051
|
-
if accept and isinstance(accept, str) and accept.strip():
|
|
1052
|
-
attrs.append(f'accept="{accept}"')
|
|
1053
|
-
if required:
|
|
1054
|
-
attrs.append("required")
|
|
1055
|
-
if disabled:
|
|
1056
|
-
attrs.append("disabled")
|
|
1057
|
-
if readonly:
|
|
1058
|
-
attrs.append("readonly")
|
|
1059
|
-
if checked:
|
|
1060
|
-
attrs.append("checked")
|
|
1061
|
-
|
|
1062
|
-
attrs_str = " ".join(attrs)
|
|
1063
|
-
result = f"<input {attrs_str} />" if attrs_str else "<input />"
|
|
1064
|
-
|
|
1065
|
-
return result if convert_as_inline else f"{result}\n\n"
|
|
1007
|
+
_ = tag, convert_as_inline
|
|
1008
|
+
return ""
|
|
1066
1009
|
|
|
1067
1010
|
|
|
1068
1011
|
def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1069
|
-
"""Convert HTML textarea element
|
|
1012
|
+
"""Convert HTML textarea element to Markdown.
|
|
1070
1013
|
|
|
1071
1014
|
Args:
|
|
1072
1015
|
tag: The textarea tag element.
|
|
@@ -1074,42 +1017,17 @@ def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1074
1017
|
convert_as_inline: Whether to convert as inline content.
|
|
1075
1018
|
|
|
1076
1019
|
Returns:
|
|
1077
|
-
The
|
|
1020
|
+
The text content of the textarea.
|
|
1078
1021
|
"""
|
|
1079
|
-
|
|
1080
|
-
return text
|
|
1081
|
-
|
|
1022
|
+
_ = tag
|
|
1082
1023
|
if not text.strip():
|
|
1083
1024
|
return ""
|
|
1084
1025
|
|
|
1085
|
-
|
|
1086
|
-
placeholder = tag.get("placeholder", "")
|
|
1087
|
-
rows = tag.get("rows", "")
|
|
1088
|
-
cols = tag.get("cols", "")
|
|
1089
|
-
required = tag.get("required") is not None
|
|
1090
|
-
|
|
1091
|
-
attrs = []
|
|
1092
|
-
if name and isinstance(name, str) and name.strip():
|
|
1093
|
-
attrs.append(f'name="{name}"')
|
|
1094
|
-
if placeholder and isinstance(placeholder, str) and placeholder.strip():
|
|
1095
|
-
attrs.append(f'placeholder="{placeholder}"')
|
|
1096
|
-
if rows and isinstance(rows, str) and rows.strip():
|
|
1097
|
-
attrs.append(f'rows="{rows}"')
|
|
1098
|
-
if cols and isinstance(cols, str) and cols.strip():
|
|
1099
|
-
attrs.append(f'cols="{cols}"')
|
|
1100
|
-
if required:
|
|
1101
|
-
attrs.append("required")
|
|
1102
|
-
|
|
1103
|
-
attrs_str = " ".join(attrs)
|
|
1104
|
-
content = text.strip()
|
|
1105
|
-
|
|
1106
|
-
if attrs_str:
|
|
1107
|
-
return f"<textarea {attrs_str}>{content}</textarea>\n\n"
|
|
1108
|
-
return f"<textarea>{content}</textarea>\n\n"
|
|
1026
|
+
return _format_inline_or_block(text, convert_as_inline)
|
|
1109
1027
|
|
|
1110
1028
|
|
|
1111
1029
|
def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1112
|
-
"""Convert HTML select element
|
|
1030
|
+
"""Convert HTML select element to Markdown.
|
|
1113
1031
|
|
|
1114
1032
|
Args:
|
|
1115
1033
|
tag: The select tag element.
|
|
@@ -1117,39 +1035,21 @@ def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1117
1035
|
convert_as_inline: Whether to convert as inline content.
|
|
1118
1036
|
|
|
1119
1037
|
Returns:
|
|
1120
|
-
The
|
|
1038
|
+
The text content (options) as a comma-separated list.
|
|
1121
1039
|
"""
|
|
1122
|
-
|
|
1123
|
-
return text
|
|
1124
|
-
|
|
1040
|
+
_ = tag
|
|
1125
1041
|
if not text.strip():
|
|
1126
1042
|
return ""
|
|
1127
1043
|
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
required = tag.get("required") is not None
|
|
1132
|
-
|
|
1133
|
-
attrs = []
|
|
1134
|
-
if id_attr and isinstance(id_attr, str) and id_attr.strip():
|
|
1135
|
-
attrs.append(f'id="{id_attr}"')
|
|
1136
|
-
if name and isinstance(name, str) and name.strip():
|
|
1137
|
-
attrs.append(f'name="{name}"')
|
|
1138
|
-
if multiple:
|
|
1139
|
-
attrs.append("multiple")
|
|
1140
|
-
if required:
|
|
1141
|
-
attrs.append("required")
|
|
1142
|
-
|
|
1143
|
-
attrs_str = " ".join(attrs)
|
|
1144
|
-
content = text.strip()
|
|
1044
|
+
if convert_as_inline:
|
|
1045
|
+
options = [opt.strip() for opt in text.strip().split("\n") if opt.strip()]
|
|
1046
|
+
return ", ".join(options)
|
|
1145
1047
|
|
|
1146
|
-
|
|
1147
|
-
return f"<select {attrs_str}>\n{content}\n</select>\n\n"
|
|
1148
|
-
return f"<select>\n{content}\n</select>\n\n"
|
|
1048
|
+
return _format_block_element(text)
|
|
1149
1049
|
|
|
1150
1050
|
|
|
1151
1051
|
def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1152
|
-
"""Convert HTML option element
|
|
1052
|
+
"""Convert HTML option element to Markdown.
|
|
1153
1053
|
|
|
1154
1054
|
Args:
|
|
1155
1055
|
tag: The option tag element.
|
|
@@ -1157,33 +1057,24 @@ def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1157
1057
|
convert_as_inline: Whether to convert as inline content.
|
|
1158
1058
|
|
|
1159
1059
|
Returns:
|
|
1160
|
-
The
|
|
1060
|
+
The option text, potentially with a marker if selected.
|
|
1161
1061
|
"""
|
|
1162
|
-
if convert_as_inline:
|
|
1163
|
-
return text
|
|
1164
|
-
|
|
1165
1062
|
if not text.strip():
|
|
1166
1063
|
return ""
|
|
1167
1064
|
|
|
1168
|
-
value = tag.get("value", "")
|
|
1169
1065
|
selected = tag.get("selected") is not None
|
|
1170
|
-
|
|
1171
|
-
attrs = []
|
|
1172
|
-
if value and isinstance(value, str) and value.strip():
|
|
1173
|
-
attrs.append(f'value="{value}"')
|
|
1174
|
-
if selected:
|
|
1175
|
-
attrs.append("selected")
|
|
1176
|
-
|
|
1177
|
-
attrs_str = " ".join(attrs)
|
|
1178
1066
|
content = text.strip()
|
|
1179
1067
|
|
|
1180
|
-
if
|
|
1181
|
-
return
|
|
1182
|
-
|
|
1068
|
+
if convert_as_inline:
|
|
1069
|
+
return content
|
|
1070
|
+
|
|
1071
|
+
if selected:
|
|
1072
|
+
return f"* {content}\n"
|
|
1073
|
+
return f"{content}\n"
|
|
1183
1074
|
|
|
1184
1075
|
|
|
1185
1076
|
def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1186
|
-
"""Convert HTML optgroup element
|
|
1077
|
+
"""Convert HTML optgroup element to semantic Markdown.
|
|
1187
1078
|
|
|
1188
1079
|
Args:
|
|
1189
1080
|
tag: The optgroup tag element.
|
|
@@ -1191,7 +1082,7 @@ def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1191
1082
|
convert_as_inline: Whether to convert as inline content.
|
|
1192
1083
|
|
|
1193
1084
|
Returns:
|
|
1194
|
-
The converted markdown text
|
|
1085
|
+
The converted markdown text with label as heading.
|
|
1195
1086
|
"""
|
|
1196
1087
|
if convert_as_inline:
|
|
1197
1088
|
return text
|
|
@@ -1200,21 +1091,16 @@ def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1200
1091
|
return ""
|
|
1201
1092
|
|
|
1202
1093
|
label = tag.get("label", "")
|
|
1094
|
+
content = text.strip()
|
|
1203
1095
|
|
|
1204
|
-
attrs = []
|
|
1205
1096
|
if label and isinstance(label, str) and label.strip():
|
|
1206
|
-
|
|
1097
|
+
return f"**{label.strip()}**\n{content}\n"
|
|
1207
1098
|
|
|
1208
|
-
|
|
1209
|
-
content = text.strip()
|
|
1210
|
-
|
|
1211
|
-
if attrs_str:
|
|
1212
|
-
return f"<optgroup {attrs_str}>\n{content}\n</optgroup>\n"
|
|
1213
|
-
return f"<optgroup>\n{content}\n</optgroup>\n"
|
|
1099
|
+
return f"{content}\n"
|
|
1214
1100
|
|
|
1215
1101
|
|
|
1216
1102
|
def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1217
|
-
"""Convert HTML button element
|
|
1103
|
+
"""Convert HTML button element to Markdown.
|
|
1218
1104
|
|
|
1219
1105
|
Args:
|
|
1220
1106
|
tag: The button tag element.
|
|
@@ -1222,38 +1108,17 @@ def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1222
1108
|
convert_as_inline: Whether to convert as inline content.
|
|
1223
1109
|
|
|
1224
1110
|
Returns:
|
|
1225
|
-
The
|
|
1111
|
+
The button text content.
|
|
1226
1112
|
"""
|
|
1227
|
-
|
|
1228
|
-
return text
|
|
1229
|
-
|
|
1113
|
+
_ = tag
|
|
1230
1114
|
if not text.strip():
|
|
1231
1115
|
return ""
|
|
1232
1116
|
|
|
1233
|
-
|
|
1234
|
-
name = tag.get("name", "")
|
|
1235
|
-
value = tag.get("value", "")
|
|
1236
|
-
disabled = tag.get("disabled") is not None
|
|
1237
|
-
|
|
1238
|
-
attrs = []
|
|
1239
|
-
if button_type and isinstance(button_type, str) and button_type.strip():
|
|
1240
|
-
attrs.append(f'type="{button_type}"')
|
|
1241
|
-
if name and isinstance(name, str) and name.strip():
|
|
1242
|
-
attrs.append(f'name="{name}"')
|
|
1243
|
-
if value and isinstance(value, str) and value.strip():
|
|
1244
|
-
attrs.append(f'value="{value}"')
|
|
1245
|
-
if disabled:
|
|
1246
|
-
attrs.append("disabled")
|
|
1247
|
-
|
|
1248
|
-
attrs_str = " ".join(attrs)
|
|
1249
|
-
|
|
1250
|
-
if attrs_str:
|
|
1251
|
-
return f"<button {attrs_str}>{text.strip()}</button>\n\n"
|
|
1252
|
-
return f"<button>{text.strip()}</button>\n\n"
|
|
1117
|
+
return _format_inline_or_block(text, convert_as_inline)
|
|
1253
1118
|
|
|
1254
1119
|
|
|
1255
1120
|
def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1256
|
-
"""Convert HTML progress element
|
|
1121
|
+
"""Convert HTML progress element to semantic text.
|
|
1257
1122
|
|
|
1258
1123
|
Args:
|
|
1259
1124
|
tag: The progress tag element.
|
|
@@ -1261,33 +1126,20 @@ def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1261
1126
|
convert_as_inline: Whether to convert as inline content.
|
|
1262
1127
|
|
|
1263
1128
|
Returns:
|
|
1264
|
-
The converted markdown text
|
|
1129
|
+
The converted markdown text (only content, no HTML tags).
|
|
1265
1130
|
"""
|
|
1131
|
+
_ = tag
|
|
1266
1132
|
if convert_as_inline:
|
|
1267
1133
|
return text
|
|
1268
1134
|
|
|
1269
1135
|
if not text.strip():
|
|
1270
1136
|
return ""
|
|
1271
1137
|
|
|
1272
|
-
|
|
1273
|
-
max_val = tag.get("max", "")
|
|
1274
|
-
|
|
1275
|
-
attrs = []
|
|
1276
|
-
if value and isinstance(value, str) and value.strip():
|
|
1277
|
-
attrs.append(f'value="{value}"')
|
|
1278
|
-
if max_val and isinstance(max_val, str) and max_val.strip():
|
|
1279
|
-
attrs.append(f'max="{max_val}"')
|
|
1280
|
-
|
|
1281
|
-
attrs_str = " ".join(attrs)
|
|
1282
|
-
content = text.strip()
|
|
1283
|
-
|
|
1284
|
-
if attrs_str:
|
|
1285
|
-
return f"<progress {attrs_str}>{content}</progress>\n\n"
|
|
1286
|
-
return f"<progress>{content}</progress>\n\n"
|
|
1138
|
+
return _format_block_element(text)
|
|
1287
1139
|
|
|
1288
1140
|
|
|
1289
1141
|
def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1290
|
-
"""Convert HTML meter element
|
|
1142
|
+
"""Convert HTML meter element to semantic text.
|
|
1291
1143
|
|
|
1292
1144
|
Args:
|
|
1293
1145
|
tag: The meter tag element.
|
|
@@ -1295,45 +1147,20 @@ def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1295
1147
|
convert_as_inline: Whether to convert as inline content.
|
|
1296
1148
|
|
|
1297
1149
|
Returns:
|
|
1298
|
-
The converted markdown text
|
|
1150
|
+
The converted markdown text (only content, no HTML tags).
|
|
1299
1151
|
"""
|
|
1152
|
+
_ = tag
|
|
1300
1153
|
if convert_as_inline:
|
|
1301
1154
|
return text
|
|
1302
1155
|
|
|
1303
1156
|
if not text.strip():
|
|
1304
1157
|
return ""
|
|
1305
1158
|
|
|
1306
|
-
|
|
1307
|
-
min_val = tag.get("min", "")
|
|
1308
|
-
max_val = tag.get("max", "")
|
|
1309
|
-
low = tag.get("low", "")
|
|
1310
|
-
high = tag.get("high", "")
|
|
1311
|
-
optimum = tag.get("optimum", "")
|
|
1312
|
-
|
|
1313
|
-
attrs = []
|
|
1314
|
-
if value and isinstance(value, str) and value.strip():
|
|
1315
|
-
attrs.append(f'value="{value}"')
|
|
1316
|
-
if min_val and isinstance(min_val, str) and min_val.strip():
|
|
1317
|
-
attrs.append(f'min="{min_val}"')
|
|
1318
|
-
if max_val and isinstance(max_val, str) and max_val.strip():
|
|
1319
|
-
attrs.append(f'max="{max_val}"')
|
|
1320
|
-
if low and isinstance(low, str) and low.strip():
|
|
1321
|
-
attrs.append(f'low="{low}"')
|
|
1322
|
-
if high and isinstance(high, str) and high.strip():
|
|
1323
|
-
attrs.append(f'high="{high}"')
|
|
1324
|
-
if optimum and isinstance(optimum, str) and optimum.strip():
|
|
1325
|
-
attrs.append(f'optimum="{optimum}"')
|
|
1326
|
-
|
|
1327
|
-
attrs_str = " ".join(attrs)
|
|
1328
|
-
content = text.strip()
|
|
1329
|
-
|
|
1330
|
-
if attrs_str:
|
|
1331
|
-
return f"<meter {attrs_str}>{content}</meter>\n\n"
|
|
1332
|
-
return f"<meter>{content}</meter>\n\n"
|
|
1159
|
+
return _format_block_element(text)
|
|
1333
1160
|
|
|
1334
1161
|
|
|
1335
1162
|
def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1336
|
-
"""Convert HTML output element
|
|
1163
|
+
"""Convert HTML output element to semantic text.
|
|
1337
1164
|
|
|
1338
1165
|
Args:
|
|
1339
1166
|
tag: The output tag element.
|
|
@@ -1341,34 +1168,20 @@ def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1341
1168
|
convert_as_inline: Whether to convert as inline content.
|
|
1342
1169
|
|
|
1343
1170
|
Returns:
|
|
1344
|
-
The converted markdown text
|
|
1171
|
+
The converted markdown text (only content, no HTML tags).
|
|
1345
1172
|
"""
|
|
1173
|
+
_ = tag
|
|
1346
1174
|
if convert_as_inline:
|
|
1347
1175
|
return text
|
|
1348
1176
|
|
|
1349
1177
|
if not text.strip():
|
|
1350
1178
|
return ""
|
|
1351
1179
|
|
|
1352
|
-
|
|
1353
|
-
name = tag.get("name", "")
|
|
1354
|
-
|
|
1355
|
-
attrs = []
|
|
1356
|
-
if for_attr:
|
|
1357
|
-
for_value = " ".join(for_attr) if isinstance(for_attr, list) else str(for_attr)
|
|
1358
|
-
if for_value.strip():
|
|
1359
|
-
attrs.append(f'for="{for_value}"')
|
|
1360
|
-
if name and isinstance(name, str) and name.strip():
|
|
1361
|
-
attrs.append(f'name="{name}"')
|
|
1362
|
-
|
|
1363
|
-
attrs_str = " ".join(attrs)
|
|
1364
|
-
|
|
1365
|
-
if attrs_str:
|
|
1366
|
-
return f"<output {attrs_str}>{text.strip()}</output>\n\n"
|
|
1367
|
-
return f"<output>{text.strip()}</output>\n\n"
|
|
1180
|
+
return _format_block_element(text)
|
|
1368
1181
|
|
|
1369
1182
|
|
|
1370
1183
|
def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1371
|
-
"""Convert HTML datalist element
|
|
1184
|
+
"""Convert HTML datalist element to semantic Markdown.
|
|
1372
1185
|
|
|
1373
1186
|
Args:
|
|
1374
1187
|
tag: The datalist tag element.
|
|
@@ -1376,26 +1189,16 @@ def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1376
1189
|
convert_as_inline: Whether to convert as inline content.
|
|
1377
1190
|
|
|
1378
1191
|
Returns:
|
|
1379
|
-
The converted markdown text
|
|
1192
|
+
The converted markdown text (only content, no HTML tags).
|
|
1380
1193
|
"""
|
|
1194
|
+
_ = tag
|
|
1381
1195
|
if convert_as_inline:
|
|
1382
1196
|
return text
|
|
1383
1197
|
|
|
1384
1198
|
if not text.strip():
|
|
1385
1199
|
return ""
|
|
1386
1200
|
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
attrs = []
|
|
1390
|
-
if id_attr and isinstance(id_attr, str) and id_attr.strip():
|
|
1391
|
-
attrs.append(f'id="{id_attr}"')
|
|
1392
|
-
|
|
1393
|
-
attrs_str = " ".join(attrs)
|
|
1394
|
-
content = text.strip()
|
|
1395
|
-
|
|
1396
|
-
if attrs_str:
|
|
1397
|
-
return f"<datalist {attrs_str}>\n{content}\n</datalist>\n\n"
|
|
1398
|
-
return f"<datalist>\n{content}\n</datalist>\n\n"
|
|
1201
|
+
return _format_block_element(text)
|
|
1399
1202
|
|
|
1400
1203
|
|
|
1401
1204
|
def _convert_ruby(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
@@ -1488,7 +1291,7 @@ def _convert_rtc(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
|
1488
1291
|
|
|
1489
1292
|
|
|
1490
1293
|
def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1491
|
-
"""Convert HTML dialog element
|
|
1294
|
+
"""Convert HTML dialog element to semantic Markdown.
|
|
1492
1295
|
|
|
1493
1296
|
Args:
|
|
1494
1297
|
text: The text content of the dialog element.
|
|
@@ -1496,27 +1299,20 @@ def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1496
1299
|
tag: The dialog tag element.
|
|
1497
1300
|
|
|
1498
1301
|
Returns:
|
|
1499
|
-
The converted markdown text
|
|
1302
|
+
The converted markdown text (only content, no HTML tags).
|
|
1500
1303
|
"""
|
|
1304
|
+
_ = tag
|
|
1501
1305
|
if convert_as_inline:
|
|
1502
1306
|
return text
|
|
1503
1307
|
|
|
1504
1308
|
if not text.strip():
|
|
1505
1309
|
return ""
|
|
1506
1310
|
|
|
1507
|
-
|
|
1508
|
-
if tag.get("open") is not None:
|
|
1509
|
-
attrs.append("open")
|
|
1510
|
-
if tag.get("id"):
|
|
1511
|
-
attrs.append(f'id="{tag.get("id")}"')
|
|
1512
|
-
|
|
1513
|
-
attrs_str = " " + " ".join(attrs) if attrs else ""
|
|
1514
|
-
|
|
1515
|
-
return f"<dialog{attrs_str}>\n{text.strip()}\n</dialog>\n\n"
|
|
1311
|
+
return _format_block_element(text)
|
|
1516
1312
|
|
|
1517
1313
|
|
|
1518
1314
|
def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1519
|
-
"""Convert HTML menu element
|
|
1315
|
+
"""Convert HTML menu element to semantic Markdown.
|
|
1520
1316
|
|
|
1521
1317
|
Args:
|
|
1522
1318
|
text: The text content of the menu element.
|
|
@@ -1524,29 +1320,20 @@ def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1524
1320
|
tag: The menu tag element.
|
|
1525
1321
|
|
|
1526
1322
|
Returns:
|
|
1527
|
-
The converted markdown text
|
|
1323
|
+
The converted markdown text (only content, no HTML tags).
|
|
1528
1324
|
"""
|
|
1325
|
+
_ = tag
|
|
1529
1326
|
if convert_as_inline:
|
|
1530
1327
|
return text
|
|
1531
1328
|
|
|
1532
1329
|
if not text.strip():
|
|
1533
1330
|
return ""
|
|
1534
1331
|
|
|
1535
|
-
|
|
1536
|
-
if tag.get("type") and tag.get("type") != "list":
|
|
1537
|
-
attrs.append(f'type="{tag.get("type")}"')
|
|
1538
|
-
if tag.get("label"):
|
|
1539
|
-
attrs.append(f'label="{tag.get("label")}"')
|
|
1540
|
-
if tag.get("id"):
|
|
1541
|
-
attrs.append(f'id="{tag.get("id")}"')
|
|
1542
|
-
|
|
1543
|
-
attrs_str = " " + " ".join(attrs) if attrs else ""
|
|
1544
|
-
|
|
1545
|
-
return f"<menu{attrs_str}>\n{text.strip()}\n</menu>\n\n"
|
|
1332
|
+
return _format_block_element(text)
|
|
1546
1333
|
|
|
1547
1334
|
|
|
1548
1335
|
def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1549
|
-
"""Convert HTML figure element
|
|
1336
|
+
"""Convert HTML figure element to semantic Markdown.
|
|
1550
1337
|
|
|
1551
1338
|
Args:
|
|
1552
1339
|
text: The text content of the figure element.
|
|
@@ -1554,42 +1341,33 @@ def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1554
1341
|
tag: The figure tag element.
|
|
1555
1342
|
|
|
1556
1343
|
Returns:
|
|
1557
|
-
The converted markdown text
|
|
1344
|
+
The converted markdown text (only content, no HTML tags).
|
|
1558
1345
|
"""
|
|
1346
|
+
_ = tag
|
|
1559
1347
|
if not text.strip():
|
|
1560
1348
|
return ""
|
|
1561
1349
|
|
|
1562
1350
|
if convert_as_inline:
|
|
1563
1351
|
return text
|
|
1564
1352
|
|
|
1565
|
-
attrs = []
|
|
1566
|
-
if tag.get("id"):
|
|
1567
|
-
attrs.append(f'id="{tag.get("id")}"')
|
|
1568
|
-
if tag.get("class"):
|
|
1569
|
-
class_val = tag.get("class")
|
|
1570
|
-
if isinstance(class_val, list):
|
|
1571
|
-
class_val = " ".join(class_val)
|
|
1572
|
-
attrs.append(f'class="{class_val}"')
|
|
1573
|
-
|
|
1574
|
-
attrs_str = " " + " ".join(attrs) if attrs else ""
|
|
1575
|
-
|
|
1576
1353
|
content = text.strip()
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1354
|
+
if content and not content.endswith("\n\n"):
|
|
1355
|
+
if content.endswith("\n"):
|
|
1356
|
+
content += "\n"
|
|
1357
|
+
else:
|
|
1358
|
+
content += "\n\n"
|
|
1359
|
+
return content
|
|
1582
1360
|
|
|
1583
1361
|
|
|
1584
1362
|
def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
|
|
1585
|
-
"""Convert HTML hgroup element
|
|
1363
|
+
"""Convert HTML hgroup element to semantic Markdown.
|
|
1586
1364
|
|
|
1587
1365
|
Args:
|
|
1588
1366
|
text: The text content of the hgroup element.
|
|
1589
1367
|
convert_as_inline: Whether to convert as inline content.
|
|
1590
1368
|
|
|
1591
1369
|
Returns:
|
|
1592
|
-
The converted markdown text
|
|
1370
|
+
The converted markdown text (only content, no HTML tags).
|
|
1593
1371
|
"""
|
|
1594
1372
|
if convert_as_inline:
|
|
1595
1373
|
return text
|
|
@@ -1597,15 +1375,11 @@ def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
|
|
|
1597
1375
|
if not text.strip():
|
|
1598
1376
|
return ""
|
|
1599
1377
|
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
content = re.sub(r"\n{3,}", "\n\n", content)
|
|
1603
|
-
|
|
1604
|
-
return f"<!-- heading group -->\n{content}\n<!-- end heading group -->\n\n"
|
|
1378
|
+
return text
|
|
1605
1379
|
|
|
1606
1380
|
|
|
1607
1381
|
def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1608
|
-
"""Convert HTML picture element
|
|
1382
|
+
"""Convert HTML picture element to semantic Markdown.
|
|
1609
1383
|
|
|
1610
1384
|
Args:
|
|
1611
1385
|
text: The text content of the picture element.
|
|
@@ -1613,44 +1387,13 @@ def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1613
1387
|
tag: The picture tag element.
|
|
1614
1388
|
|
|
1615
1389
|
Returns:
|
|
1616
|
-
The converted markdown text
|
|
1390
|
+
The converted markdown text (only the img element).
|
|
1617
1391
|
"""
|
|
1392
|
+
_ = tag, convert_as_inline
|
|
1618
1393
|
if not text.strip():
|
|
1619
1394
|
return ""
|
|
1620
1395
|
|
|
1621
|
-
|
|
1622
|
-
img = tag.find("img")
|
|
1623
|
-
|
|
1624
|
-
if not img:
|
|
1625
|
-
return text.strip()
|
|
1626
|
-
|
|
1627
|
-
img_markdown = text.strip()
|
|
1628
|
-
|
|
1629
|
-
if not sources:
|
|
1630
|
-
return img_markdown
|
|
1631
|
-
|
|
1632
|
-
source_info = []
|
|
1633
|
-
for source in sources:
|
|
1634
|
-
srcset = source.get("srcset")
|
|
1635
|
-
media = source.get("media")
|
|
1636
|
-
mime_type = source.get("type")
|
|
1637
|
-
|
|
1638
|
-
if srcset:
|
|
1639
|
-
info = f'srcset="{srcset}"'
|
|
1640
|
-
if media:
|
|
1641
|
-
info += f' media="{media}"'
|
|
1642
|
-
if mime_type:
|
|
1643
|
-
info += f' type="{mime_type}"'
|
|
1644
|
-
source_info.append(info)
|
|
1645
|
-
|
|
1646
|
-
if source_info and not convert_as_inline:
|
|
1647
|
-
sources_comment = "<!-- picture sources:\n"
|
|
1648
|
-
for info in source_info:
|
|
1649
|
-
sources_comment += f" {info}\n"
|
|
1650
|
-
sources_comment += "-->\n"
|
|
1651
|
-
return f"{sources_comment}{img_markdown}"
|
|
1652
|
-
|
|
1653
|
-
return img_markdown
|
|
1396
|
+
return text.strip()
|
|
1654
1397
|
|
|
1655
1398
|
|
|
1656
1399
|
def _convert_svg(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
@@ -1765,7 +1508,7 @@ def create_converters_map(
|
|
|
1765
1508
|
"abbr": _wrapper(_convert_abbr),
|
|
1766
1509
|
"article": _wrapper(_convert_semantic_block),
|
|
1767
1510
|
"aside": _wrapper(_convert_semantic_block),
|
|
1768
|
-
"audio": _wrapper(
|
|
1511
|
+
"audio": _wrapper(_convert_media_element),
|
|
1769
1512
|
"b": _wrapper(partial(_create_inline_converter(2 * strong_em_symbol))),
|
|
1770
1513
|
"bdi": _wrapper(_create_inline_converter("")),
|
|
1771
1514
|
"bdo": _wrapper(_create_inline_converter("")),
|
|
@@ -1788,7 +1531,7 @@ def create_converters_map(
|
|
|
1788
1531
|
"dt": _wrapper(_convert_dt),
|
|
1789
1532
|
"em": _wrapper(_create_inline_converter(strong_em_symbol)),
|
|
1790
1533
|
"fieldset": _wrapper(_convert_fieldset),
|
|
1791
|
-
"figcaption": _wrapper(lambda text: f"\n\n{text}
|
|
1534
|
+
"figcaption": _wrapper(lambda text: f"\n\n*{text.strip()}*\n\n" if text.strip() else ""),
|
|
1792
1535
|
"figure": _wrapper(_convert_figure),
|
|
1793
1536
|
"footer": _wrapper(_convert_semantic_block),
|
|
1794
1537
|
"form": _wrapper(_convert_form),
|
|
@@ -1861,6 +1604,6 @@ def create_converters_map(
|
|
|
1861
1604
|
"u": _wrapper(_create_inline_converter("")),
|
|
1862
1605
|
"ul": _wrapper(_convert_list),
|
|
1863
1606
|
"var": _wrapper(_create_inline_converter("*")),
|
|
1864
|
-
"video": _wrapper(
|
|
1607
|
+
"video": _wrapper(_convert_media_element),
|
|
1865
1608
|
"wbr": _wrapper(_convert_wbr),
|
|
1866
1609
|
}
|