html-to-markdown 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of html-to-markdown might be problematic. Click here for more details.
- html_to_markdown/converters.py +356 -563
- html_to_markdown/processing.py +139 -43
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.0.dist-info}/METADATA +87 -15
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.0.dist-info}/RECORD +8 -8
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.0.dist-info}/WHEEL +0 -0
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.0.dist-info}/entry_points.txt +0 -0
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.0.dist-info}/licenses/LICENSE +0 -0
- {html_to_markdown-1.8.0.dist-info → html_to_markdown-1.9.0.dist-info}/top_level.txt +0 -0
html_to_markdown/converters.py
CHANGED
|
@@ -5,11 +5,11 @@ from typing import TYPE_CHECKING
|
|
|
5
5
|
if TYPE_CHECKING:
|
|
6
6
|
from collections.abc import Iterable
|
|
7
7
|
import base64
|
|
8
|
-
import
|
|
8
|
+
from collections.abc import Callable
|
|
9
9
|
from functools import partial
|
|
10
10
|
from inspect import getfullargspec
|
|
11
11
|
from textwrap import fill
|
|
12
|
-
from typing import Any,
|
|
12
|
+
from typing import Any, Literal, TypeVar, cast
|
|
13
13
|
|
|
14
14
|
from bs4.element import Tag
|
|
15
15
|
|
|
@@ -21,6 +21,24 @@ from html_to_markdown.constants import (
|
|
|
21
21
|
)
|
|
22
22
|
from html_to_markdown.utils import chomp, indent, underline
|
|
23
23
|
|
|
24
|
+
|
|
25
|
+
def _format_block_element(text: str) -> str:
|
|
26
|
+
"""Format text as a block element with trailing newlines."""
|
|
27
|
+
return f"{text.strip()}\n\n" if text.strip() else ""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _format_inline_or_block(text: str, convert_as_inline: bool) -> str:
|
|
31
|
+
"""Format text as inline or block element based on context."""
|
|
32
|
+
return text.strip() if convert_as_inline else _format_block_element(text)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _format_wrapped_block(text: str, start_marker: str, end_marker: str = "") -> str:
|
|
36
|
+
"""Format text wrapped in markers as a block element."""
|
|
37
|
+
if not end_marker:
|
|
38
|
+
end_marker = start_marker
|
|
39
|
+
return f"{start_marker}{text.strip()}{end_marker}\n\n" if text.strip() else ""
|
|
40
|
+
|
|
41
|
+
|
|
24
42
|
SupportedElements = Literal[
|
|
25
43
|
"a",
|
|
26
44
|
"abbr",
|
|
@@ -189,11 +207,24 @@ def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool) -> str:
|
|
|
189
207
|
if not text:
|
|
190
208
|
return ""
|
|
191
209
|
|
|
210
|
+
from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
|
|
211
|
+
|
|
192
212
|
cite_url = tag.get("cite")
|
|
193
|
-
|
|
213
|
+
|
|
214
|
+
# Check if this blockquote is inside a list item
|
|
215
|
+
if _has_ancestor(tag, "li"):
|
|
216
|
+
# Indent the blockquote by 4 spaces
|
|
217
|
+
lines = text.strip().split("\n")
|
|
218
|
+
indented_lines = [f" > {line}" if line.strip() else "" for line in lines]
|
|
219
|
+
quote_text = "\n".join(indented_lines) + "\n\n"
|
|
220
|
+
else:
|
|
221
|
+
quote_text = f"\n{line_beginning_re.sub('> ', text.strip())}\n\n"
|
|
194
222
|
|
|
195
223
|
if cite_url:
|
|
196
|
-
|
|
224
|
+
if _has_ancestor(tag, "li"):
|
|
225
|
+
quote_text += f" — <{cite_url}>\n\n"
|
|
226
|
+
else:
|
|
227
|
+
quote_text += f"— <{cite_url}>\n\n"
|
|
197
228
|
|
|
198
229
|
return quote_text
|
|
199
230
|
|
|
@@ -243,8 +274,8 @@ def _convert_img(*, tag: Tag, convert_as_inline: bool, keep_inline_images_in: It
|
|
|
243
274
|
title_part = ' "{}"'.format(title.replace('"', r"\"")) if title else ""
|
|
244
275
|
parent_name = tag.parent.name if tag.parent else ""
|
|
245
276
|
|
|
246
|
-
default_preserve_in =
|
|
247
|
-
preserve_in = set(keep_inline_images_in or []) |
|
|
277
|
+
default_preserve_in = {"td", "th"}
|
|
278
|
+
preserve_in = set(keep_inline_images_in or []) | default_preserve_in
|
|
248
279
|
if convert_as_inline and parent_name not in preserve_in:
|
|
249
280
|
return alt
|
|
250
281
|
if width or height:
|
|
@@ -253,24 +284,42 @@ def _convert_img(*, tag: Tag, convert_as_inline: bool, keep_inline_images_in: It
|
|
|
253
284
|
|
|
254
285
|
|
|
255
286
|
def _convert_list(*, tag: Tag, text: str) -> str:
|
|
256
|
-
|
|
287
|
+
from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
|
|
257
288
|
|
|
258
289
|
before_paragraph = False
|
|
259
290
|
if tag.next_sibling and getattr(tag.next_sibling, "name", None) not in {"ul", "ol"}:
|
|
260
291
|
before_paragraph = True
|
|
261
292
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
293
|
+
# Check if this list is inside a list item
|
|
294
|
+
if _has_ancestor(tag, "li"):
|
|
295
|
+
# This is a nested list - needs indentation
|
|
296
|
+
# But we need to check if it's the first element after a paragraph
|
|
297
|
+
parent = tag.parent
|
|
298
|
+
while parent and parent.name != "li":
|
|
299
|
+
parent = parent.parent
|
|
300
|
+
|
|
301
|
+
if parent:
|
|
302
|
+
# Check if there's a paragraph before this list
|
|
303
|
+
prev_p = None
|
|
304
|
+
for child in parent.children:
|
|
305
|
+
if hasattr(child, "name"):
|
|
306
|
+
if child == tag:
|
|
307
|
+
break
|
|
308
|
+
if child.name == "p":
|
|
309
|
+
prev_p = child
|
|
310
|
+
|
|
311
|
+
if prev_p:
|
|
312
|
+
# If there's a paragraph before, we need proper indentation
|
|
313
|
+
lines = text.strip().split("\n")
|
|
314
|
+
indented_lines = []
|
|
315
|
+
for line in lines:
|
|
316
|
+
if line.strip():
|
|
317
|
+
indented_lines.append(f" {line}")
|
|
318
|
+
else:
|
|
319
|
+
indented_lines.append("")
|
|
320
|
+
return "\n" + "\n".join(indented_lines) + "\n"
|
|
321
|
+
# Otherwise use the original tab indentation
|
|
322
|
+
return "\n" + indent(text=text, level=1).rstrip()
|
|
274
323
|
|
|
275
324
|
return text + ("\n" if before_paragraph else "")
|
|
276
325
|
|
|
@@ -305,10 +354,38 @@ def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
|
|
|
305
354
|
tag = tag.parent
|
|
306
355
|
|
|
307
356
|
bullet = bullets[depth % len(bullets)]
|
|
357
|
+
|
|
358
|
+
# Check if the list item contains block-level elements (like <p>, <blockquote>, etc.)
|
|
359
|
+
has_block_children = any(
|
|
360
|
+
child.name in {"p", "blockquote", "pre", "ul", "ol", "div", "h1", "h2", "h3", "h4", "h5", "h6"}
|
|
361
|
+
for child in tag.children
|
|
362
|
+
if hasattr(child, "name")
|
|
363
|
+
)
|
|
364
|
+
|
|
365
|
+
if has_block_children:
|
|
366
|
+
# Handle multi-paragraph list items
|
|
367
|
+
# Split by double newlines (paragraph separators)
|
|
368
|
+
paragraphs = text.strip().split("\n\n")
|
|
369
|
+
|
|
370
|
+
if paragraphs:
|
|
371
|
+
# First paragraph goes directly after the bullet
|
|
372
|
+
result_parts = [f"{bullet} {paragraphs[0].strip()}\n"]
|
|
373
|
+
|
|
374
|
+
# Subsequent paragraphs need to be indented and separated by blank lines
|
|
375
|
+
for para in paragraphs[1:]:
|
|
376
|
+
if para.strip():
|
|
377
|
+
# Add blank line before the paragraph
|
|
378
|
+
result_parts.append("\n")
|
|
379
|
+
# Indent each line of the paragraph by 4 spaces
|
|
380
|
+
result_parts.extend(f" {line}\n" for line in para.strip().split("\n") if line.strip())
|
|
381
|
+
|
|
382
|
+
return "".join(result_parts)
|
|
383
|
+
|
|
384
|
+
# Simple case: no block elements, just inline content
|
|
308
385
|
return "{} {}\n".format(bullet, (text or "").strip())
|
|
309
386
|
|
|
310
387
|
|
|
311
|
-
def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int) -> str:
|
|
388
|
+
def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: int, tag: Tag) -> str:
|
|
312
389
|
if convert_as_inline:
|
|
313
390
|
return text
|
|
314
391
|
|
|
@@ -320,6 +397,30 @@ def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: in
|
|
|
320
397
|
break_on_hyphens=False,
|
|
321
398
|
)
|
|
322
399
|
|
|
400
|
+
from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
|
|
401
|
+
|
|
402
|
+
# Check if this paragraph is inside a list item
|
|
403
|
+
if _has_ancestor(tag, "li"):
|
|
404
|
+
# Check if this is the first paragraph in the list item
|
|
405
|
+
parent = tag.parent
|
|
406
|
+
while parent and parent.name != "li":
|
|
407
|
+
parent = parent.parent
|
|
408
|
+
|
|
409
|
+
if parent:
|
|
410
|
+
# Get all direct children that are paragraphs
|
|
411
|
+
p_children = [child for child in parent.children if hasattr(child, "name") and child.name == "p"]
|
|
412
|
+
|
|
413
|
+
# If this is not the first paragraph, indent it
|
|
414
|
+
if p_children and tag != p_children[0]:
|
|
415
|
+
# Indent all lines by 4 spaces
|
|
416
|
+
indented_lines = []
|
|
417
|
+
for line in text.split("\n"):
|
|
418
|
+
if line.strip():
|
|
419
|
+
indented_lines.append(f" {line}")
|
|
420
|
+
else:
|
|
421
|
+
indented_lines.append("")
|
|
422
|
+
text = "\n".join(indented_lines)
|
|
423
|
+
|
|
323
424
|
return f"{text}\n\n" if text else ""
|
|
324
425
|
|
|
325
426
|
|
|
@@ -337,13 +438,15 @@ def _convert_mark(*, text: str, convert_as_inline: bool, highlight_style: str) -
|
|
|
337
438
|
if convert_as_inline:
|
|
338
439
|
return text
|
|
339
440
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
441
|
+
match highlight_style:
|
|
442
|
+
case "double-equal":
|
|
443
|
+
return f"=={text}=="
|
|
444
|
+
case "bold":
|
|
445
|
+
return f"**{text}**"
|
|
446
|
+
case "html":
|
|
447
|
+
return f"<mark>{text}</mark>"
|
|
448
|
+
case _:
|
|
449
|
+
return text
|
|
347
450
|
|
|
348
451
|
|
|
349
452
|
def _convert_pre(
|
|
@@ -376,6 +479,58 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
|
|
|
376
479
|
cells = tag.find_all(["td", "th"])
|
|
377
480
|
parent_name = tag.parent.name if tag.parent and hasattr(tag.parent, "name") else ""
|
|
378
481
|
tag_grand_parent = tag.parent.parent if tag.parent else None
|
|
482
|
+
|
|
483
|
+
# Simple rowspan handling: if previous row had cells with rowspan, add empty cells
|
|
484
|
+
if tag.previous_sibling and hasattr(tag.previous_sibling, "name") and tag.previous_sibling.name == "tr":
|
|
485
|
+
prev_cells = cast("Tag", tag.previous_sibling).find_all(["td", "th"])
|
|
486
|
+
rowspan_positions = []
|
|
487
|
+
col_pos = 0
|
|
488
|
+
|
|
489
|
+
# Check which cells in previous row have rowspan > 1
|
|
490
|
+
for prev_cell in prev_cells:
|
|
491
|
+
rowspan = 1
|
|
492
|
+
if (
|
|
493
|
+
"rowspan" in prev_cell.attrs
|
|
494
|
+
and isinstance(prev_cell["rowspan"], str)
|
|
495
|
+
and prev_cell["rowspan"].isdigit()
|
|
496
|
+
):
|
|
497
|
+
rowspan = int(prev_cell["rowspan"])
|
|
498
|
+
|
|
499
|
+
if rowspan > 1:
|
|
500
|
+
# This cell spans into current row
|
|
501
|
+
rowspan_positions.append(col_pos)
|
|
502
|
+
|
|
503
|
+
# Account for colspan
|
|
504
|
+
colspan = 1
|
|
505
|
+
if (
|
|
506
|
+
"colspan" in prev_cell.attrs
|
|
507
|
+
and isinstance(prev_cell["colspan"], str)
|
|
508
|
+
and prev_cell["colspan"].isdigit()
|
|
509
|
+
):
|
|
510
|
+
colspan = int(prev_cell["colspan"])
|
|
511
|
+
col_pos += colspan
|
|
512
|
+
|
|
513
|
+
# If there are rowspan cells from previous row, add empty cells
|
|
514
|
+
if rowspan_positions:
|
|
515
|
+
# Build new text with empty cells inserted
|
|
516
|
+
new_cells = []
|
|
517
|
+
cell_index = 0
|
|
518
|
+
|
|
519
|
+
for pos in range(col_pos): # Total columns
|
|
520
|
+
if pos in rowspan_positions:
|
|
521
|
+
# Add empty cell for rowspan
|
|
522
|
+
new_cells.append(" |")
|
|
523
|
+
elif cell_index < len(cells):
|
|
524
|
+
# Add actual cell content
|
|
525
|
+
cell = cells[cell_index]
|
|
526
|
+
cell_text = cell.get_text().strip().replace("\n", " ")
|
|
527
|
+
colspan = _get_colspan(cell)
|
|
528
|
+
new_cells.append(f" {cell_text} |" * colspan)
|
|
529
|
+
cell_index += 1
|
|
530
|
+
|
|
531
|
+
# Override text with new cell arrangement
|
|
532
|
+
text = "".join(new_cells)
|
|
533
|
+
|
|
379
534
|
is_headrow = (
|
|
380
535
|
all(hasattr(cell, "name") and cell.name == "th" for cell in cells)
|
|
381
536
|
or (not tag.previous_sibling and parent_name != "tbody")
|
|
@@ -423,7 +578,7 @@ def _convert_caption(*, text: str, convert_as_inline: bool) -> str:
|
|
|
423
578
|
if not text.strip():
|
|
424
579
|
return ""
|
|
425
580
|
|
|
426
|
-
return
|
|
581
|
+
return _format_wrapped_block(text, "*")
|
|
427
582
|
|
|
428
583
|
|
|
429
584
|
def _convert_thead(*, text: str, convert_as_inline: bool) -> str:
|
|
@@ -475,7 +630,10 @@ def _convert_tfoot(*, text: str, convert_as_inline: bool) -> str:
|
|
|
475
630
|
|
|
476
631
|
|
|
477
632
|
def _convert_colgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
478
|
-
"""Convert HTML colgroup element
|
|
633
|
+
"""Convert HTML colgroup element - removes it entirely from Markdown output.
|
|
634
|
+
|
|
635
|
+
Colgroup is a table column grouping element that defines styling for columns.
|
|
636
|
+
It has no representation in Markdown and should be removed.
|
|
479
637
|
|
|
480
638
|
Args:
|
|
481
639
|
tag: The colgroup tag element.
|
|
@@ -483,54 +641,30 @@ def _convert_colgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
483
641
|
convert_as_inline: Whether to convert as inline content.
|
|
484
642
|
|
|
485
643
|
Returns:
|
|
486
|
-
|
|
644
|
+
Empty string as colgroup has no Markdown representation.
|
|
487
645
|
"""
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
return ""
|
|
493
|
-
|
|
494
|
-
span = tag.get("span", "")
|
|
495
|
-
attrs = []
|
|
496
|
-
if span and isinstance(span, str) and span.strip():
|
|
497
|
-
attrs.append(f'span="{span}"')
|
|
498
|
-
|
|
499
|
-
attrs_str = " ".join(attrs)
|
|
500
|
-
if attrs_str:
|
|
501
|
-
return f"<colgroup {attrs_str}>\n{text.strip()}\n</colgroup>\n\n"
|
|
502
|
-
return f"<colgroup>\n{text.strip()}\n</colgroup>\n\n"
|
|
646
|
+
_ = tag, text, convert_as_inline
|
|
647
|
+
# Colgroup and its contents (col elements) are purely presentational
|
|
648
|
+
# and have no equivalent in Markdown tables
|
|
649
|
+
return ""
|
|
503
650
|
|
|
504
651
|
|
|
505
652
|
def _convert_col(*, tag: Tag, convert_as_inline: bool) -> str:
|
|
506
|
-
"""Convert HTML col element
|
|
653
|
+
"""Convert HTML col element - removes it entirely from Markdown output.
|
|
654
|
+
|
|
655
|
+
Col elements define column properties (width, style) in HTML tables.
|
|
656
|
+
They have no representation in Markdown and should be removed.
|
|
507
657
|
|
|
508
658
|
Args:
|
|
509
659
|
tag: The col tag element.
|
|
510
660
|
convert_as_inline: Whether to convert as inline content.
|
|
511
661
|
|
|
512
662
|
Returns:
|
|
513
|
-
|
|
663
|
+
Empty string as col has no Markdown representation.
|
|
514
664
|
"""
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
span = tag.get("span", "")
|
|
519
|
-
width = tag.get("width", "")
|
|
520
|
-
style = tag.get("style", "")
|
|
521
|
-
|
|
522
|
-
attrs = []
|
|
523
|
-
if width and isinstance(width, str) and width.strip():
|
|
524
|
-
attrs.append(f'width="{width}"')
|
|
525
|
-
if style and isinstance(style, str) and style.strip():
|
|
526
|
-
attrs.append(f'style="{style}"')
|
|
527
|
-
if span and isinstance(span, str) and span.strip():
|
|
528
|
-
attrs.append(f'span="{span}"')
|
|
529
|
-
|
|
530
|
-
attrs_str = " ".join(attrs)
|
|
531
|
-
if attrs_str:
|
|
532
|
-
return f"<col {attrs_str} />\n"
|
|
533
|
-
return "<col />\n"
|
|
665
|
+
_ = tag, convert_as_inline
|
|
666
|
+
# Col elements are self-closing and purely presentational
|
|
667
|
+
return ""
|
|
534
668
|
|
|
535
669
|
|
|
536
670
|
def _convert_semantic_block(*, text: str, convert_as_inline: bool) -> str:
|
|
@@ -550,35 +684,37 @@ def _convert_semantic_block(*, text: str, convert_as_inline: bool) -> str:
|
|
|
550
684
|
|
|
551
685
|
|
|
552
686
|
def _convert_details(*, text: str, convert_as_inline: bool) -> str:
|
|
553
|
-
"""Convert HTML details element
|
|
687
|
+
"""Convert HTML details element to semantic Markdown.
|
|
554
688
|
|
|
555
689
|
Args:
|
|
556
690
|
text: The text content of the details element.
|
|
557
691
|
convert_as_inline: Whether to convert as inline content.
|
|
558
692
|
|
|
559
693
|
Returns:
|
|
560
|
-
The converted markdown text
|
|
694
|
+
The converted markdown text (only content, no HTML tags).
|
|
561
695
|
"""
|
|
562
696
|
if convert_as_inline:
|
|
563
697
|
return text
|
|
564
698
|
|
|
565
|
-
|
|
699
|
+
# Details is a semantic container, return its content
|
|
700
|
+
return _format_block_element(text)
|
|
566
701
|
|
|
567
702
|
|
|
568
703
|
def _convert_summary(*, text: str, convert_as_inline: bool) -> str:
|
|
569
|
-
"""Convert HTML summary element
|
|
704
|
+
"""Convert HTML summary element to emphasized text.
|
|
570
705
|
|
|
571
706
|
Args:
|
|
572
707
|
text: The text content of the summary element.
|
|
573
708
|
convert_as_inline: Whether to convert as inline content.
|
|
574
709
|
|
|
575
710
|
Returns:
|
|
576
|
-
The converted markdown text
|
|
711
|
+
The converted markdown text as bold heading.
|
|
577
712
|
"""
|
|
578
713
|
if convert_as_inline:
|
|
579
714
|
return text
|
|
580
715
|
|
|
581
|
-
|
|
716
|
+
# Summary is like a heading/title
|
|
717
|
+
return _format_wrapped_block(text, "**")
|
|
582
718
|
|
|
583
719
|
|
|
584
720
|
def _convert_dl(*, text: str, convert_as_inline: bool) -> str:
|
|
@@ -674,119 +810,42 @@ def _convert_q(*, text: str, convert_as_inline: bool) -> str:
|
|
|
674
810
|
return f'"{escaped_text}"'
|
|
675
811
|
|
|
676
812
|
|
|
677
|
-
def
|
|
678
|
-
"""Convert HTML
|
|
813
|
+
def _convert_media_element(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
814
|
+
"""Convert HTML media elements (audio/video) to semantic Markdown.
|
|
679
815
|
|
|
680
816
|
Args:
|
|
681
|
-
tag: The
|
|
682
|
-
text: The text content of the
|
|
817
|
+
tag: The media tag element.
|
|
818
|
+
text: The text content of the media element (fallback content).
|
|
683
819
|
convert_as_inline: Whether to convert as inline content.
|
|
684
820
|
|
|
685
821
|
Returns:
|
|
686
|
-
The converted markdown text
|
|
822
|
+
The converted markdown text (link if src exists, otherwise fallback content).
|
|
687
823
|
"""
|
|
688
|
-
_ = convert_as_inline
|
|
689
824
|
src = tag.get("src", "")
|
|
690
825
|
|
|
691
|
-
if not src:
|
|
692
|
-
|
|
693
|
-
if source_tag and isinstance(source_tag, Tag):
|
|
694
|
-
src = source_tag.get("src", "")
|
|
695
|
-
|
|
696
|
-
controls = "controls" if tag.get("controls") is not None else ""
|
|
697
|
-
autoplay = "autoplay" if tag.get("autoplay") is not None else ""
|
|
698
|
-
loop = "loop" if tag.get("loop") is not None else ""
|
|
699
|
-
muted = "muted" if tag.get("muted") is not None else ""
|
|
700
|
-
preload = tag.get("preload", "")
|
|
826
|
+
if not src and (source_tag := tag.find("source")) and isinstance(source_tag, Tag):
|
|
827
|
+
src = source_tag.get("src", "")
|
|
701
828
|
|
|
702
|
-
|
|
829
|
+
# If we have a src, convert to a link
|
|
703
830
|
if src and isinstance(src, str) and src.strip():
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
attrs.append(f'preload="{preload}"')
|
|
715
|
-
|
|
716
|
-
attrs_str = " ".join(attrs)
|
|
717
|
-
|
|
831
|
+
link = f"[{src}]({src})"
|
|
832
|
+
if convert_as_inline:
|
|
833
|
+
return link
|
|
834
|
+
result = f"{link}\n\n"
|
|
835
|
+
# Add fallback content if present
|
|
836
|
+
if text.strip():
|
|
837
|
+
result += f"{text.strip()}\n\n"
|
|
838
|
+
return result
|
|
839
|
+
|
|
840
|
+
# No src, just return fallback content
|
|
718
841
|
if text.strip():
|
|
719
|
-
|
|
720
|
-
return f"<audio {attrs_str}>\n{text.strip()}\n</audio>\n\n"
|
|
721
|
-
return f"<audio>\n{text.strip()}\n</audio>\n\n"
|
|
722
|
-
|
|
723
|
-
if attrs_str:
|
|
724
|
-
return f"<audio {attrs_str} />\n\n"
|
|
725
|
-
return "<audio />\n\n"
|
|
726
|
-
|
|
842
|
+
return _format_inline_or_block(text, convert_as_inline)
|
|
727
843
|
|
|
728
|
-
|
|
729
|
-
"""Convert HTML video element preserving structure with fallback.
|
|
730
|
-
|
|
731
|
-
Args:
|
|
732
|
-
tag: The video tag element.
|
|
733
|
-
text: The text content of the video element (fallback content).
|
|
734
|
-
convert_as_inline: Whether to convert as inline content.
|
|
735
|
-
|
|
736
|
-
Returns:
|
|
737
|
-
The converted markdown text preserving video element.
|
|
738
|
-
"""
|
|
739
|
-
_ = convert_as_inline
|
|
740
|
-
src = tag.get("src", "")
|
|
741
|
-
|
|
742
|
-
if not src:
|
|
743
|
-
source_tag = tag.find("source")
|
|
744
|
-
if source_tag and isinstance(source_tag, Tag):
|
|
745
|
-
src = source_tag.get("src", "")
|
|
746
|
-
|
|
747
|
-
width = tag.get("width", "")
|
|
748
|
-
height = tag.get("height", "")
|
|
749
|
-
poster = tag.get("poster", "")
|
|
750
|
-
controls = "controls" if tag.get("controls") is not None else ""
|
|
751
|
-
autoplay = "autoplay" if tag.get("autoplay") is not None else ""
|
|
752
|
-
loop = "loop" if tag.get("loop") is not None else ""
|
|
753
|
-
muted = "muted" if tag.get("muted") is not None else ""
|
|
754
|
-
preload = tag.get("preload", "")
|
|
755
|
-
|
|
756
|
-
attrs = []
|
|
757
|
-
if src and isinstance(src, str) and src.strip():
|
|
758
|
-
attrs.append(f'src="{src}"')
|
|
759
|
-
if width and isinstance(width, str) and width.strip():
|
|
760
|
-
attrs.append(f'width="{width}"')
|
|
761
|
-
if height and isinstance(height, str) and height.strip():
|
|
762
|
-
attrs.append(f'height="{height}"')
|
|
763
|
-
if poster and isinstance(poster, str) and poster.strip():
|
|
764
|
-
attrs.append(f'poster="{poster}"')
|
|
765
|
-
if controls:
|
|
766
|
-
attrs.append(controls)
|
|
767
|
-
if autoplay:
|
|
768
|
-
attrs.append(autoplay)
|
|
769
|
-
if loop:
|
|
770
|
-
attrs.append(loop)
|
|
771
|
-
if muted:
|
|
772
|
-
attrs.append(muted)
|
|
773
|
-
if preload and isinstance(preload, str) and preload.strip():
|
|
774
|
-
attrs.append(f'preload="{preload}"')
|
|
775
|
-
|
|
776
|
-
attrs_str = " ".join(attrs)
|
|
777
|
-
|
|
778
|
-
if text.strip():
|
|
779
|
-
if attrs_str:
|
|
780
|
-
return f"<video {attrs_str}>\n{text.strip()}\n</video>\n\n"
|
|
781
|
-
return f"<video>\n{text.strip()}\n</video>\n\n"
|
|
782
|
-
|
|
783
|
-
if attrs_str:
|
|
784
|
-
return f"<video {attrs_str} />\n\n"
|
|
785
|
-
return "<video />\n\n"
|
|
844
|
+
return ""
|
|
786
845
|
|
|
787
846
|
|
|
788
847
|
def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
789
|
-
"""Convert HTML iframe element
|
|
848
|
+
"""Convert HTML iframe element to semantic Markdown.
|
|
790
849
|
|
|
791
850
|
Args:
|
|
792
851
|
tag: The iframe tag element.
|
|
@@ -794,47 +853,19 @@ def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
794
853
|
convert_as_inline: Whether to convert as inline content.
|
|
795
854
|
|
|
796
855
|
Returns:
|
|
797
|
-
The converted markdown text
|
|
856
|
+
The converted markdown text (link if src exists).
|
|
798
857
|
"""
|
|
799
858
|
_ = text
|
|
800
|
-
_ = convert_as_inline
|
|
801
859
|
src = tag.get("src", "")
|
|
802
|
-
width = tag.get("width", "")
|
|
803
|
-
height = tag.get("height", "")
|
|
804
|
-
title = tag.get("title", "")
|
|
805
|
-
allow = tag.get("allow", "")
|
|
806
|
-
sandbox = tag.get("sandbox")
|
|
807
|
-
loading = tag.get("loading", "")
|
|
808
|
-
|
|
809
|
-
attrs = []
|
|
810
|
-
if src and isinstance(src, str) and src.strip():
|
|
811
|
-
attrs.append(f'src="{src}"')
|
|
812
|
-
if width and isinstance(width, str) and width.strip():
|
|
813
|
-
attrs.append(f'width="{width}"')
|
|
814
|
-
if height and isinstance(height, str) and height.strip():
|
|
815
|
-
attrs.append(f'height="{height}"')
|
|
816
|
-
if title and isinstance(title, str) and title.strip():
|
|
817
|
-
attrs.append(f'title="{title}"')
|
|
818
|
-
if allow and isinstance(allow, str) and allow.strip():
|
|
819
|
-
attrs.append(f'allow="{allow}"')
|
|
820
|
-
if sandbox is not None:
|
|
821
|
-
if isinstance(sandbox, list):
|
|
822
|
-
if sandbox:
|
|
823
|
-
attrs.append(f'sandbox="{" ".join(sandbox)}"')
|
|
824
|
-
else:
|
|
825
|
-
attrs.append("sandbox")
|
|
826
|
-
elif isinstance(sandbox, str) and sandbox:
|
|
827
|
-
attrs.append(f'sandbox="{sandbox}"')
|
|
828
|
-
else:
|
|
829
|
-
attrs.append("sandbox")
|
|
830
|
-
if loading and isinstance(loading, str) and loading.strip():
|
|
831
|
-
attrs.append(f'loading="{loading}"')
|
|
832
860
|
|
|
833
|
-
|
|
861
|
+
# If we have a src, convert to a link
|
|
862
|
+
if src and isinstance(src, str) and src.strip():
|
|
863
|
+
link = f"[{src}]({src})"
|
|
864
|
+
if convert_as_inline:
|
|
865
|
+
return link
|
|
866
|
+
return f"{link}\n\n"
|
|
834
867
|
|
|
835
|
-
|
|
836
|
-
return f"<iframe {attrs_str}></iframe>\n\n"
|
|
837
|
-
return "<iframe></iframe>\n\n"
|
|
868
|
+
return ""
|
|
838
869
|
|
|
839
870
|
|
|
840
871
|
def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
@@ -860,7 +891,7 @@ def _convert_abbr(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
860
891
|
|
|
861
892
|
|
|
862
893
|
def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
863
|
-
"""Convert HTML time element
|
|
894
|
+
"""Convert HTML time element to semantic Markdown.
|
|
864
895
|
|
|
865
896
|
Args:
|
|
866
897
|
tag: The time tag element.
|
|
@@ -868,21 +899,19 @@ def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
868
899
|
convert_as_inline: Whether to convert as inline content.
|
|
869
900
|
|
|
870
901
|
Returns:
|
|
871
|
-
The converted markdown text
|
|
902
|
+
The converted markdown text (content only, no HTML tags).
|
|
872
903
|
"""
|
|
904
|
+
_ = tag
|
|
873
905
|
_ = convert_as_inline
|
|
874
906
|
if not text.strip():
|
|
875
907
|
return ""
|
|
876
908
|
|
|
877
|
-
|
|
878
|
-
if datetime_attr and isinstance(datetime_attr, str) and datetime_attr.strip():
|
|
879
|
-
return f'<time datetime="{datetime_attr.strip()}">{text.strip()}</time>'
|
|
880
|
-
|
|
909
|
+
# Time elements are semantic - just return the content
|
|
881
910
|
return text.strip()
|
|
882
911
|
|
|
883
912
|
|
|
884
913
|
def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
885
|
-
"""Convert HTML data element
|
|
914
|
+
"""Convert HTML data element to semantic Markdown.
|
|
886
915
|
|
|
887
916
|
Args:
|
|
888
917
|
tag: The data tag element.
|
|
@@ -890,16 +919,14 @@ def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
890
919
|
convert_as_inline: Whether to convert as inline content.
|
|
891
920
|
|
|
892
921
|
Returns:
|
|
893
|
-
The converted markdown text
|
|
922
|
+
The converted markdown text (content only, no HTML tags).
|
|
894
923
|
"""
|
|
924
|
+
_ = tag
|
|
895
925
|
_ = convert_as_inline
|
|
896
926
|
if not text.strip():
|
|
897
927
|
return ""
|
|
898
928
|
|
|
899
|
-
|
|
900
|
-
if value_attr and isinstance(value_attr, str) and value_attr.strip():
|
|
901
|
-
return f'<data value="{value_attr.strip()}">{text.strip()}</data>'
|
|
902
|
-
|
|
929
|
+
# Data elements are semantic - just return the content
|
|
903
930
|
return text.strip()
|
|
904
931
|
|
|
905
932
|
|
|
@@ -917,7 +944,7 @@ def _convert_wbr(*, convert_as_inline: bool) -> str:
|
|
|
917
944
|
|
|
918
945
|
|
|
919
946
|
def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
920
|
-
"""Convert HTML form element
|
|
947
|
+
"""Convert HTML form element to semantic Markdown.
|
|
921
948
|
|
|
922
949
|
Args:
|
|
923
950
|
tag: The form tag element.
|
|
@@ -925,38 +952,28 @@ def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
925
952
|
convert_as_inline: Whether to convert as inline content.
|
|
926
953
|
|
|
927
954
|
Returns:
|
|
928
|
-
The converted markdown text
|
|
955
|
+
The converted markdown text (only content, no HTML tags).
|
|
929
956
|
"""
|
|
957
|
+
_ = tag
|
|
930
958
|
if convert_as_inline:
|
|
931
959
|
return text
|
|
932
960
|
|
|
933
961
|
if not text.strip():
|
|
934
962
|
return ""
|
|
935
963
|
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
attrs = []
|
|
939
|
-
|
|
940
|
-
if action and isinstance(action, str) and action.strip():
|
|
941
|
-
attrs.append(f'action="{action.strip()}"')
|
|
942
|
-
if method and isinstance(method, str) and method.strip():
|
|
943
|
-
attrs.append(f'method="{method.strip()}"')
|
|
944
|
-
|
|
945
|
-
attrs_str = " ".join(attrs)
|
|
946
|
-
if attrs_str:
|
|
947
|
-
return f"<form {attrs_str}>\n{text.strip()}\n</form>\n\n"
|
|
948
|
-
return f"<form>\n{text.strip()}\n</form>\n\n"
|
|
964
|
+
# Forms are just containers, return their content
|
|
965
|
+
return text
|
|
949
966
|
|
|
950
967
|
|
|
951
968
|
def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
|
|
952
|
-
"""Convert HTML fieldset element
|
|
969
|
+
"""Convert HTML fieldset element to semantic Markdown.
|
|
953
970
|
|
|
954
971
|
Args:
|
|
955
972
|
text: The text content of the fieldset element.
|
|
956
973
|
convert_as_inline: Whether to convert as inline content.
|
|
957
974
|
|
|
958
975
|
Returns:
|
|
959
|
-
The converted markdown text
|
|
976
|
+
The converted markdown text (only content, no HTML tags).
|
|
960
977
|
"""
|
|
961
978
|
if convert_as_inline:
|
|
962
979
|
return text
|
|
@@ -964,7 +981,8 @@ def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
|
|
|
964
981
|
if not text.strip():
|
|
965
982
|
return ""
|
|
966
983
|
|
|
967
|
-
return
|
|
984
|
+
# Fieldsets are semantic groupings, return their content
|
|
985
|
+
return text
|
|
968
986
|
|
|
969
987
|
|
|
970
988
|
def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
|
|
@@ -983,11 +1001,12 @@ def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
|
|
|
983
1001
|
if not text.strip():
|
|
984
1002
|
return ""
|
|
985
1003
|
|
|
986
|
-
|
|
1004
|
+
# Legend is like a heading/title for fieldsets
|
|
1005
|
+
return _format_wrapped_block(text, "**")
|
|
987
1006
|
|
|
988
1007
|
|
|
989
1008
|
def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
990
|
-
"""Convert HTML label element
|
|
1009
|
+
"""Convert HTML label element to Markdown.
|
|
991
1010
|
|
|
992
1011
|
Args:
|
|
993
1012
|
tag: The label tag element.
|
|
@@ -995,78 +1014,33 @@ def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
995
1014
|
convert_as_inline: Whether to convert as inline content.
|
|
996
1015
|
|
|
997
1016
|
Returns:
|
|
998
|
-
The
|
|
1017
|
+
The label text content.
|
|
999
1018
|
"""
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1019
|
+
_ = tag
|
|
1020
|
+
# Labels are just text, return the content
|
|
1003
1021
|
if not text.strip():
|
|
1004
1022
|
return ""
|
|
1005
1023
|
|
|
1006
|
-
|
|
1007
|
-
if for_attr and isinstance(for_attr, str) and for_attr.strip():
|
|
1008
|
-
return f'<label for="{for_attr.strip()}">{text.strip()}</label>\n\n'
|
|
1009
|
-
|
|
1010
|
-
return f"<label>{text.strip()}</label>\n\n"
|
|
1024
|
+
return _format_inline_or_block(text, convert_as_inline)
|
|
1011
1025
|
|
|
1012
1026
|
|
|
1013
1027
|
def _convert_input_enhanced(*, tag: Tag, convert_as_inline: bool) -> str:
|
|
1014
|
-
"""Convert HTML input element
|
|
1028
|
+
"""Convert HTML input element to Markdown.
|
|
1015
1029
|
|
|
1016
1030
|
Args:
|
|
1017
1031
|
tag: The input tag element.
|
|
1018
1032
|
convert_as_inline: Whether to convert as inline content.
|
|
1019
1033
|
|
|
1020
1034
|
Returns:
|
|
1021
|
-
|
|
1035
|
+
Empty string since input elements have no Markdown representation.
|
|
1022
1036
|
"""
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
if _has_ancestor(tag, "li"):
|
|
1028
|
-
return ""
|
|
1029
|
-
|
|
1030
|
-
id_attr = tag.get("id", "")
|
|
1031
|
-
name = tag.get("name", "")
|
|
1032
|
-
value = tag.get("value", "")
|
|
1033
|
-
placeholder = tag.get("placeholder", "")
|
|
1034
|
-
required = tag.get("required") is not None
|
|
1035
|
-
disabled = tag.get("disabled") is not None
|
|
1036
|
-
readonly = tag.get("readonly") is not None
|
|
1037
|
-
checked = tag.get("checked") is not None
|
|
1038
|
-
accept = tag.get("accept", "")
|
|
1039
|
-
|
|
1040
|
-
attrs = []
|
|
1041
|
-
if input_type and isinstance(input_type, str):
|
|
1042
|
-
attrs.append(f'type="{input_type}"')
|
|
1043
|
-
if id_attr and isinstance(id_attr, str) and id_attr.strip():
|
|
1044
|
-
attrs.append(f'id="{id_attr}"')
|
|
1045
|
-
if name and isinstance(name, str) and name.strip():
|
|
1046
|
-
attrs.append(f'name="{name}"')
|
|
1047
|
-
if value and isinstance(value, str) and value.strip():
|
|
1048
|
-
attrs.append(f'value="{value}"')
|
|
1049
|
-
if placeholder and isinstance(placeholder, str) and placeholder.strip():
|
|
1050
|
-
attrs.append(f'placeholder="{placeholder}"')
|
|
1051
|
-
if accept and isinstance(accept, str) and accept.strip():
|
|
1052
|
-
attrs.append(f'accept="{accept}"')
|
|
1053
|
-
if required:
|
|
1054
|
-
attrs.append("required")
|
|
1055
|
-
if disabled:
|
|
1056
|
-
attrs.append("disabled")
|
|
1057
|
-
if readonly:
|
|
1058
|
-
attrs.append("readonly")
|
|
1059
|
-
if checked:
|
|
1060
|
-
attrs.append("checked")
|
|
1061
|
-
|
|
1062
|
-
attrs_str = " ".join(attrs)
|
|
1063
|
-
result = f"<input {attrs_str} />" if attrs_str else "<input />"
|
|
1064
|
-
|
|
1065
|
-
return result if convert_as_inline else f"{result}\n\n"
|
|
1037
|
+
_ = tag, convert_as_inline
|
|
1038
|
+
# Input elements have no content and no Markdown equivalent
|
|
1039
|
+
return ""
|
|
1066
1040
|
|
|
1067
1041
|
|
|
1068
1042
|
def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1069
|
-
"""Convert HTML textarea element
|
|
1043
|
+
"""Convert HTML textarea element to Markdown.
|
|
1070
1044
|
|
|
1071
1045
|
Args:
|
|
1072
1046
|
tag: The textarea tag element.
|
|
@@ -1074,42 +1048,18 @@ def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1074
1048
|
convert_as_inline: Whether to convert as inline content.
|
|
1075
1049
|
|
|
1076
1050
|
Returns:
|
|
1077
|
-
The
|
|
1051
|
+
The text content of the textarea.
|
|
1078
1052
|
"""
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1053
|
+
_ = tag
|
|
1054
|
+
# Return the text content, which is what the user entered
|
|
1082
1055
|
if not text.strip():
|
|
1083
1056
|
return ""
|
|
1084
1057
|
|
|
1085
|
-
|
|
1086
|
-
placeholder = tag.get("placeholder", "")
|
|
1087
|
-
rows = tag.get("rows", "")
|
|
1088
|
-
cols = tag.get("cols", "")
|
|
1089
|
-
required = tag.get("required") is not None
|
|
1090
|
-
|
|
1091
|
-
attrs = []
|
|
1092
|
-
if name and isinstance(name, str) and name.strip():
|
|
1093
|
-
attrs.append(f'name="{name}"')
|
|
1094
|
-
if placeholder and isinstance(placeholder, str) and placeholder.strip():
|
|
1095
|
-
attrs.append(f'placeholder="{placeholder}"')
|
|
1096
|
-
if rows and isinstance(rows, str) and rows.strip():
|
|
1097
|
-
attrs.append(f'rows="{rows}"')
|
|
1098
|
-
if cols and isinstance(cols, str) and cols.strip():
|
|
1099
|
-
attrs.append(f'cols="{cols}"')
|
|
1100
|
-
if required:
|
|
1101
|
-
attrs.append("required")
|
|
1102
|
-
|
|
1103
|
-
attrs_str = " ".join(attrs)
|
|
1104
|
-
content = text.strip()
|
|
1105
|
-
|
|
1106
|
-
if attrs_str:
|
|
1107
|
-
return f"<textarea {attrs_str}>{content}</textarea>\n\n"
|
|
1108
|
-
return f"<textarea>{content}</textarea>\n\n"
|
|
1058
|
+
return _format_inline_or_block(text, convert_as_inline)
|
|
1109
1059
|
|
|
1110
1060
|
|
|
1111
1061
|
def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1112
|
-
"""Convert HTML select element
|
|
1062
|
+
"""Convert HTML select element to Markdown.
|
|
1113
1063
|
|
|
1114
1064
|
Args:
|
|
1115
1065
|
tag: The select tag element.
|
|
@@ -1117,39 +1067,25 @@ def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1117
1067
|
convert_as_inline: Whether to convert as inline content.
|
|
1118
1068
|
|
|
1119
1069
|
Returns:
|
|
1120
|
-
The
|
|
1070
|
+
The text content (options) as a comma-separated list.
|
|
1121
1071
|
"""
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1072
|
+
_ = tag
|
|
1073
|
+
# Return the options as text
|
|
1125
1074
|
if not text.strip():
|
|
1126
1075
|
return ""
|
|
1127
1076
|
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
attrs = []
|
|
1134
|
-
if id_attr and isinstance(id_attr, str) and id_attr.strip():
|
|
1135
|
-
attrs.append(f'id="{id_attr}"')
|
|
1136
|
-
if name and isinstance(name, str) and name.strip():
|
|
1137
|
-
attrs.append(f'name="{name}"')
|
|
1138
|
-
if multiple:
|
|
1139
|
-
attrs.append("multiple")
|
|
1140
|
-
if required:
|
|
1141
|
-
attrs.append("required")
|
|
1142
|
-
|
|
1143
|
-
attrs_str = " ".join(attrs)
|
|
1144
|
-
content = text.strip()
|
|
1077
|
+
# In inline mode, show options separated by commas
|
|
1078
|
+
if convert_as_inline:
|
|
1079
|
+
# Remove extra whitespace and join options
|
|
1080
|
+
options = [opt.strip() for opt in text.strip().split("\n") if opt.strip()]
|
|
1081
|
+
return ", ".join(options)
|
|
1145
1082
|
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
return f"<select>\n{content}\n</select>\n\n"
|
|
1083
|
+
# In block mode, show as a list
|
|
1084
|
+
return _format_block_element(text)
|
|
1149
1085
|
|
|
1150
1086
|
|
|
1151
1087
|
def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1152
|
-
"""Convert HTML option element
|
|
1088
|
+
"""Convert HTML option element to Markdown.
|
|
1153
1089
|
|
|
1154
1090
|
Args:
|
|
1155
1091
|
tag: The option tag element.
|
|
@@ -1157,33 +1093,26 @@ def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1157
1093
|
convert_as_inline: Whether to convert as inline content.
|
|
1158
1094
|
|
|
1159
1095
|
Returns:
|
|
1160
|
-
The
|
|
1096
|
+
The option text, potentially with a marker if selected.
|
|
1161
1097
|
"""
|
|
1162
|
-
if convert_as_inline:
|
|
1163
|
-
return text
|
|
1164
|
-
|
|
1165
1098
|
if not text.strip():
|
|
1166
1099
|
return ""
|
|
1167
1100
|
|
|
1168
|
-
|
|
1101
|
+
# Check if this option is selected
|
|
1169
1102
|
selected = tag.get("selected") is not None
|
|
1170
|
-
|
|
1171
|
-
attrs = []
|
|
1172
|
-
if value and isinstance(value, str) and value.strip():
|
|
1173
|
-
attrs.append(f'value="{value}"')
|
|
1174
|
-
if selected:
|
|
1175
|
-
attrs.append("selected")
|
|
1176
|
-
|
|
1177
|
-
attrs_str = " ".join(attrs)
|
|
1178
1103
|
content = text.strip()
|
|
1179
1104
|
|
|
1180
|
-
if
|
|
1181
|
-
return
|
|
1182
|
-
|
|
1105
|
+
if convert_as_inline:
|
|
1106
|
+
return content
|
|
1107
|
+
|
|
1108
|
+
# In block mode, mark selected options
|
|
1109
|
+
if selected:
|
|
1110
|
+
return f"* {content}\n"
|
|
1111
|
+
return f"{content}\n"
|
|
1183
1112
|
|
|
1184
1113
|
|
|
1185
1114
|
def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1186
|
-
"""Convert HTML optgroup element
|
|
1115
|
+
"""Convert HTML optgroup element to semantic Markdown.
|
|
1187
1116
|
|
|
1188
1117
|
Args:
|
|
1189
1118
|
tag: The optgroup tag element.
|
|
@@ -1191,7 +1120,7 @@ def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1191
1120
|
convert_as_inline: Whether to convert as inline content.
|
|
1192
1121
|
|
|
1193
1122
|
Returns:
|
|
1194
|
-
The converted markdown text
|
|
1123
|
+
The converted markdown text with label as heading.
|
|
1195
1124
|
"""
|
|
1196
1125
|
if convert_as_inline:
|
|
1197
1126
|
return text
|
|
@@ -1200,21 +1129,17 @@ def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1200
1129
|
return ""
|
|
1201
1130
|
|
|
1202
1131
|
label = tag.get("label", "")
|
|
1132
|
+
content = text.strip()
|
|
1203
1133
|
|
|
1204
|
-
|
|
1134
|
+
# If there's a label, show it as a heading
|
|
1205
1135
|
if label and isinstance(label, str) and label.strip():
|
|
1206
|
-
|
|
1136
|
+
return f"**{label.strip()}**\n{content}\n"
|
|
1207
1137
|
|
|
1208
|
-
|
|
1209
|
-
content = text.strip()
|
|
1210
|
-
|
|
1211
|
-
if attrs_str:
|
|
1212
|
-
return f"<optgroup {attrs_str}>\n{content}\n</optgroup>\n"
|
|
1213
|
-
return f"<optgroup>\n{content}\n</optgroup>\n"
|
|
1138
|
+
return f"{content}\n"
|
|
1214
1139
|
|
|
1215
1140
|
|
|
1216
1141
|
def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1217
|
-
"""Convert HTML button element
|
|
1142
|
+
"""Convert HTML button element to Markdown.
|
|
1218
1143
|
|
|
1219
1144
|
Args:
|
|
1220
1145
|
tag: The button tag element.
|
|
@@ -1222,38 +1147,18 @@ def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1222
1147
|
convert_as_inline: Whether to convert as inline content.
|
|
1223
1148
|
|
|
1224
1149
|
Returns:
|
|
1225
|
-
The
|
|
1150
|
+
The button text content.
|
|
1226
1151
|
"""
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1152
|
+
_ = tag
|
|
1153
|
+
# Buttons are just interactive text, return the text content
|
|
1230
1154
|
if not text.strip():
|
|
1231
1155
|
return ""
|
|
1232
1156
|
|
|
1233
|
-
|
|
1234
|
-
name = tag.get("name", "")
|
|
1235
|
-
value = tag.get("value", "")
|
|
1236
|
-
disabled = tag.get("disabled") is not None
|
|
1237
|
-
|
|
1238
|
-
attrs = []
|
|
1239
|
-
if button_type and isinstance(button_type, str) and button_type.strip():
|
|
1240
|
-
attrs.append(f'type="{button_type}"')
|
|
1241
|
-
if name and isinstance(name, str) and name.strip():
|
|
1242
|
-
attrs.append(f'name="{name}"')
|
|
1243
|
-
if value and isinstance(value, str) and value.strip():
|
|
1244
|
-
attrs.append(f'value="{value}"')
|
|
1245
|
-
if disabled:
|
|
1246
|
-
attrs.append("disabled")
|
|
1247
|
-
|
|
1248
|
-
attrs_str = " ".join(attrs)
|
|
1249
|
-
|
|
1250
|
-
if attrs_str:
|
|
1251
|
-
return f"<button {attrs_str}>{text.strip()}</button>\n\n"
|
|
1252
|
-
return f"<button>{text.strip()}</button>\n\n"
|
|
1157
|
+
return _format_inline_or_block(text, convert_as_inline)
|
|
1253
1158
|
|
|
1254
1159
|
|
|
1255
1160
|
def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1256
|
-
"""Convert HTML progress element
|
|
1161
|
+
"""Convert HTML progress element to semantic text.
|
|
1257
1162
|
|
|
1258
1163
|
Args:
|
|
1259
1164
|
tag: The progress tag element.
|
|
@@ -1261,33 +1166,21 @@ def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1261
1166
|
convert_as_inline: Whether to convert as inline content.
|
|
1262
1167
|
|
|
1263
1168
|
Returns:
|
|
1264
|
-
The converted markdown text
|
|
1169
|
+
The converted markdown text (only content, no HTML tags).
|
|
1265
1170
|
"""
|
|
1171
|
+
_ = tag
|
|
1266
1172
|
if convert_as_inline:
|
|
1267
1173
|
return text
|
|
1268
1174
|
|
|
1269
1175
|
if not text.strip():
|
|
1270
1176
|
return ""
|
|
1271
1177
|
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
attrs = []
|
|
1276
|
-
if value and isinstance(value, str) and value.strip():
|
|
1277
|
-
attrs.append(f'value="{value}"')
|
|
1278
|
-
if max_val and isinstance(max_val, str) and max_val.strip():
|
|
1279
|
-
attrs.append(f'max="{max_val}"')
|
|
1280
|
-
|
|
1281
|
-
attrs_str = " ".join(attrs)
|
|
1282
|
-
content = text.strip()
|
|
1283
|
-
|
|
1284
|
-
if attrs_str:
|
|
1285
|
-
return f"<progress {attrs_str}>{content}</progress>\n\n"
|
|
1286
|
-
return f"<progress>{content}</progress>\n\n"
|
|
1178
|
+
# Progress elements convert to their text content
|
|
1179
|
+
return _format_block_element(text)
|
|
1287
1180
|
|
|
1288
1181
|
|
|
1289
1182
|
def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1290
|
-
"""Convert HTML meter element
|
|
1183
|
+
"""Convert HTML meter element to semantic text.
|
|
1291
1184
|
|
|
1292
1185
|
Args:
|
|
1293
1186
|
tag: The meter tag element.
|
|
@@ -1295,45 +1188,21 @@ def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1295
1188
|
convert_as_inline: Whether to convert as inline content.
|
|
1296
1189
|
|
|
1297
1190
|
Returns:
|
|
1298
|
-
The converted markdown text
|
|
1191
|
+
The converted markdown text (only content, no HTML tags).
|
|
1299
1192
|
"""
|
|
1193
|
+
_ = tag
|
|
1300
1194
|
if convert_as_inline:
|
|
1301
1195
|
return text
|
|
1302
1196
|
|
|
1303
1197
|
if not text.strip():
|
|
1304
1198
|
return ""
|
|
1305
1199
|
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
max_val = tag.get("max", "")
|
|
1309
|
-
low = tag.get("low", "")
|
|
1310
|
-
high = tag.get("high", "")
|
|
1311
|
-
optimum = tag.get("optimum", "")
|
|
1312
|
-
|
|
1313
|
-
attrs = []
|
|
1314
|
-
if value and isinstance(value, str) and value.strip():
|
|
1315
|
-
attrs.append(f'value="{value}"')
|
|
1316
|
-
if min_val and isinstance(min_val, str) and min_val.strip():
|
|
1317
|
-
attrs.append(f'min="{min_val}"')
|
|
1318
|
-
if max_val and isinstance(max_val, str) and max_val.strip():
|
|
1319
|
-
attrs.append(f'max="{max_val}"')
|
|
1320
|
-
if low and isinstance(low, str) and low.strip():
|
|
1321
|
-
attrs.append(f'low="{low}"')
|
|
1322
|
-
if high and isinstance(high, str) and high.strip():
|
|
1323
|
-
attrs.append(f'high="{high}"')
|
|
1324
|
-
if optimum and isinstance(optimum, str) and optimum.strip():
|
|
1325
|
-
attrs.append(f'optimum="{optimum}"')
|
|
1326
|
-
|
|
1327
|
-
attrs_str = " ".join(attrs)
|
|
1328
|
-
content = text.strip()
|
|
1329
|
-
|
|
1330
|
-
if attrs_str:
|
|
1331
|
-
return f"<meter {attrs_str}>{content}</meter>\n\n"
|
|
1332
|
-
return f"<meter>{content}</meter>\n\n"
|
|
1200
|
+
# Meter elements convert to their text content
|
|
1201
|
+
return _format_block_element(text)
|
|
1333
1202
|
|
|
1334
1203
|
|
|
1335
1204
|
def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1336
|
-
"""Convert HTML output element
|
|
1205
|
+
"""Convert HTML output element to semantic text.
|
|
1337
1206
|
|
|
1338
1207
|
Args:
|
|
1339
1208
|
tag: The output tag element.
|
|
@@ -1341,34 +1210,21 @@ def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1341
1210
|
convert_as_inline: Whether to convert as inline content.
|
|
1342
1211
|
|
|
1343
1212
|
Returns:
|
|
1344
|
-
The converted markdown text
|
|
1213
|
+
The converted markdown text (only content, no HTML tags).
|
|
1345
1214
|
"""
|
|
1215
|
+
_ = tag
|
|
1346
1216
|
if convert_as_inline:
|
|
1347
1217
|
return text
|
|
1348
1218
|
|
|
1349
1219
|
if not text.strip():
|
|
1350
1220
|
return ""
|
|
1351
1221
|
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
attrs = []
|
|
1356
|
-
if for_attr:
|
|
1357
|
-
for_value = " ".join(for_attr) if isinstance(for_attr, list) else str(for_attr)
|
|
1358
|
-
if for_value.strip():
|
|
1359
|
-
attrs.append(f'for="{for_value}"')
|
|
1360
|
-
if name and isinstance(name, str) and name.strip():
|
|
1361
|
-
attrs.append(f'name="{name}"')
|
|
1362
|
-
|
|
1363
|
-
attrs_str = " ".join(attrs)
|
|
1364
|
-
|
|
1365
|
-
if attrs_str:
|
|
1366
|
-
return f"<output {attrs_str}>{text.strip()}</output>\n\n"
|
|
1367
|
-
return f"<output>{text.strip()}</output>\n\n"
|
|
1222
|
+
# Output elements convert to their text content
|
|
1223
|
+
return _format_block_element(text)
|
|
1368
1224
|
|
|
1369
1225
|
|
|
1370
1226
|
def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
1371
|
-
"""Convert HTML datalist element
|
|
1227
|
+
"""Convert HTML datalist element to semantic Markdown.
|
|
1372
1228
|
|
|
1373
1229
|
Args:
|
|
1374
1230
|
tag: The datalist tag element.
|
|
@@ -1376,26 +1232,17 @@ def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
|
|
|
1376
1232
|
convert_as_inline: Whether to convert as inline content.
|
|
1377
1233
|
|
|
1378
1234
|
Returns:
|
|
1379
|
-
The converted markdown text
|
|
1235
|
+
The converted markdown text (only content, no HTML tags).
|
|
1380
1236
|
"""
|
|
1237
|
+
_ = tag
|
|
1381
1238
|
if convert_as_inline:
|
|
1382
1239
|
return text
|
|
1383
1240
|
|
|
1384
1241
|
if not text.strip():
|
|
1385
1242
|
return ""
|
|
1386
1243
|
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
attrs = []
|
|
1390
|
-
if id_attr and isinstance(id_attr, str) and id_attr.strip():
|
|
1391
|
-
attrs.append(f'id="{id_attr}"')
|
|
1392
|
-
|
|
1393
|
-
attrs_str = " ".join(attrs)
|
|
1394
|
-
content = text.strip()
|
|
1395
|
-
|
|
1396
|
-
if attrs_str:
|
|
1397
|
-
return f"<datalist {attrs_str}>\n{content}\n</datalist>\n\n"
|
|
1398
|
-
return f"<datalist>\n{content}\n</datalist>\n\n"
|
|
1244
|
+
# Datalist shows options as a list
|
|
1245
|
+
return _format_block_element(text)
|
|
1399
1246
|
|
|
1400
1247
|
|
|
1401
1248
|
def _convert_ruby(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
@@ -1488,7 +1335,7 @@ def _convert_rtc(*, text: str, convert_as_inline: bool) -> str: # noqa: ARG001
|
|
|
1488
1335
|
|
|
1489
1336
|
|
|
1490
1337
|
def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1491
|
-
"""Convert HTML dialog element
|
|
1338
|
+
"""Convert HTML dialog element to semantic Markdown.
|
|
1492
1339
|
|
|
1493
1340
|
Args:
|
|
1494
1341
|
text: The text content of the dialog element.
|
|
@@ -1496,27 +1343,21 @@ def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1496
1343
|
tag: The dialog tag element.
|
|
1497
1344
|
|
|
1498
1345
|
Returns:
|
|
1499
|
-
The converted markdown text
|
|
1346
|
+
The converted markdown text (only content, no HTML tags).
|
|
1500
1347
|
"""
|
|
1348
|
+
_ = tag
|
|
1501
1349
|
if convert_as_inline:
|
|
1502
1350
|
return text
|
|
1503
1351
|
|
|
1504
1352
|
if not text.strip():
|
|
1505
1353
|
return ""
|
|
1506
1354
|
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
attrs.append("open")
|
|
1510
|
-
if tag.get("id"):
|
|
1511
|
-
attrs.append(f'id="{tag.get("id")}"')
|
|
1512
|
-
|
|
1513
|
-
attrs_str = " " + " ".join(attrs) if attrs else ""
|
|
1514
|
-
|
|
1515
|
-
return f"<dialog{attrs_str}>\n{text.strip()}\n</dialog>\n\n"
|
|
1355
|
+
# Dialog is a semantic container, return its content
|
|
1356
|
+
return _format_block_element(text)
|
|
1516
1357
|
|
|
1517
1358
|
|
|
1518
1359
|
def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1519
|
-
"""Convert HTML menu element
|
|
1360
|
+
"""Convert HTML menu element to semantic Markdown.
|
|
1520
1361
|
|
|
1521
1362
|
Args:
|
|
1522
1363
|
text: The text content of the menu element.
|
|
@@ -1524,29 +1365,21 @@ def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1524
1365
|
tag: The menu tag element.
|
|
1525
1366
|
|
|
1526
1367
|
Returns:
|
|
1527
|
-
The converted markdown text
|
|
1368
|
+
The converted markdown text (only content, no HTML tags).
|
|
1528
1369
|
"""
|
|
1370
|
+
_ = tag
|
|
1529
1371
|
if convert_as_inline:
|
|
1530
1372
|
return text
|
|
1531
1373
|
|
|
1532
1374
|
if not text.strip():
|
|
1533
1375
|
return ""
|
|
1534
1376
|
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
attrs.append(f'type="{tag.get("type")}"')
|
|
1538
|
-
if tag.get("label"):
|
|
1539
|
-
attrs.append(f'label="{tag.get("label")}"')
|
|
1540
|
-
if tag.get("id"):
|
|
1541
|
-
attrs.append(f'id="{tag.get("id")}"')
|
|
1542
|
-
|
|
1543
|
-
attrs_str = " " + " ".join(attrs) if attrs else ""
|
|
1544
|
-
|
|
1545
|
-
return f"<menu{attrs_str}>\n{text.strip()}\n</menu>\n\n"
|
|
1377
|
+
# Menu is converted as a list
|
|
1378
|
+
return _format_block_element(text)
|
|
1546
1379
|
|
|
1547
1380
|
|
|
1548
1381
|
def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1549
|
-
"""Convert HTML figure element
|
|
1382
|
+
"""Convert HTML figure element to semantic Markdown.
|
|
1550
1383
|
|
|
1551
1384
|
Args:
|
|
1552
1385
|
text: The text content of the figure element.
|
|
@@ -1554,42 +1387,35 @@ def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1554
1387
|
tag: The figure tag element.
|
|
1555
1388
|
|
|
1556
1389
|
Returns:
|
|
1557
|
-
The converted markdown text
|
|
1390
|
+
The converted markdown text (only content, no HTML tags).
|
|
1558
1391
|
"""
|
|
1392
|
+
_ = tag
|
|
1559
1393
|
if not text.strip():
|
|
1560
1394
|
return ""
|
|
1561
1395
|
|
|
1562
1396
|
if convert_as_inline:
|
|
1563
1397
|
return text
|
|
1564
1398
|
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
attrs.append(f'id="{tag.get("id")}"')
|
|
1568
|
-
if tag.get("class"):
|
|
1569
|
-
class_val = tag.get("class")
|
|
1570
|
-
if isinstance(class_val, list):
|
|
1571
|
-
class_val = " ".join(class_val)
|
|
1572
|
-
attrs.append(f'class="{class_val}"')
|
|
1573
|
-
|
|
1574
|
-
attrs_str = " " + " ".join(attrs) if attrs else ""
|
|
1575
|
-
|
|
1399
|
+
# Figure is a semantic container, return its content
|
|
1400
|
+
# Make sure there's proper spacing after the figure content
|
|
1576
1401
|
content = text.strip()
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1402
|
+
if content and not content.endswith("\n\n"):
|
|
1403
|
+
if content.endswith("\n"):
|
|
1404
|
+
content += "\n"
|
|
1405
|
+
else:
|
|
1406
|
+
content += "\n\n"
|
|
1407
|
+
return content
|
|
1582
1408
|
|
|
1583
1409
|
|
|
1584
1410
|
def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
|
|
1585
|
-
"""Convert HTML hgroup element
|
|
1411
|
+
"""Convert HTML hgroup element to semantic Markdown.
|
|
1586
1412
|
|
|
1587
1413
|
Args:
|
|
1588
1414
|
text: The text content of the hgroup element.
|
|
1589
1415
|
convert_as_inline: Whether to convert as inline content.
|
|
1590
1416
|
|
|
1591
1417
|
Returns:
|
|
1592
|
-
The converted markdown text
|
|
1418
|
+
The converted markdown text (only content, no HTML tags).
|
|
1593
1419
|
"""
|
|
1594
1420
|
if convert_as_inline:
|
|
1595
1421
|
return text
|
|
@@ -1597,15 +1423,12 @@ def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
|
|
|
1597
1423
|
if not text.strip():
|
|
1598
1424
|
return ""
|
|
1599
1425
|
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
content = re.sub(r"\n{3,}", "\n\n", content)
|
|
1603
|
-
|
|
1604
|
-
return f"<!-- heading group -->\n{content}\n<!-- end heading group -->\n\n"
|
|
1426
|
+
# Hgroup is a semantic container for headings, return its content
|
|
1427
|
+
return text
|
|
1605
1428
|
|
|
1606
1429
|
|
|
1607
1430
|
def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
1608
|
-
"""Convert HTML picture element
|
|
1431
|
+
"""Convert HTML picture element to semantic Markdown.
|
|
1609
1432
|
|
|
1610
1433
|
Args:
|
|
1611
1434
|
text: The text content of the picture element.
|
|
@@ -1613,44 +1436,14 @@ def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
|
1613
1436
|
tag: The picture tag element.
|
|
1614
1437
|
|
|
1615
1438
|
Returns:
|
|
1616
|
-
The converted markdown text
|
|
1439
|
+
The converted markdown text (only the img element).
|
|
1617
1440
|
"""
|
|
1441
|
+
_ = tag, convert_as_inline
|
|
1618
1442
|
if not text.strip():
|
|
1619
1443
|
return ""
|
|
1620
1444
|
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
if not img:
|
|
1625
|
-
return text.strip()
|
|
1626
|
-
|
|
1627
|
-
img_markdown = text.strip()
|
|
1628
|
-
|
|
1629
|
-
if not sources:
|
|
1630
|
-
return img_markdown
|
|
1631
|
-
|
|
1632
|
-
source_info = []
|
|
1633
|
-
for source in sources:
|
|
1634
|
-
srcset = source.get("srcset")
|
|
1635
|
-
media = source.get("media")
|
|
1636
|
-
mime_type = source.get("type")
|
|
1637
|
-
|
|
1638
|
-
if srcset:
|
|
1639
|
-
info = f'srcset="{srcset}"'
|
|
1640
|
-
if media:
|
|
1641
|
-
info += f' media="{media}"'
|
|
1642
|
-
if mime_type:
|
|
1643
|
-
info += f' type="{mime_type}"'
|
|
1644
|
-
source_info.append(info)
|
|
1645
|
-
|
|
1646
|
-
if source_info and not convert_as_inline:
|
|
1647
|
-
sources_comment = "<!-- picture sources:\n"
|
|
1648
|
-
for info in source_info:
|
|
1649
|
-
sources_comment += f" {info}\n"
|
|
1650
|
-
sources_comment += "-->\n"
|
|
1651
|
-
return f"{sources_comment}{img_markdown}"
|
|
1652
|
-
|
|
1653
|
-
return img_markdown
|
|
1445
|
+
# Picture is a container for responsive images, only the img matters for Markdown
|
|
1446
|
+
return text.strip()
|
|
1654
1447
|
|
|
1655
1448
|
|
|
1656
1449
|
def _convert_svg(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
|
|
@@ -1765,7 +1558,7 @@ def create_converters_map(
|
|
|
1765
1558
|
"abbr": _wrapper(_convert_abbr),
|
|
1766
1559
|
"article": _wrapper(_convert_semantic_block),
|
|
1767
1560
|
"aside": _wrapper(_convert_semantic_block),
|
|
1768
|
-
"audio": _wrapper(
|
|
1561
|
+
"audio": _wrapper(_convert_media_element),
|
|
1769
1562
|
"b": _wrapper(partial(_create_inline_converter(2 * strong_em_symbol))),
|
|
1770
1563
|
"bdi": _wrapper(_create_inline_converter("")),
|
|
1771
1564
|
"bdo": _wrapper(_create_inline_converter("")),
|
|
@@ -1788,7 +1581,7 @@ def create_converters_map(
|
|
|
1788
1581
|
"dt": _wrapper(_convert_dt),
|
|
1789
1582
|
"em": _wrapper(_create_inline_converter(strong_em_symbol)),
|
|
1790
1583
|
"fieldset": _wrapper(_convert_fieldset),
|
|
1791
|
-
"figcaption": _wrapper(lambda text: f"\n\n{text}
|
|
1584
|
+
"figcaption": _wrapper(lambda text: f"\n\n*{text.strip()}*\n\n" if text.strip() else ""),
|
|
1792
1585
|
"figure": _wrapper(_convert_figure),
|
|
1793
1586
|
"footer": _wrapper(_convert_semantic_block),
|
|
1794
1587
|
"form": _wrapper(_convert_form),
|
|
@@ -1861,6 +1654,6 @@ def create_converters_map(
|
|
|
1861
1654
|
"u": _wrapper(_create_inline_converter("")),
|
|
1862
1655
|
"ul": _wrapper(_convert_list),
|
|
1863
1656
|
"var": _wrapper(_create_inline_converter("*")),
|
|
1864
|
-
"video": _wrapper(
|
|
1657
|
+
"video": _wrapper(_convert_media_element),
|
|
1865
1658
|
"wbr": _wrapper(_convert_wbr),
|
|
1866
1659
|
}
|