html-to-markdown 1.9.0__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -211,9 +211,7 @@ def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool) -> str:
211
211
 
212
212
  cite_url = tag.get("cite")
213
213
 
214
- # Check if this blockquote is inside a list item
215
214
  if _has_ancestor(tag, "li"):
216
- # Indent the blockquote by 4 spaces
217
215
  lines = text.strip().split("\n")
218
216
  indented_lines = [f" > {line}" if line.strip() else "" for line in lines]
219
217
  quote_text = "\n".join(indented_lines) + "\n\n"
@@ -290,16 +288,12 @@ def _convert_list(*, tag: Tag, text: str) -> str:
290
288
  if tag.next_sibling and getattr(tag.next_sibling, "name", None) not in {"ul", "ol"}:
291
289
  before_paragraph = True
292
290
 
293
- # Check if this list is inside a list item
294
291
  if _has_ancestor(tag, "li"):
295
- # This is a nested list - needs indentation
296
- # But we need to check if it's the first element after a paragraph
297
292
  parent = tag.parent
298
293
  while parent and parent.name != "li":
299
294
  parent = parent.parent
300
295
 
301
296
  if parent:
302
- # Check if there's a paragraph before this list
303
297
  prev_p = None
304
298
  for child in parent.children:
305
299
  if hasattr(child, "name"):
@@ -309,7 +303,6 @@ def _convert_list(*, tag: Tag, text: str) -> str:
309
303
  prev_p = child
310
304
 
311
305
  if prev_p:
312
- # If there's a paragraph before, we need proper indentation
313
306
  lines = text.strip().split("\n")
314
307
  indented_lines = []
315
308
  for line in lines:
@@ -318,9 +311,21 @@ def _convert_list(*, tag: Tag, text: str) -> str:
318
311
  else:
319
312
  indented_lines.append("")
320
313
  return "\n" + "\n".join(indented_lines) + "\n"
321
- # Otherwise use the original tab indentation
322
314
  return "\n" + indent(text=text, level=1).rstrip()
323
315
 
316
+ if tag.parent and tag.parent.name in {"ul", "ol"}:
317
+ lines = text.strip().split("\n")
318
+ indented_lines = []
319
+ for line in lines:
320
+ if line.strip():
321
+ indented_lines.append(f" {line}")
322
+ else:
323
+ indented_lines.append("")
324
+ result = "\n".join(indented_lines)
325
+ if not result.endswith("\n"):
326
+ result += "\n"
327
+ return result
328
+
324
329
  return text + ("\n" if before_paragraph else "")
325
330
 
326
331
 
@@ -355,7 +360,6 @@ def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
355
360
 
356
361
  bullet = bullets[depth % len(bullets)]
357
362
 
358
- # Check if the list item contains block-level elements (like <p>, <blockquote>, etc.)
359
363
  has_block_children = any(
360
364
  child.name in {"p", "blockquote", "pre", "ul", "ol", "div", "h1", "h2", "h3", "h4", "h5", "h6"}
361
365
  for child in tag.children
@@ -363,25 +367,18 @@ def _convert_li(*, tag: Tag, text: str, bullets: str) -> str:
363
367
  )
364
368
 
365
369
  if has_block_children:
366
- # Handle multi-paragraph list items
367
- # Split by double newlines (paragraph separators)
368
370
  paragraphs = text.strip().split("\n\n")
369
371
 
370
372
  if paragraphs:
371
- # First paragraph goes directly after the bullet
372
373
  result_parts = [f"{bullet} {paragraphs[0].strip()}\n"]
373
374
 
374
- # Subsequent paragraphs need to be indented and separated by blank lines
375
375
  for para in paragraphs[1:]:
376
376
  if para.strip():
377
- # Add blank line before the paragraph
378
377
  result_parts.append("\n")
379
- # Indent each line of the paragraph by 4 spaces
380
378
  result_parts.extend(f" {line}\n" for line in para.strip().split("\n") if line.strip())
381
379
 
382
380
  return "".join(result_parts)
383
381
 
384
- # Simple case: no block elements, just inline content
385
382
  return "{} {}\n".format(bullet, (text or "").strip())
386
383
 
387
384
 
@@ -399,20 +396,15 @@ def _convert_p(*, wrap: bool, text: str, convert_as_inline: bool, wrap_width: in
399
396
 
400
397
  from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
401
398
 
402
- # Check if this paragraph is inside a list item
403
399
  if _has_ancestor(tag, "li"):
404
- # Check if this is the first paragraph in the list item
405
400
  parent = tag.parent
406
401
  while parent and parent.name != "li":
407
402
  parent = parent.parent
408
403
 
409
404
  if parent:
410
- # Get all direct children that are paragraphs
411
405
  p_children = [child for child in parent.children if hasattr(child, "name") and child.name == "p"]
412
406
 
413
- # If this is not the first paragraph, indent it
414
407
  if p_children and tag != p_children[0]:
415
- # Indent all lines by 4 spaces
416
408
  indented_lines = []
417
409
  for line in text.split("\n"):
418
410
  if line.strip():
@@ -480,13 +472,11 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
480
472
  parent_name = tag.parent.name if tag.parent and hasattr(tag.parent, "name") else ""
481
473
  tag_grand_parent = tag.parent.parent if tag.parent else None
482
474
 
483
- # Simple rowspan handling: if previous row had cells with rowspan, add empty cells
484
475
  if tag.previous_sibling and hasattr(tag.previous_sibling, "name") and tag.previous_sibling.name == "tr":
485
476
  prev_cells = cast("Tag", tag.previous_sibling).find_all(["td", "th"])
486
477
  rowspan_positions = []
487
478
  col_pos = 0
488
479
 
489
- # Check which cells in previous row have rowspan > 1
490
480
  for prev_cell in prev_cells:
491
481
  rowspan = 1
492
482
  if (
@@ -497,10 +487,8 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
497
487
  rowspan = int(prev_cell["rowspan"])
498
488
 
499
489
  if rowspan > 1:
500
- # This cell spans into current row
501
490
  rowspan_positions.append(col_pos)
502
491
 
503
- # Account for colspan
504
492
  colspan = 1
505
493
  if (
506
494
  "colspan" in prev_cell.attrs
@@ -510,25 +498,22 @@ def _convert_tr(*, tag: Tag, text: str) -> str:
510
498
  colspan = int(prev_cell["colspan"])
511
499
  col_pos += colspan
512
500
 
513
- # If there are rowspan cells from previous row, add empty cells
514
501
  if rowspan_positions:
515
- # Build new text with empty cells inserted
516
- new_cells = []
502
+ converted_cells: list[str] = []
503
+ if text.strip():
504
+ parts = text.split("|")
505
+ converted_cells.extend(part.rstrip() + " |" for part in parts[:-1] if part)
506
+
507
+ new_cells: list[str] = []
517
508
  cell_index = 0
518
509
 
519
- for pos in range(col_pos): # Total columns
510
+ for pos in range(col_pos):
520
511
  if pos in rowspan_positions:
521
- # Add empty cell for rowspan
522
512
  new_cells.append(" |")
523
- elif cell_index < len(cells):
524
- # Add actual cell content
525
- cell = cells[cell_index]
526
- cell_text = cell.get_text().strip().replace("\n", " ")
527
- colspan = _get_colspan(cell)
528
- new_cells.append(f" {cell_text} |" * colspan)
513
+ elif cell_index < len(converted_cells):
514
+ new_cells.append(converted_cells[cell_index])
529
515
  cell_index += 1
530
516
 
531
- # Override text with new cell arrangement
532
517
  text = "".join(new_cells)
533
518
 
534
519
  is_headrow = (
@@ -644,8 +629,6 @@ def _convert_colgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
644
629
  Empty string as colgroup has no Markdown representation.
645
630
  """
646
631
  _ = tag, text, convert_as_inline
647
- # Colgroup and its contents (col elements) are purely presentational
648
- # and have no equivalent in Markdown tables
649
632
  return ""
650
633
 
651
634
 
@@ -663,7 +646,6 @@ def _convert_col(*, tag: Tag, convert_as_inline: bool) -> str:
663
646
  Empty string as col has no Markdown representation.
664
647
  """
665
648
  _ = tag, convert_as_inline
666
- # Col elements are self-closing and purely presentational
667
649
  return ""
668
650
 
669
651
 
@@ -696,7 +678,6 @@ def _convert_details(*, text: str, convert_as_inline: bool) -> str:
696
678
  if convert_as_inline:
697
679
  return text
698
680
 
699
- # Details is a semantic container, return its content
700
681
  return _format_block_element(text)
701
682
 
702
683
 
@@ -713,7 +694,6 @@ def _convert_summary(*, text: str, convert_as_inline: bool) -> str:
713
694
  if convert_as_inline:
714
695
  return text
715
696
 
716
- # Summary is like a heading/title
717
697
  return _format_wrapped_block(text, "**")
718
698
 
719
699
 
@@ -826,18 +806,15 @@ def _convert_media_element(*, tag: Tag, text: str, convert_as_inline: bool) -> s
826
806
  if not src and (source_tag := tag.find("source")) and isinstance(source_tag, Tag):
827
807
  src = source_tag.get("src", "")
828
808
 
829
- # If we have a src, convert to a link
830
809
  if src and isinstance(src, str) and src.strip():
831
810
  link = f"[{src}]({src})"
832
811
  if convert_as_inline:
833
812
  return link
834
813
  result = f"{link}\n\n"
835
- # Add fallback content if present
836
814
  if text.strip():
837
815
  result += f"{text.strip()}\n\n"
838
816
  return result
839
817
 
840
- # No src, just return fallback content
841
818
  if text.strip():
842
819
  return _format_inline_or_block(text, convert_as_inline)
843
820
 
@@ -858,7 +835,6 @@ def _convert_iframe(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
858
835
  _ = text
859
836
  src = tag.get("src", "")
860
837
 
861
- # If we have a src, convert to a link
862
838
  if src and isinstance(src, str) and src.strip():
863
839
  link = f"[{src}]({src})"
864
840
  if convert_as_inline:
@@ -906,7 +882,6 @@ def _convert_time(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
906
882
  if not text.strip():
907
883
  return ""
908
884
 
909
- # Time elements are semantic - just return the content
910
885
  return text.strip()
911
886
 
912
887
 
@@ -926,7 +901,6 @@ def _convert_data(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
926
901
  if not text.strip():
927
902
  return ""
928
903
 
929
- # Data elements are semantic - just return the content
930
904
  return text.strip()
931
905
 
932
906
 
@@ -961,7 +935,6 @@ def _convert_form(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
961
935
  if not text.strip():
962
936
  return ""
963
937
 
964
- # Forms are just containers, return their content
965
938
  return text
966
939
 
967
940
 
@@ -981,7 +954,6 @@ def _convert_fieldset(*, text: str, convert_as_inline: bool) -> str:
981
954
  if not text.strip():
982
955
  return ""
983
956
 
984
- # Fieldsets are semantic groupings, return their content
985
957
  return text
986
958
 
987
959
 
@@ -1001,7 +973,6 @@ def _convert_legend(*, text: str, convert_as_inline: bool) -> str:
1001
973
  if not text.strip():
1002
974
  return ""
1003
975
 
1004
- # Legend is like a heading/title for fieldsets
1005
976
  return _format_wrapped_block(text, "**")
1006
977
 
1007
978
 
@@ -1017,7 +988,6 @@ def _convert_label(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1017
988
  The label text content.
1018
989
  """
1019
990
  _ = tag
1020
- # Labels are just text, return the content
1021
991
  if not text.strip():
1022
992
  return ""
1023
993
 
@@ -1035,7 +1005,6 @@ def _convert_input_enhanced(*, tag: Tag, convert_as_inline: bool) -> str:
1035
1005
  Empty string since input elements have no Markdown representation.
1036
1006
  """
1037
1007
  _ = tag, convert_as_inline
1038
- # Input elements have no content and no Markdown equivalent
1039
1008
  return ""
1040
1009
 
1041
1010
 
@@ -1051,7 +1020,6 @@ def _convert_textarea(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1051
1020
  The text content of the textarea.
1052
1021
  """
1053
1022
  _ = tag
1054
- # Return the text content, which is what the user entered
1055
1023
  if not text.strip():
1056
1024
  return ""
1057
1025
 
@@ -1070,17 +1038,13 @@ def _convert_select(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1070
1038
  The text content (options) as a comma-separated list.
1071
1039
  """
1072
1040
  _ = tag
1073
- # Return the options as text
1074
1041
  if not text.strip():
1075
1042
  return ""
1076
1043
 
1077
- # In inline mode, show options separated by commas
1078
1044
  if convert_as_inline:
1079
- # Remove extra whitespace and join options
1080
1045
  options = [opt.strip() for opt in text.strip().split("\n") if opt.strip()]
1081
1046
  return ", ".join(options)
1082
1047
 
1083
- # In block mode, show as a list
1084
1048
  return _format_block_element(text)
1085
1049
 
1086
1050
 
@@ -1098,14 +1062,12 @@ def _convert_option(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1098
1062
  if not text.strip():
1099
1063
  return ""
1100
1064
 
1101
- # Check if this option is selected
1102
1065
  selected = tag.get("selected") is not None
1103
1066
  content = text.strip()
1104
1067
 
1105
1068
  if convert_as_inline:
1106
1069
  return content
1107
1070
 
1108
- # In block mode, mark selected options
1109
1071
  if selected:
1110
1072
  return f"* {content}\n"
1111
1073
  return f"{content}\n"
@@ -1131,7 +1093,6 @@ def _convert_optgroup(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1131
1093
  label = tag.get("label", "")
1132
1094
  content = text.strip()
1133
1095
 
1134
- # If there's a label, show it as a heading
1135
1096
  if label and isinstance(label, str) and label.strip():
1136
1097
  return f"**{label.strip()}**\n{content}\n"
1137
1098
 
@@ -1150,7 +1111,6 @@ def _convert_button(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1150
1111
  The button text content.
1151
1112
  """
1152
1113
  _ = tag
1153
- # Buttons are just interactive text, return the text content
1154
1114
  if not text.strip():
1155
1115
  return ""
1156
1116
 
@@ -1175,7 +1135,6 @@ def _convert_progress(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1175
1135
  if not text.strip():
1176
1136
  return ""
1177
1137
 
1178
- # Progress elements convert to their text content
1179
1138
  return _format_block_element(text)
1180
1139
 
1181
1140
 
@@ -1197,7 +1156,6 @@ def _convert_meter(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1197
1156
  if not text.strip():
1198
1157
  return ""
1199
1158
 
1200
- # Meter elements convert to their text content
1201
1159
  return _format_block_element(text)
1202
1160
 
1203
1161
 
@@ -1219,7 +1177,6 @@ def _convert_output(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1219
1177
  if not text.strip():
1220
1178
  return ""
1221
1179
 
1222
- # Output elements convert to their text content
1223
1180
  return _format_block_element(text)
1224
1181
 
1225
1182
 
@@ -1241,7 +1198,6 @@ def _convert_datalist(*, tag: Tag, text: str, convert_as_inline: bool) -> str:
1241
1198
  if not text.strip():
1242
1199
  return ""
1243
1200
 
1244
- # Datalist shows options as a list
1245
1201
  return _format_block_element(text)
1246
1202
 
1247
1203
 
@@ -1352,7 +1308,6 @@ def _convert_dialog(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1352
1308
  if not text.strip():
1353
1309
  return ""
1354
1310
 
1355
- # Dialog is a semantic container, return its content
1356
1311
  return _format_block_element(text)
1357
1312
 
1358
1313
 
@@ -1374,7 +1329,6 @@ def _convert_menu(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1374
1329
  if not text.strip():
1375
1330
  return ""
1376
1331
 
1377
- # Menu is converted as a list
1378
1332
  return _format_block_element(text)
1379
1333
 
1380
1334
 
@@ -1396,8 +1350,6 @@ def _convert_figure(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1396
1350
  if convert_as_inline:
1397
1351
  return text
1398
1352
 
1399
- # Figure is a semantic container, return its content
1400
- # Make sure there's proper spacing after the figure content
1401
1353
  content = text.strip()
1402
1354
  if content and not content.endswith("\n\n"):
1403
1355
  if content.endswith("\n"):
@@ -1423,7 +1375,6 @@ def _convert_hgroup(*, text: str, convert_as_inline: bool) -> str:
1423
1375
  if not text.strip():
1424
1376
  return ""
1425
1377
 
1426
- # Hgroup is a semantic container for headings, return its content
1427
1378
  return text
1428
1379
 
1429
1380
 
@@ -1442,7 +1393,6 @@ def _convert_picture(*, text: str, convert_as_inline: bool, tag: Tag) -> str:
1442
1393
  if not text.strip():
1443
1394
  return ""
1444
1395
 
1445
- # Picture is a container for responsive images, only the img matters for Markdown
1446
1396
  return text.strip()
1447
1397
 
1448
1398
 
@@ -195,18 +195,14 @@ def _process_tag(
195
195
 
196
196
  children = list(filter(lambda value: not isinstance(value, (Comment, Doctype)), tag.children))
197
197
 
198
- # List of tags that return empty string when they have no content
199
198
  empty_when_no_content_tags = {"abbr", "var", "ins", "dfn", "time", "data", "cite", "q", "mark", "small", "u"}
200
199
 
201
200
  for i, el in enumerate(children):
202
201
  if isinstance(el, NavigableString):
203
- # Check if this is whitespace between empty elements
204
202
  if el.strip() == "" and i > 0 and i < len(children) - 1:
205
203
  prev_el = children[i - 1]
206
204
  next_el = children[i + 1]
207
205
 
208
- # If previous element was a tag that produced empty output
209
- # and next element is also a tag that could be empty, skip this whitespace
210
206
  if (
211
207
  isinstance(prev_el, Tag)
212
208
  and isinstance(next_el, Tag)
@@ -214,7 +210,6 @@ def _process_tag(
214
210
  and next_el.name.lower() in empty_when_no_content_tags
215
211
  and not prev_el.get_text().strip()
216
212
  ):
217
- # Previous tag is empty and next could be empty too, skip this whitespace
218
213
  continue
219
214
 
220
215
  text_parts.append(
@@ -281,14 +276,10 @@ def _process_text(
281
276
  break
282
277
 
283
278
  if "pre" not in ancestor_names:
284
- # Special case: if the text is only whitespace
285
279
  if text.strip() == "":
286
- # If it contains newlines, it's probably indentation whitespace, return empty
287
280
  if "\n" in text:
288
281
  text = ""
289
282
  else:
290
- # Check if this whitespace is between block elements
291
- # Define block elements that should not have whitespace between them
292
283
  block_elements = {
293
284
  "p",
294
285
  "ul",
@@ -320,7 +311,6 @@ def _process_text(
320
311
  prev_sibling = el.previous_sibling
321
312
  next_sibling = el.next_sibling
322
313
 
323
- # Check if whitespace is between block elements
324
314
  if (
325
315
  prev_sibling
326
316
  and hasattr(prev_sibling, "name")
@@ -329,10 +319,8 @@ def _process_text(
329
319
  and hasattr(next_sibling, "name")
330
320
  and next_sibling.name in block_elements
331
321
  ):
332
- # Remove whitespace between block elements
333
322
  text = ""
334
323
  else:
335
- # Otherwise it's inline whitespace, normalize to single space
336
324
  text = " " if text else ""
337
325
  else:
338
326
  has_leading_space = text.startswith((" ", "\t"))
@@ -470,7 +458,6 @@ def _extract_metadata(soup: BeautifulSoup) -> dict[str, str]:
470
458
  if canonical and isinstance(canonical, Tag) and isinstance(canonical["href"], str):
471
459
  metadata["canonical"] = canonical["href"]
472
460
 
473
- # Extract link relations
474
461
  link_relations = {"author", "license", "alternate"}
475
462
  for rel_type in link_relations:
476
463
  link = soup.find("link", rel=rel_type, href=True)
@@ -595,8 +582,6 @@ def convert_to_markdown(
595
582
  if strip_newlines:
596
583
  source = source.replace("\n", " ").replace("\r", " ")
597
584
 
598
- # Fix lxml parsing of void elements like <wbr>
599
- # lxml incorrectly treats them as container tags
600
585
  source = re.sub(r"<wbr\s*>", "<wbr />", source, flags=re.IGNORECASE)
601
586
 
602
587
  if preprocess_html and create_preprocessor is not None and preprocess_fn is not None:
@@ -737,7 +722,6 @@ def convert_to_markdown(
737
722
  if leading_whitespace_match:
738
723
  leading_whitespace = leading_whitespace_match.group(0)
739
724
 
740
- # Check if input contains list or heading tags
741
725
  list_heading_tags = {"<ol", "<ul", "<li", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6"}
742
726
  if any(tag in original_input for tag in list_heading_tags):
743
727
  leading_newlines = re.match(r"^[\n\r]*", leading_whitespace)
@@ -751,19 +735,14 @@ def convert_to_markdown(
751
735
  def normalize_spaces_outside_code(text: str) -> str:
752
736
  parts = text.split("```")
753
737
  for i in range(0, len(parts), 2):
754
- # Process each line separately to preserve leading spaces
755
738
  lines = parts[i].split("\n")
756
739
  processed_lines = []
757
740
  for line in lines:
758
- # Preserve definition list formatting (: followed by 3 spaces)
759
741
  def_parts = re.split(r"(:\s{3})", line)
760
742
  for j in range(0, len(def_parts), 2):
761
- # Only normalize non-definition-list parts
762
- # Also preserve leading spaces (for list indentation)
763
743
  match = re.match(r"^(\s*)(.*)", def_parts[j])
764
744
  if match:
765
745
  leading_spaces, rest = match.groups()
766
- # Only normalize multiple spaces that are not at the beginning
767
746
  rest = re.sub(r" {3,}", " ", rest)
768
747
  def_parts[j] = leading_spaces + rest
769
748
  processed_lines.append("".join(def_parts))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 1.9.0
3
+ Version: 1.9.1
4
4
  Summary: A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options
5
5
  Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
6
6
  License: MIT
@@ -30,10 +30,10 @@ Classifier: Typing :: Typed
30
30
  Requires-Python: >=3.10
31
31
  Description-Content-Type: text/markdown
32
32
  License-File: LICENSE
33
- Requires-Dist: beautifulsoup4>=4.13.4
33
+ Requires-Dist: beautifulsoup4>=4.13.5
34
34
  Requires-Dist: nh3>=0.3
35
35
  Provides-Extra: lxml
36
- Requires-Dist: lxml>=6; extra == "lxml"
36
+ Requires-Dist: lxml>=6.0.1; extra == "lxml"
37
37
  Dynamic: license-file
38
38
 
39
39
  # html-to-markdown
@@ -42,6 +42,14 @@ A modern, fully typed Python library for converting HTML to Markdown. This libra
42
42
  of [markdownify](https://pypi.org/project/markdownify/) with a modernized codebase, strict type safety and support for
43
43
  Python 3.10+.
44
44
 
45
+ ## Support This Project
46
+
47
+ If you find html-to-markdown useful, please consider sponsoring the development:
48
+
49
+ <a href="https://github.com/sponsors/Goldziher"><img src="https://img.shields.io/badge/Sponsor-%E2%9D%A4-pink?logo=github-sponsors" alt="Sponsor on GitHub" height="32"></a>
50
+
51
+ Your support helps maintain and improve this library for the community! 🚀
52
+
45
53
  ## Features
46
54
 
47
55
  - **Full HTML5 Support**: Comprehensive support for all modern HTML5 elements including semantic, form, table, ruby, interactive, structural, SVG, and math elements
@@ -2,15 +2,15 @@ html_to_markdown/__init__.py,sha256=TzZzhZDJHeXW_3B9zceYehz2zlttqdLsDr5un8stZLM,
2
2
  html_to_markdown/__main__.py,sha256=DJyJX7NIK0BVPNS2r3BYJ0Ci_lKHhgVOpw7ZEqACH3c,323
3
3
  html_to_markdown/cli.py,sha256=8xlgSEcnqsSM_dr1TCSgPDAo09YvUtO78PvDFivFFdg,6973
4
4
  html_to_markdown/constants.py,sha256=8vqANd-7wYvDzBm1VXZvdIxS4Xom4Ov_Yghg6jvmyio,584
5
- html_to_markdown/converters.py,sha256=ESOZQSW8qGAG1S9f_iDpPUirKIc9MGz_G0_rqbTCJ30,50018
5
+ html_to_markdown/converters.py,sha256=n0OeRnfDc7sH2j5oOuqJQmxySJxRFdfpPRHcrHJXFGE,46869
6
6
  html_to_markdown/exceptions.py,sha256=s1DaG6A23rOurF91e4jryuUzplWcC_JIAuK9_bw_4jQ,1558
7
7
  html_to_markdown/preprocessor.py,sha256=S4S1ZfLC_hkJVgmA5atImTyWQDOxfHctPbaep2QtyrQ,11248
8
- html_to_markdown/processing.py,sha256=iUVZfDG_QmFsY32O3mJZEuyxS2m8cjZaNnsstx2RkQo,40544
8
+ html_to_markdown/processing.py,sha256=ephjzcUJOilId8Z6AScaMY6AKkyNq9N0A1DMt9HfVuk,39068
9
9
  html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  html_to_markdown/utils.py,sha256=QgWPzmpZKFd6wDTe8IY3gbVT3xNzoGV3PBgd17J0O-w,2066
11
- html_to_markdown-1.9.0.dist-info/licenses/LICENSE,sha256=3J_HR5BWvUM1mlIrlkF32-uC1FM64gy8JfG17LBuheQ,1122
12
- html_to_markdown-1.9.0.dist-info/METADATA,sha256=Rptd2quL9YEGi7Bmh-pgbdPGx-8Ud8EZeZZLQNIMEik,18450
13
- html_to_markdown-1.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- html_to_markdown-1.9.0.dist-info/entry_points.txt,sha256=xmFijrTfgYW7lOrZxZGRPciicQHa5KiXKkUhBCmICtQ,116
15
- html_to_markdown-1.9.0.dist-info/top_level.txt,sha256=Ev6djb1c4dSKr_-n4K-FpEGDkzBigXY6LuZ5onqS7AE,17
16
- html_to_markdown-1.9.0.dist-info/RECORD,,
11
+ html_to_markdown-1.9.1.dist-info/licenses/LICENSE,sha256=3J_HR5BWvUM1mlIrlkF32-uC1FM64gy8JfG17LBuheQ,1122
12
+ html_to_markdown-1.9.1.dist-info/METADATA,sha256=FUHr7dId_1ZQfgKjPcInKwvwBChyxlyPMIwYl0Z4dko,18813
13
+ html_to_markdown-1.9.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ html_to_markdown-1.9.1.dist-info/entry_points.txt,sha256=xmFijrTfgYW7lOrZxZGRPciicQHa5KiXKkUhBCmICtQ,116
15
+ html_to_markdown-1.9.1.dist-info/top_level.txt,sha256=Ev6djb1c4dSKr_-n4K-FpEGDkzBigXY6LuZ5onqS7AE,17
16
+ html_to_markdown-1.9.1.dist-info/RECORD,,