html-to-markdown 1.6.0__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. The information is provided for informational purposes only.

Potentially problematic release: this version of html-to-markdown might be problematic.

html_to_markdown/processing.py CHANGED
@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from collections.abc import Generator, Mapping
-# Use the imported PageElement instead of re-importing
+
 import re
 from contextvars import ContextVar
 from io import StringIO
@@ -14,7 +14,13 @@ from typing import TYPE_CHECKING, Any, Callable, Literal, cast
 from bs4 import BeautifulSoup, Comment, Doctype, Tag
 from bs4.element import NavigableString, PageElement
 
-# Check if lxml is available for better performance
+try:
+    from html_to_markdown.preprocessor import create_preprocessor
+    from html_to_markdown.preprocessor import preprocess_html as preprocess_fn
+except ImportError:
+    create_preprocessor = None  # type: ignore[assignment]
+    preprocess_fn = None  # type: ignore[assignment]
+
 try:
     import importlib.util
 
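Note: the preprocessor import is guarded so the dependency stays optional. A minimal standalone sketch of the same pattern (preprocessing_available is a hypothetical helper, not part of the package):

try:
    from html_to_markdown.preprocessor import create_preprocessor
except ImportError:  # optional module missing: fall back to a None sentinel
    create_preprocessor = None

def preprocessing_available() -> bool:
    # hypothetical helper: True only when the optional import succeeded
    return create_preprocessor is not None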
@@ -212,8 +218,7 @@ def _process_tag(
         rendered = converters_map[tag_name](  # type: ignore[call-arg]
             tag=tag, text=text, convert_as_inline=convert_as_inline
         )
-        # For headings, ensure two newlines before if not already present
-        # Edge case where the document starts with a \n and then a heading
+
         if is_heading and context_before not in {"", "\n"}:
             n_eol_to_add = 2 - (len(context_before) - len(context_before.rstrip("\n")))
             if n_eol_to_add > 0:
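Note: the heading rule tops the preceding context up to exactly two trailing newlines. A worked instance of the same arithmetic:

context_before = "some text\n"  # one trailing newline already present
trailing = len(context_before) - len(context_before.rstrip("\n"))
n_eol_to_add = 2 - trailing  # 2 - 1 == 1, so exactly one newline is inserted
assert n_eol_to_add == 1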
@@ -233,27 +238,37 @@ def _process_text(
 ) -> str:
     text = str(el) or ""
 
-    # Cache parent lookups to avoid repeated traversal
     parent = el.parent
     parent_name = parent.name if parent else None
 
-    # Build set of ancestor tag names for efficient lookup
-    # Only traverse once instead of multiple find_parent calls
     ancestor_names = set()
     current = parent
     while current and hasattr(current, "name"):
         if current.name:
             ancestor_names.add(current.name)
         current = getattr(current, "parent", None)
-        # Limit traversal depth for performance
+
         if len(ancestor_names) > 10:
             break
 
-    # Check for pre ancestor (whitespace handling)
     if "pre" not in ancestor_names:
-        text = whitespace_re.sub(" ", text)
+        has_leading_space = text.startswith((" ", "\t"))
+
+        has_trailing_space = text.endswith((" ", "\t"))
+
+        middle_content = (
+            text[1:-1]
+            if has_leading_space and has_trailing_space
+            else text[1:]
+            if has_leading_space
+            else text[:-1]
+            if has_trailing_space
+            else text
+        )
+
+        middle_content = whitespace_re.sub(" ", middle_content.strip())
+        text = (" " if has_leading_space else "") + middle_content + (" " if has_trailing_space else "")
 
-    # Check for code-like ancestors (escaping)
     if not ancestor_names.intersection({"pre", "code", "kbd", "samp"}):
         text = escape(
             text=text,
@@ -262,14 +277,12 @@ def _process_text(
         escape_underscores=escape_underscores,
     )
 
-    # List item text processing
     if parent_name == "li" and (not el.next_sibling or getattr(el.next_sibling, "name", None) in {"ul", "ol"}):
         text = text.rstrip()
 
     return text
 
 
-# Context variable for ancestor cache - automatically isolated per conversion
 _ancestor_cache: ContextVar[dict[int, set[str]] | None] = ContextVar("ancestor_cache", default=None)
 
 
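Note: the net effect of the new branch is that a single leading or trailing space/tab survives as one space while interior whitespace runs collapse. A standalone re-creation, assuming whitespace_re matches whitespace runs as in the library's constants module:

import re

whitespace_re = re.compile(r"[\t \r\n]+")  # assumed equivalent to the library's pattern

def collapse_inline(text: str) -> str:
    # mirrors the has_leading_space / has_trailing_space logic above
    leading = " " if text.startswith((" ", "\t")) else ""
    trailing = " " if text.endswith((" ", "\t")) else ""
    return leading + whitespace_re.sub(" ", text.strip()) + trailing

assert collapse_inline("  Hello\n  world\t") == " Hello world "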
@@ -281,7 +294,6 @@ def _get_ancestor_names(element: PageElement, max_depth: int = 10) -> set[str]:
         cache = {}
         _ancestor_cache.set(cache)
 
-    # Check cache first
     if elem_id in cache:
         return cache[elem_id]
 
@@ -293,17 +305,14 @@ def _get_ancestor_names(element: PageElement, max_depth: int = 10) -> set[str]:
         if hasattr(current, "name") and current.name:
             ancestor_names.add(current.name)
 
-        # Check if we've already cached this parent's ancestors
         parent_id = id(current)
         if parent_id in cache:
-            # Reuse cached ancestors
             ancestor_names.update(cache[parent_id])
             break
 
         current = getattr(current, "parent", None)
         depth += 1
 
-    # Cache the result
     cache[elem_id] = ancestor_names
     return ancestor_names
 
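Note: keeping the cache in a ContextVar means each thread or asyncio task performing a conversion sees its own isolated dict. The pattern, reduced to a sketch:

from contextvars import ContextVar
from typing import Optional

_cache: ContextVar[Optional[dict]] = ContextVar("cache", default=None)

def get_cache() -> dict:
    cache = _cache.get()
    if cache is None:  # first access in this context: create an isolated dict
        cache = {}
        _cache.set(cache)
    return cache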
@@ -345,33 +354,29 @@ def _extract_metadata(soup: BeautifulSoup) -> dict[str, str]:
     """
     metadata = {}
 
-    # Extract title
     title_tag = soup.find("title")
     if title_tag and isinstance(title_tag, Tag) and title_tag.string:
         metadata["title"] = title_tag.string.strip()
 
-    # Extract base href
     base_tag = soup.find("base", href=True)
     if base_tag and isinstance(base_tag, Tag) and isinstance(base_tag["href"], str):
         metadata["base-href"] = base_tag["href"]
 
-    # Extract meta tags
     for meta in soup.find_all("meta"):
-        # Handle name-based meta tags
         if meta.get("name") and meta.get("content") is not None:
             name = meta["name"]
             content = meta["content"]
             if isinstance(name, str) and isinstance(content, str):
                 key = f"meta-{name.lower()}"
                 metadata[key] = content
-        # Handle property-based meta tags (Open Graph, etc.)
+
         elif meta.get("property") and meta.get("content") is not None:
             prop = meta["property"]
             content = meta["content"]
             if isinstance(prop, str) and isinstance(content, str):
                 key = f"meta-{prop.lower().replace(':', '-')}"
                 metadata[key] = content
-        # Handle http-equiv meta tags
+
         elif meta.get("http-equiv") and meta.get("content") is not None:
             equiv = meta["http-equiv"]
             content = meta["content"]
@@ -379,12 +384,10 @@ def _extract_metadata(soup: BeautifulSoup) -> dict[str, str]:
                 key = f"meta-{equiv.lower()}"
                 metadata[key] = content
 
-    # Extract canonical link
     canonical = soup.find("link", rel="canonical", href=True)
     if canonical and isinstance(canonical, Tag) and isinstance(canonical["href"], str):
         metadata["canonical"] = canonical["href"]
 
-    # Extract other important link relations
     for rel_type in ["author", "license", "alternate"]:
         link = soup.find("link", rel=rel_type, href=True)
         if link and isinstance(link, Tag) and isinstance(link["href"], str):
@@ -407,7 +410,6 @@ def _format_metadata_comment(metadata: dict[str, str]) -> str:
 
     lines = ["<!--"]
     for key, value in sorted(metadata.items()):
-        # Escape any potential comment closers in the value
         safe_value = value.replace("-->", "--&gt;")
         lines.append(f"{key}: {safe_value}")
     lines.append("-->")
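Note: for a document with a title and an Open Graph property, the emitted comment looks roughly like this (keys follow the rules above and are sorted; exact body spacing may differ):

from html_to_markdown import convert_to_markdown

html = "<html><head><title>Page</title><meta property='og:type' content='article'></head><body><p>Hi</p></body></html>"
print(convert_to_markdown(html))
# <!--
# meta-og-type: article
# title: Page
# -->
# Hi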
@@ -446,6 +448,10 @@ convert_to_markdown(
     sup_symbol: str = "",
     wrap: bool = False,
     wrap_width: int = 80,
+    preprocess_html: bool = False,
+    preprocessing_preset: Literal["minimal", "standard", "aggressive"] = "standard",
+    remove_navigation: bool = True,
+    remove_forms: bool = True,
 ) -> str:
     """Convert HTML to Markdown.
 
@@ -480,6 +486,10 @@ convert_to_markdown(
         sup_symbol: Custom symbol for superscript text. Defaults to an empty string.
         wrap: Wrap text to the specified width. Defaults to False.
         wrap_width: The number of characters at which to wrap text. Defaults to 80.
+        preprocess_html: Apply HTML preprocessing to improve quality. Defaults to False.
+        preprocessing_preset: Preset configuration for preprocessing. Defaults to "standard".
+        remove_navigation: Remove navigation elements during preprocessing. Defaults to True.
+        remove_forms: Remove form elements during preprocessing. Defaults to True.
 
     Raises:
         ConflictingOptionsError: If both 'strip' and 'convert' are specified.
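Note: a usage sketch for the new options. If the optional preprocessor module is missing, preprocess_html is silently ignored per the guarded import above:

from html_to_markdown import convert_to_markdown

md = convert_to_markdown(
    "<nav>Menu</nav><article><h1>Title</h1><p>Body</p></article>",
    preprocess_html=True,               # opt in; defaults to False
    preprocessing_preset="aggressive",  # "minimal" | "standard" | "aggressive"
    remove_navigation=True,
    remove_forms=True,
)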
@@ -499,27 +509,63 @@ convert_to_markdown(
         return source
 
     if strip_newlines:
-        # Replace all newlines with spaces before parsing
         source = source.replace("\n", " ").replace("\r", " ")
 
+    # Fix lxml parsing of void elements like <wbr>
+    # lxml incorrectly treats them as container tags
+    source = re.sub(r"<wbr\s*>", "<wbr />", source, flags=re.IGNORECASE)
+
+    if preprocess_html and create_preprocessor is not None and preprocess_fn is not None:
+        config = create_preprocessor(
+            preset=preprocessing_preset,
+            remove_navigation=remove_navigation,
+            remove_forms=remove_forms,
+        )
+        source = preprocess_fn(source, **config)
+
     if "".join(source.split("\n")):
-        # Determine parser to use
         if parser is None:
-            # Auto-detect best available parser
             parser = "lxml" if LXML_AVAILABLE else "html.parser"
 
-        # Validate parser choice
         if parser == "lxml" and not LXML_AVAILABLE:
             raise MissingDependencyError("lxml", "pip install html-to-markdown[lxml]")
 
+        original_source = source if isinstance(source, str) else str(source)
+        needs_leading_whitespace_fix = (
+            parser == "lxml" and isinstance(source, str) and original_source.startswith((" ", "\t", "\n", "\r"))
+        )
+
         source = BeautifulSoup(source, parser)
+
+        if parser == "lxml":
+            body = source.find("body")
+            if body and isinstance(body, Tag):
+                children = list(body.children)
+
+                if (
+                    len(children) == 1
+                    and isinstance(children[0], NavigableString)
+                    and original_source.startswith((" ", "\t", "\n", "\r"))
+                    and not str(children[0]).startswith((" ", "\t", "\n", "\r"))
+                ):
+                    first_child = children[0]
+
+                    leading_ws = ""
+                    for char in original_source:
+                        if char in " \t":
+                            leading_ws += char
+                        else:
+                            break
+
+                    new_text = NavigableString(leading_ws + str(first_child))
+                    first_child.replace_with(new_text)
+                    needs_leading_space_fix = False
     else:
         raise EmptyHtmlError
 
     if strip is not None and convert is not None:
         raise ConflictingOptionsError("strip", "convert")
 
-    # Use streaming processing if requested
    if stream_processing:
         result_chunks = []
         for chunk in convert_to_markdown_stream(
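Note: the <wbr> normalization added above is a plain regex rewrite applied before parsing and can be reproduced in isolation:

import re

html = "<p>long<WBR>word</p>"
print(re.sub(r"<wbr\s*>", "<wbr />", html, flags=re.IGNORECASE))
# -> <p>long<wbr />word</p>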
@@ -555,19 +601,15 @@ convert_to_markdown(
                 chunk_callback(chunk)
             result_chunks.append(chunk)
 
-        # Apply same post-processing as regular path
         result = "".join(result_chunks)
 
-        # Normalize excessive newlines - max 2 consecutive newlines (one empty line)
         result = re.sub(r"\n{3,}", "\n\n", result)
 
-        # Strip all trailing newlines in inline mode
         if convert_as_inline:
             result = result.rstrip("\n")
 
         return result
 
-    # Use shared core with string sink for regular processing
     sink = StringSink()
 
     _process_html_core(
@@ -601,10 +643,42 @@ convert_to_markdown(
 
     result = sink.get_result()
 
-    # Normalize excessive newlines - max 2 consecutive newlines (one empty line)
+    if (
+        "needs_leading_whitespace_fix" in locals()
+        and needs_leading_whitespace_fix
+        and not result.startswith((" ", "\t", "\n", "\r"))
+    ):
+        original_input = sink.original_source if hasattr(sink, "original_source") else original_source
+        leading_whitespace_match = re.match(r"^[\s]*", original_input)
+        if leading_whitespace_match:
+            leading_whitespace = leading_whitespace_match.group(0)
+
+            if any(tag in original_input for tag in ["<ol", "<ul", "<li", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6"]):
+                leading_newlines = re.match(r"^[\n\r]*", leading_whitespace)
+                leading_whitespace = leading_newlines.group(0) if leading_newlines else ""
+
+            if leading_whitespace:
+                result = leading_whitespace + result
+
     result = re.sub(r"\n{3,}", "\n\n", result)
 
-    # Strip all trailing newlines in inline mode
+    def normalize_spaces_outside_code(text: str) -> str:
+        parts = text.split("```")
+        for i in range(0, len(parts), 2):
+            # Preserve definition list formatting (: followed by 3 spaces)
+            # Split by definition list patterns to preserve them
+            def_parts = re.split(r"(:\s{3})", parts[i])
+            for j in range(0, len(def_parts), 2):
+                # Only normalize non-definition-list parts
+                def_parts[j] = re.sub(r" {3,}", " ", def_parts[j])
+            parts[i] = "".join(def_parts)
+        return "```".join(parts)
+
+    result = normalize_spaces_outside_code(result)
+
+    result = re.sub(r"\*\* {2,}", "** ", result)
+    result = re.sub(r" {2,}\*\*", " **", result)
+
     if convert_as_inline:
         result = result.rstrip("\n")
 
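Note: normalize_spaces_outside_code splits on ``` so that even-indexed segments (outside fences) are normalized while fenced segments pass through untouched; the capturing group in re.split leaves definition-list markers at odd indices. The helper, re-created verbatim for demonstration:

import re

def normalize_spaces_outside_code(text: str) -> str:
    parts = text.split("```")
    for i in range(0, len(parts), 2):  # even indices lie outside code fences
        def_parts = re.split(r"(:\s{3})", parts[i])  # separators land at odd indices
        for j in range(0, len(def_parts), 2):
            def_parts[j] = re.sub(r" {3,}", " ", def_parts[j])
        parts[i] = "".join(def_parts)
    return "```".join(parts)

s = "a    b\n```\nkeep    this\n```\nc     d"
assert normalize_spaces_outside_code(s) == "a b\n```\nkeep    this\n```\nc d"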
@@ -654,25 +728,19 @@ class StreamingSink(OutputSink):
         if not text:
             return
 
-        # Use string concatenation instead of StringIO for better performance
         current_content = self.buffer.getvalue() if self.buffer_size > 0 else ""
         current_content += text
 
-        # Yield chunks when buffer is large enough
         while len(current_content) >= self.chunk_size:
-            # Find optimal split point (prefer after newlines)
             split_pos = self._find_split_position(current_content)
 
-            # Extract chunk and update remaining content
             chunk = current_content[:split_pos]
             current_content = current_content[split_pos:]
 
-            # Store chunk and update progress
             self.chunks.append(chunk)
             self.processed_bytes += len(chunk)
             self._update_progress()
 
-        # Update buffer with remaining content
         self.buffer = StringIO()
         if current_content:
             self.buffer.write(current_content)
@@ -692,7 +760,6 @@
 
     def _find_split_position(self, content: str) -> int:
         """Find optimal position to split content for chunks."""
-        # Look for newline within reasonable distance of target size
         target = self.chunk_size
         lookahead = min(100, len(content) - target)
 
@@ -740,11 +807,9 @@
     wrap_width: int,
 ) -> None:
     """Core HTML to Markdown processing logic shared by both regular and streaming."""
-    # Set up a fresh cache for this conversion
     token = _ancestor_cache.set({})
 
     try:
-        # Input validation and preprocessing
         if isinstance(source, str):
             if (
                 heading_style == UNDERLINED
@@ -759,12 +824,9 @@
             source = source.replace("\n", " ").replace("\r", " ")
 
         if "".join(source.split("\n")):
-            # Determine parser to use
             if parser is None:
-                # Auto-detect best available parser
                 parser = "lxml" if LXML_AVAILABLE else "html.parser"
 
-            # Validate parser choice
             if parser == "lxml" and not LXML_AVAILABLE:
                 raise MissingDependencyError("lxml", "pip install html-to-markdown[lxml]")
 
@@ -775,7 +837,6 @@
         if strip is not None and convert is not None:
             raise ConflictingOptionsError("strip", "convert")
 
-        # Create converters map
         converters_map = create_converters_map(
             autolinks=autolinks,
             bullets=bullets,
@@ -795,18 +856,15 @@
         if custom_converters:
             converters_map.update(cast("ConvertersMap", custom_converters))
 
-        # Extract metadata if requested
         if extract_metadata and not convert_as_inline:
             metadata = _extract_metadata(source)
             metadata_comment = _format_metadata_comment(metadata)
             if metadata_comment:
                 sink.write(metadata_comment)
 
-        # Find the body tag to process only its content
         body = source.find("body")
         elements_to_process = body.children if body and isinstance(body, Tag) else source.children
 
-        # Process elements using shared logic
         context = ""
         for el in filter(lambda value: not isinstance(value, (Comment, Doctype)), elements_to_process):
             if isinstance(el, NavigableString):
@@ -833,10 +891,8 @@
                 sink.write(text)
                 context += text
 
-        # Finalize output
         sink.finalize()
     finally:
-        # Reset context
         _ancestor_cache.reset(token)
 
 
@@ -909,16 +965,13 @@
     Yields:
         str: Chunks of Markdown-formatted text.
     """
-    # Use shared core with streaming sink
     sink = StreamingSink(chunk_size, progress_callback)
 
-    # Estimate total size for progress reporting
     if isinstance(source, str):
         sink.total_bytes = len(source)
     elif isinstance(source, BeautifulSoup):
         sink.total_bytes = len(str(source))
 
-    # Process using shared core
     _process_html_core(
         source,
         sink,
@@ -948,30 +1001,22 @@
         wrap_width=wrap_width,
     )
 
-    # Get all chunks from the sink and apply post-processing
     all_chunks = list(sink.get_chunks())
     combined_result = "".join(all_chunks)
 
-    # Apply same post-processing as regular conversion
-    # Normalize excessive newlines - max 2 consecutive newlines (one empty line)
     combined_result = re.sub(r"\n{3,}", "\n\n", combined_result)
 
-    # Strip all trailing newlines in inline mode
     if convert_as_inline:
         combined_result = combined_result.rstrip("\n")
 
-    # Now split the post-processed result back into chunks at good boundaries
     if not combined_result:
         return
 
     pos = 0
     while pos < len(combined_result):
-        # Calculate chunk end position
        end_pos = min(pos + chunk_size, len(combined_result))
 
-        # If not at the end, try to find a good split point
         if end_pos < len(combined_result):
-            # Look for newline within reasonable distance
             search_start = max(pos, end_pos - 50)
             search_end = min(len(combined_result), end_pos + 50)
             search_area = combined_result[search_start:search_end]
@@ -980,7 +1025,6 @@
             if newline_pos > 0:
                 end_pos = search_start + newline_pos + 1
 
-        # Yield the chunk
         chunk = combined_result[pos:end_pos]
         if chunk:
             yield chunk
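Note: end to end, the generator re-chunks the post-processed result, preferring to split just after a newline. A hedged usage sketch (convert_to_markdown_stream is defined in html_to_markdown/processing.py):

from html_to_markdown.processing import convert_to_markdown_stream

for chunk in convert_to_markdown_stream("<h1>Title</h1><p>Some text.</p>", chunk_size=1024):
    print(repr(chunk))  # each chunk is at most roughly chunk_size characters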
html_to_markdown/utils.py CHANGED
@@ -6,18 +6,25 @@ from html_to_markdown.constants import line_beginning_re
 
 
 def chomp(text: str) -> tuple[str, str, str]:
-    """If the text in an inline tag like b, a, or em contains a leading or trailing
-    space, strip the string and return a space as suffix of prefix, if needed.
+    """Simplified whitespace handling for inline elements.
+
+    For semantic markdown output, preserves leading/trailing spaces as single spaces
+    and normalizes internal whitespace.
 
     Args:
         text: The text to chomp.
 
     Returns:
-        A tuple containing the prefix, suffix, and the stripped text.
+        A tuple containing the prefix, suffix, and the normalized text.
     """
-    prefix = " " if text and text[0] == " " else ""
-    suffix = " " if text and text[-1] == " " else ""
+    if not text:
+        return "", "", ""
+
+    prefix = " " if text.startswith((" ", "\t")) else ""
+    suffix = " " if text.endswith((" ", "\t")) else ""
+
     text = text.strip()
+
     return prefix, suffix, text
 
 
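Note: behavior sketch for the revised chomp, importable from html_to_markdown/utils.py:

from html_to_markdown.utils import chomp

assert chomp("\tbold text ") == (" ", " ", "bold text")  # tab prefix becomes a single space
assert chomp("") == ("", "", "")  # new early return for empty input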
html_to_markdown-1.6.0.dist-info/METADATA → html_to_markdown-1.8.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: html-to-markdown
-Version: 1.6.0
+Version: 1.8.0
 Summary: A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options
 Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
 License: MIT
@@ -32,6 +32,7 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: beautifulsoup4>=4.13.4
+Requires-Dist: nh3>=0.2.21
 Provides-Extra: lxml
 Requires-Dist: lxml>=5; extra == "lxml"
 Dynamic: license-file
html_to_markdown-1.8.0.dist-info/RECORD ADDED
@@ -0,0 +1,16 @@
+html_to_markdown/__init__.py,sha256=TzZzhZDJHeXW_3B9zceYehz2zlttqdLsDr5un8stZLM,653
+html_to_markdown/__main__.py,sha256=DJyJX7NIK0BVPNS2r3BYJ0Ci_lKHhgVOpw7ZEqACH3c,323
+html_to_markdown/cli.py,sha256=8xlgSEcnqsSM_dr1TCSgPDAo09YvUtO78PvDFivFFdg,6973
+html_to_markdown/constants.py,sha256=8vqANd-7wYvDzBm1VXZvdIxS4Xom4Ov_Yghg6jvmyio,584
+html_to_markdown/converters.py,sha256=COC2KqPelJlMCY5eXUS5gdiPOG8Yzx0U719FeXPw3GA,55514
+html_to_markdown/exceptions.py,sha256=s1DaG6A23rOurF91e4jryuUzplWcC_JIAuK9_bw_4jQ,1558
+html_to_markdown/preprocessor.py,sha256=S4S1ZfLC_hkJVgmA5atImTyWQDOxfHctPbaep2QtyrQ,11248
+html_to_markdown/processing.py,sha256=wkbhLg42U3aeVQSZFuzGt5irtN037XzRKpCE71QYZXI,36520
+html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+html_to_markdown/utils.py,sha256=QgWPzmpZKFd6wDTe8IY3gbVT3xNzoGV3PBgd17J0O-w,2066
+html_to_markdown-1.8.0.dist-info/licenses/LICENSE,sha256=3J_HR5BWvUM1mlIrlkF32-uC1FM64gy8JfG17LBuheQ,1122
+html_to_markdown-1.8.0.dist-info/METADATA,sha256=6pgiK4p0A77axLfD8MH1EGgzifP06koVV8KWS_5-iYk,17175
+html_to_markdown-1.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+html_to_markdown-1.8.0.dist-info/entry_points.txt,sha256=xmFijrTfgYW7lOrZxZGRPciicQHa5KiXKkUhBCmICtQ,116
+html_to_markdown-1.8.0.dist-info/top_level.txt,sha256=Ev6djb1c4dSKr_-n4K-FpEGDkzBigXY6LuZ5onqS7AE,17
+html_to_markdown-1.8.0.dist-info/RECORD,,
html_to_markdown-1.6.0.dist-info/RECORD DELETED
@@ -1,15 +0,0 @@
-html_to_markdown/__init__.py,sha256=-JFtH1mquoU_FLgAvq2NUvaeI0HUWd2lnoinimh5wxM,586
-html_to_markdown/__main__.py,sha256=DJyJX7NIK0BVPNS2r3BYJ0Ci_lKHhgVOpw7ZEqACH3c,323
-html_to_markdown/cli.py,sha256=WzQVr97jKECEZwW-xIJofSl3v4EhqU-De7XRQjmgc08,7179
-html_to_markdown/constants.py,sha256=8vqANd-7wYvDzBm1VXZvdIxS4Xom4Ov_Yghg6jvmyio,584
-html_to_markdown/converters.py,sha256=z7vphGLAGKn1f8T3xJojfKCdGbzKdof3LyjKTTmwkQo,59694
-html_to_markdown/exceptions.py,sha256=s1DaG6A23rOurF91e4jryuUzplWcC_JIAuK9_bw_4jQ,1558
-html_to_markdown/processing.py,sha256=S3EtjDG9xM4WcIzPEgVDrey04eT33OS2LOPwu6AhZT0,35107
-html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-html_to_markdown/utils.py,sha256=HJUDej5HSpXRtYv-OkCyD0hwnPnVfQCwY6rBRlIOt9s,1989
-html_to_markdown-1.6.0.dist-info/licenses/LICENSE,sha256=3J_HR5BWvUM1mlIrlkF32-uC1FM64gy8JfG17LBuheQ,1122
-html_to_markdown-1.6.0.dist-info/METADATA,sha256=xLpWliFQDooUVrxxN_SaA4gXy7GixPakOdJal0iC7RQ,17148
-html_to_markdown-1.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-html_to_markdown-1.6.0.dist-info/entry_points.txt,sha256=xmFijrTfgYW7lOrZxZGRPciicQHa5KiXKkUhBCmICtQ,116
-html_to_markdown-1.6.0.dist-info/top_level.txt,sha256=Ev6djb1c4dSKr_-n4K-FpEGDkzBigXY6LuZ5onqS7AE,17
-html_to_markdown-1.6.0.dist-info/RECORD,,