html-to-markdown 2.9.2__cp310-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
+ """html-to-markdown: Convert HTML to Markdown using Rust backend.
2
+
3
+ This package provides high-performance HTML to Markdown conversion
4
+ powered by Rust with a clean Python API.
5
+
6
+ V2 API (current):
7
+ from html_to_markdown import convert, ConversionOptions
8
+
9
+ options = ConversionOptions(heading_style="atx")
10
+ markdown = convert(html, options)
11
+
12
+ V1 API (backward compatibility):
13
+ from html_to_markdown import convert_to_markdown
14
+
15
+ markdown = convert_to_markdown(html, heading_style="atx")
16
+ """
17
+
18
+ from html_to_markdown.api import (
19
+ InlineImage,
20
+ InlineImageConfig,
21
+ InlineImageWarning,
22
+ OptionsHandle,
23
+ convert,
24
+ convert_with_handle,
25
+ convert_with_inline_images,
26
+ create_options_handle,
27
+ )
28
+ from html_to_markdown.exceptions import (
29
+ ConflictingOptionsError,
30
+ EmptyHtmlError,
31
+ HtmlToMarkdownError,
32
+ InvalidParserError,
33
+ MissingDependencyError,
34
+ )
35
+ from html_to_markdown.options import ConversionOptions, PreprocessingOptions
36
+ from html_to_markdown.v1_compat import convert_to_markdown, markdownify
37
+
38
+ __all__ = [
39
+ "ConflictingOptionsError",
40
+ "ConversionOptions",
41
+ "EmptyHtmlError",
42
+ "HtmlToMarkdownError",
43
+ "InlineImage",
44
+ "InlineImageConfig",
45
+ "InlineImageWarning",
46
+ "InvalidParserError",
47
+ "MissingDependencyError",
48
+ "OptionsHandle",
49
+ "PreprocessingOptions",
50
+ "convert",
51
+ "convert_to_markdown",
52
+ "convert_with_handle",
53
+ "convert_with_inline_images",
54
+ "create_options_handle",
55
+ "markdownify",
56
+ ]
57
+
58
+ __version__ = "2.9.2"
@@ -0,0 +1,16 @@
1
+ import sys
2
+
3
+ from html_to_markdown.cli_proxy import main
4
+
5
+
6
def cli() -> None:
    """Run the CLI proxy on the process arguments and echo its output.

    The text returned by ``main`` is printed to stdout exactly as received
    (no trailing newline is appended). A ``ValueError`` or
    ``FileNotFoundError`` is reported on stderr and the process exits with
    status 1.
    """
    arguments = sys.argv[1:]
    try:
        output = main(arguments)
        print(output, end="")  # noqa: T201
    except (ValueError, FileNotFoundError) as error:
        print(str(error), file=sys.stderr)  # noqa: T201
        sys.exit(1)
13
+
14
+
15
+ if __name__ == "__main__":
16
+ cli()
@@ -0,0 +1,22 @@
1
+ from typing import Any
2
+
3
class PreprocessingOptions:
    """Type stub: the constructor accepts any positional and keyword arguments."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        ...
5
+
6
class ConversionOptions:
    """Type stub: the constructor accepts any positional and keyword arguments."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        ...
8
+
9
class InlineImageConfig:
    """Type stub: the constructor accepts any positional and keyword arguments."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        ...
11
+
12
class ConversionOptionsHandle:
    """Type stub for a handle built from an optional ``ConversionOptions``."""

    def __init__(
        self,
        options: ConversionOptions | None = None,
    ) -> None:
        ...
14
+
15
def convert(
    html: str,
    options: ConversionOptions | None = None,
) -> str:
    """Type stub: takes HTML text plus optional options and returns a string."""
    ...
16
def convert_with_inline_images(
    html: str,
    options: ConversionOptions | None = None,
    image_config: InlineImageConfig | None = None,
) -> tuple[str, list[Any], list[Any]]:
    """Type stub: returns a 3-tuple of a string and two lists."""
    ...
21
def create_options_handle(
    options: ConversionOptions | None = None,
) -> ConversionOptionsHandle:
    """Type stub: builds a ``ConversionOptionsHandle`` from optional options."""
    ...
22
def convert_with_options_handle(
    html: str,
    handle: ConversionOptionsHandle,
) -> str:
    """Type stub: takes HTML text plus an options handle and returns a string."""
    ...
@@ -0,0 +1,151 @@
1
+ """High-level Python API backed by the Rust core."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Literal, TypedDict, cast
6
+
7
+ import html_to_markdown._html_to_markdown as _rust
8
+ from html_to_markdown._html_to_markdown import (
9
+ ConversionOptionsHandle as OptionsHandle,
10
+ )
11
+ from html_to_markdown._html_to_markdown import (
12
+ InlineImageConfig,
13
+ )
14
+ from html_to_markdown.options import ConversionOptions, PreprocessingOptions
15
+
16
+
17
class InlineImage(TypedDict):
    """Typed mapping describing one inline image extracted during conversion.

    All keys are required. ``filename``, ``description`` and ``dimensions``
    may hold ``None``; ``source`` is restricted to the two literal tags below.
    """

    data: bytes
    format: str
    filename: str | None
    description: str | None
    dimensions: tuple[int, int] | None  # a pair of ints when present
    source: Literal["img_data_uri", "svg_element"]
    attributes: dict[str, str]
27
+
28
+
29
class InlineImageWarning(TypedDict):
    """Typed mapping for a warning raised while extracting inline images.

    Carries an integer ``index`` and a human-readable ``message``.
    """

    index: int
    message: str
34
+
35
+
36
def _to_rust_preprocessing(options: PreprocessingOptions) -> _rust.PreprocessingOptions:
    """Build the Rust-side preprocessing options from the Python dataclass.

    Each of the four preprocessing fields is copied across by keyword.
    """
    field_values = {
        "enabled": options.enabled,
        "preset": options.preset,
        "remove_navigation": options.remove_navigation,
        "remove_forms": options.remove_forms,
    }
    return _rust.PreprocessingOptions(**field_values)
43
+
44
+
45
def _to_rust_options(
    options: ConversionOptions,
    preprocessing: PreprocessingOptions,
) -> _rust.ConversionOptions:
    """Build the Rust-side conversion options from the Python dataclasses.

    Scalar fields are copied unchanged. The three tag collections
    (``keep_inline_images_in``, ``strip_tags``, ``preserve_tags``) are passed
    as lists, with ``None`` or an empty collection becoming ``[]``. The
    preprocessing settings are converted separately and nested under
    ``preprocessing``.
    """
    field_values = {
        "heading_style": options.heading_style,
        "list_indent_type": options.list_indent_type,
        "list_indent_width": options.list_indent_width,
        "bullets": options.bullets,
        "strong_em_symbol": options.strong_em_symbol,
        "escape_asterisks": options.escape_asterisks,
        "escape_underscores": options.escape_underscores,
        "escape_misc": options.escape_misc,
        "escape_ascii": options.escape_ascii,
        "code_language": options.code_language,
        "autolinks": options.autolinks,
        "default_title": options.default_title,
        "br_in_tables": options.br_in_tables,
        "hocr_spatial_tables": options.hocr_spatial_tables,
        "highlight_style": options.highlight_style,
        "extract_metadata": options.extract_metadata,
        "whitespace_mode": options.whitespace_mode,
        "strip_newlines": options.strip_newlines,
        "wrap": options.wrap,
        "wrap_width": options.wrap_width,
        "convert_as_inline": options.convert_as_inline,
        "sub_symbol": options.sub_symbol,
        "sup_symbol": options.sup_symbol,
        "newline_style": options.newline_style,
        "code_block_style": options.code_block_style,
        # A falsy collection (None or empty) is sent as an empty list.
        "keep_inline_images_in": list(options.keep_inline_images_in or ()),
        "preprocessing": _to_rust_preprocessing(preprocessing),
        "encoding": options.encoding,
        "debug": options.debug,
        "strip_tags": list(options.strip_tags or ()),
        "preserve_tags": list(options.preserve_tags or ()),
    }
    return _rust.ConversionOptions(**field_values)
82
+
83
+
84
def convert(
    html: str,
    options: ConversionOptions | None = None,
    preprocessing: PreprocessingOptions | None = None,
) -> str:
    """Convert an HTML string to Markdown through the Rust backend.

    Args:
        html: HTML text to convert.
        options: Conversion settings; a default ``ConversionOptions`` is used
            when omitted and ``preprocessing`` is supplied.
        preprocessing: Preprocessing settings; a default
            ``PreprocessingOptions`` is used when omitted and ``options`` is
            supplied.

    Returns:
        The Markdown text returned by the Rust converter.
    """
    # Nothing customised: pass None through instead of building an options object.
    if options is None and preprocessing is None:
        return _rust.convert(html, None)

    effective_options = ConversionOptions() if options is None else options
    effective_preprocessing = PreprocessingOptions() if preprocessing is None else preprocessing

    return _rust.convert(html, _to_rust_options(effective_options, effective_preprocessing))
100
+
101
+
102
def convert_with_inline_images(
    html: str,
    options: ConversionOptions | None = None,
    preprocessing: PreprocessingOptions | None = None,
    image_config: InlineImageConfig | None = None,
) -> tuple[str, list[InlineImage], list[InlineImageWarning]]:
    """Convert HTML to Markdown and collect the inline images found on the way.

    Any argument left as ``None`` is replaced with a default-constructed
    instance of its type before the Rust call.

    Returns:
        A ``(markdown, images, warnings)`` tuple; the last two items are
        returned as fresh lists.
    """
    effective_options = options if options is not None else ConversionOptions()
    effective_preprocessing = preprocessing if preprocessing is not None else PreprocessingOptions()
    effective_config = image_config if image_config is not None else InlineImageConfig()

    raw_result = _rust.convert_with_inline_images(
        html,
        _to_rust_options(effective_options, effective_preprocessing),
        effective_config,
    )
    markdown, images, warnings = cast(
        "tuple[str, list[InlineImage], list[InlineImageWarning]]",
        raw_result,
    )
    return markdown, list(images), list(warnings)
122
+
123
+
124
def create_options_handle(
    options: ConversionOptions | None = None,
    preprocessing: PreprocessingOptions | None = None,
) -> OptionsHandle:
    """Build a reusable, Rust-backed handle from conversion settings.

    Arguments left as ``None`` fall back to default-constructed
    ``ConversionOptions`` / ``PreprocessingOptions``.
    """
    effective_options = ConversionOptions() if options is None else options
    effective_preprocessing = PreprocessingOptions() if preprocessing is None else preprocessing
    return _rust.create_options_handle(_to_rust_options(effective_options, effective_preprocessing))
135
+
136
+
137
def convert_with_handle(html: str, handle: OptionsHandle) -> str:
    """Convert HTML to Markdown using an options handle created earlier.

    Delegates directly to the Rust ``convert_with_options_handle`` function.
    """
    markdown = _rust.convert_with_options_handle(html, handle)
    return markdown
140
+
141
+
142
+ __all__ = [
143
+ "InlineImage",
144
+ "InlineImageConfig",
145
+ "InlineImageWarning",
146
+ "OptionsHandle",
147
+ "convert",
148
+ "convert_with_handle",
149
+ "convert_with_inline_images",
150
+ "create_options_handle",
151
+ ]
Binary file
@@ -0,0 +1,3 @@
1
+ from html_to_markdown.cli_proxy import main
2
+
3
+ __all__ = ["main"]
@@ -0,0 +1,142 @@
1
+ import subprocess
2
+ import sys
3
+ import warnings
4
+ from pathlib import Path
5
+
6
+ from html_to_markdown.exceptions import RedundantV1FlagError, RemovedV1FlagError
7
+
8
+
9
def find_cli_binary() -> Path:
    """Locate the html-to-markdown CLI executable.

    Candidates are checked in this order and the first existing regular file
    wins:

    1. ``<ancestor>/target/release/<binary>`` for every ancestor directory of
       this module's directory, nearest ancestor first;
    2. ``<module dir>/bin/<binary>``;
    3. ``<module dir>/<binary>``.

    The binary name carries an ``.exe`` suffix on Windows.

    Returns:
        Path to the CLI binary.

    Raises:
        FileNotFoundError: If none of the candidate paths is an existing file.
    """
    executable = "html-to-markdown.exe" if sys.platform == "win32" else "html-to-markdown"
    package_dir = Path(__file__).resolve().parent

    candidates = [ancestor / "target" / "release" / executable for ancestor in package_dir.parents]
    candidates.append(package_dir / "bin" / executable)
    candidates.append(package_dir / executable)

    found = next(
        (candidate for candidate in candidates if candidate.exists() and candidate.is_file()),
        None,
    )
    if found is not None:
        return found

    msg = "html-to-markdown CLI binary not found. Please install or build the package."
    raise FileNotFoundError(msg)
40
+
41
+
42
# v1 flags that no longer exist in v2; encountering one is an error.
_REMOVED_V1_FLAGS = ("--strip", "--convert")

# v1 negation flags that are dropped from the command line with a warning.
_REDUNDANT_V1_FLAGS = (
    "--no-escape-asterisks",
    "--no-escape-underscores",
    "--no-escape-misc",
    "--no-wrap",
    "--no-autolinks",
    "--no-extract-metadata",
)


def translate_v1_args_to_v2(argv: list[str]) -> list[str]:
    """Translate v1 CLI arguments to v2 format.

    Every element of ``argv`` is examined independently, in order:

    * ``--strip`` / ``--convert`` raise ``RemovedV1FlagError``;
    * the ``--no-*`` flags listed in ``_REDUNDANT_V1_FLAGS`` are dropped and a
      ``DeprecationWarning`` is emitted;
    * ``--preprocess-html`` is rewritten to ``--preprocess`` and a
      ``DeprecationWarning`` is emitted;
    * anything else is passed through unchanged.

    Args:
        argv: List of command-line arguments.

    Returns:
        Translated list of arguments compatible with v2.

    Raises:
        RemovedV1FlagError: If a v1 flag has been removed in v2.
    """
    translated: list[str] = []

    for arg in argv:
        if arg in _REMOVED_V1_FLAGS:
            raise RemovedV1FlagError(
                flag=arg,
                reason=f"{arg} option has been removed in v2.",
                migration="Remove this flag from your command. The feature is no longer available.",
            )

        if arg in _REDUNDANT_V1_FLAGS:
            # Warn and drop: the flag is not forwarded to the v2 binary.
            warnings.warn(
                f"'{arg}' is deprecated and redundant in v2. "
                f"These options are now disabled by default. Remove this flag.",
                DeprecationWarning,
                stacklevel=2,
            )
            continue

        if arg == "--preprocess-html":
            warnings.warn(
                "'--preprocess-html' is deprecated. Use '--preprocess' instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            translated.append("--preprocess")
            continue

        translated.append(arg)

    return translated
105
+
106
+
107
def main(argv: list[str]) -> str:
    """Execute the CLI proxy.

    Translates v1 arguments to v2 and invokes the native Rust CLI binary.
    Anything the binary writes to stderr is forwarded to this process's
    stderr, whether or not the binary succeeded.

    Args:
        argv: Command-line arguments.

    Returns:
        Stdout from the CLI binary.

    Raises:
        FileNotFoundError: If the CLI binary cannot be located.
        SystemExit: With status 1 when argument translation fails, or with
            the binary's own exit status when it returns non-zero.
    """
    cli_binary = find_cli_binary()

    try:
        translated_args = translate_v1_args_to_v2(argv)
    except (RemovedV1FlagError, RedundantV1FlagError) as e:
        sys.stderr.write(f"\n❌ Error: {e.flag}\n\n")
        sys.stderr.write(f"   {e.reason}\n\n")
        sys.stderr.write(f"   💡 {e.migration}\n\n")
        sys.exit(1)
    except ValueError as e:
        sys.stderr.write(f"Error: {e}\n")
        sys.exit(1)

    result = subprocess.run(  # noqa: S603
        [str(cli_binary), *translated_args],
        capture_output=True,
        text=True,
        check=False,
    )

    # stderr is captured, so it must be re-emitted explicitly. Doing this only
    # on failure would silently drop diagnostics printed by a successful run.
    if result.stderr:
        sys.stderr.write(result.stderr)

    if result.returncode != 0:
        sys.exit(result.returncode)

    return result.stdout
@@ -0,0 +1,73 @@
1
+ from __future__ import annotations
2
+
3
+
4
class HtmlToMarkdownError(Exception):
    """Root of the html-to-markdown exception hierarchy.

    Catch this type to handle any error raised by the package.
    """
6
+
7
+
8
class MissingDependencyError(HtmlToMarkdownError):
    """Signals that a required dependency is not installed.

    Attributes:
        dependency: Name of the missing dependency.
        install_command: Optional command that installs it; when truthy it is
            appended to the error message.
    """

    def __init__(self, dependency: str, install_command: str | None = None) -> None:
        self.dependency = dependency
        self.install_command = install_command

        install_hint = f" Install with: {install_command}" if install_command else ""
        super().__init__(f"{dependency} is not installed.{install_hint}")
20
+
21
+
22
class InvalidParserError(HtmlToMarkdownError):
    """Signals that an unknown parser name was requested.

    Attributes:
        parser: The rejected parser name.
        available_parsers: Parser names listed in the error message.
    """

    def __init__(self, parser: str, available_parsers: list[str]) -> None:
        self.parser = parser
        self.available_parsers = available_parsers

        choices = ", ".join(available_parsers)
        super().__init__(f"Invalid parser '{parser}'. Available parsers: {choices}")
31
+
32
+
33
class EmptyHtmlError(HtmlToMarkdownError):
    """Signals that the HTML input was empty; carries a fixed message."""

    def __init__(self) -> None:
        message = "The input HTML is empty."
        super().__init__(message)
38
+
39
+
40
class ConflictingOptionsError(HtmlToMarkdownError):
    """Signals that two mutually exclusive options were both supplied.

    Attributes:
        option1: Name of the first conflicting option.
        option2: Name of the second conflicting option.
    """

    def __init__(self, option1: str, option2: str) -> None:
        self.option1, self.option2 = option1, option2

        message = f"Only one of '{option1}' and '{option2}' can be specified."
        super().__init__(message)
48
+
49
+
50
class InvalidEncodingError(HtmlToMarkdownError):
    """Raised when an invalid character encoding is specified.

    Attributes:
        encoding: The rejected encoding name, kept so handlers can inspect it
            without parsing the message (mirrors the other exceptions in this
            module, which expose their constructor arguments).
    """

    def __init__(self, encoding: str) -> None:
        self.encoding = encoding

        super().__init__(f"The specified encoding ({encoding}) is not valid.")
55
+
56
+
57
class UnsupportedV1FeatureError(HtmlToMarkdownError):
    """Signals use of a v1 feature that v2 does not support.

    Attributes:
        flag: The v1 flag or feature name.
        reason: Why it is unsupported.
        migration: What the caller should do instead.

    The message combines all three, separated by blank lines.
    """

    def __init__(self, flag: str, reason: str, migration: str) -> None:
        self.flag, self.reason, self.migration = flag, reason, migration
        super().__init__(f"'{flag}' is not supported in v2.\n\nReason: {reason}\n\nMigration: {migration}")
66
+
67
+
68
class RemovedV1FlagError(UnsupportedV1FeatureError):
    """Signals a v1 flag that no longer exists in v2.

    Adds no behavior of its own; the subclass only lets callers tell this
    case apart from other unsupported-feature errors.
    """
70
+
71
+
72
class RedundantV1FlagError(UnsupportedV1FeatureError):
    """Signals a v1 flag that has become redundant in v2.

    Adds no behavior of its own; the subclass only lets callers tell this
    case apart from other unsupported-feature errors.
    """
@@ -0,0 +1,144 @@
1
+ """Configuration options for HTML to Markdown conversion.
2
+
3
+ This module provides dataclass-based configuration for the v2 API.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Literal
10
+
11
+
12
@dataclass
class ConversionOptions:
    """Settings that control how HTML is rendered as Markdown.

    Bundles every conversion-related option into one object, so callers do
    not need the long list of keyword arguments used by the v1 API.

    Example:
        >>> options = ConversionOptions(
        ...     heading_style="atx",
        ...     list_indent_width=2,
        ...     escape_asterisks=True,
        ... )
        >>> from html_to_markdown import convert
        >>> markdown = convert("<h1>Title</h1>", options)
    """

    heading_style: Literal["underlined", "atx", "atx_closed"] = "atx"
    """Heading syntax: 'atx' (#, the CommonMark default), 'underlined' (===) or 'atx_closed' (# #)."""

    list_indent_type: Literal["spaces", "tabs"] = "spaces"
    """Whether nested lists are indented with spaces or tabs."""

    list_indent_width: int = 2
    """Spaces per list indentation level (CommonMark uses 2); ignored when list_indent_type='tabs'."""

    bullets: str = "-*+"
    """Bullet characters for unordered lists, cycled through (-, *, +) as nesting deepens. CommonMark compliant."""

    strong_em_symbol: Literal["*", "_"] = "*"
    """Marker character used for strong/emphasis text."""

    escape_asterisks: bool = False
    """Escape '*' in text so it cannot trigger formatting. Off by default for minimal escaping (CommonMark)."""

    escape_underscores: bool = False
    """Escape '_' in text so it cannot trigger formatting. Off by default for minimal escaping (CommonMark)."""

    escape_misc: bool = False
    """Escape miscellaneous Markdown characters. Off by default for minimal escaping (CommonMark)."""

    escape_ascii: bool = False
    """Escape every ASCII punctuation character (for CommonMark spec compliance tests). Off by default."""

    code_language: str = ""
    """Language applied to code blocks by default."""

    encoding: str = "utf-8"
    """Character encoding the HTML input is expected to use."""

    autolinks: bool = True
    """Turn bare URLs into automatic links."""

    default_title: bool = False
    """Insert a default title when none exists."""

    keep_inline_images_in: set[str] | None = None
    """Names of parent tags inside which images stay inline."""

    br_in_tables: bool = False
    """Emit <br> tags, rather than spaces, for line breaks inside table cells."""

    hocr_spatial_tables: bool = True
    """Rebuild tables in hOCR documents using spatial heuristics."""

    highlight_style: Literal["double-equal", "html", "bold"] = "double-equal"
    """How <mark> elements are highlighted."""

    extract_metadata: bool = True
    """Pull metadata from the HTML head and include it as a comment."""

    whitespace_mode: Literal["normalized", "strict"] = "normalized"
    """Whitespace handling: 'normalized' or 'strict'."""

    strip_newlines: bool = False
    """Drop newlines from the HTML before it is processed."""

    wrap: bool = False
    """Turn on text wrapping."""

    wrap_width: int = 80
    """Column at which wrapped text breaks."""

    strip_tags: set[str] | None = None
    """HTML tags to strip from the output (only their text content is kept, with no Markdown conversion)."""

    preserve_tags: set[str] | None = None
    """HTML tags kept verbatim in the output (original HTML retained). Useful for complex elements like tables."""

    convert_as_inline: bool = False
    """Handle block elements as if they were inline during conversion."""

    sub_symbol: str = ""
    """Marker for subscript text."""

    sup_symbol: str = ""
    """Marker for superscript text."""

    newline_style: Literal["spaces", "backslash"] = "spaces"
    """Line-break style: 'spaces' (two trailing spaces, CommonMark default) or 'backslash' (\\). Both are CommonMark compliant."""

    code_block_style: Literal["indented", "backticks", "tildes"] = "backticks"
    """Code block style: 'backticks' (```, better whitespace preservation), 'indented' (4 spaces) or 'tildes' (~~~). All are CommonMark compliant."""

    debug: bool = False
    """Turn on debug mode, which emits diagnostic warnings about unhandled elements and hOCR processing."""
118
+
119
+
120
@dataclass
class PreprocessingOptions:
    """Settings for the HTML clean-up pass that runs before conversion.

    Example:
        >>> options = PreprocessingOptions(
        ...     enabled=True,
        ...     preset="aggressive",
        ...     remove_navigation=True,
        ... )
    """

    enabled: bool = True
    """Run HTML preprocessing (on by default for robust handling of malformed HTML)."""

    preset: Literal["minimal", "standard", "aggressive"] = "standard"
    """How aggressive the preprocessing is."""

    remove_navigation: bool = True
    """Strip navigation elements while preprocessing."""

    remove_forms: bool = True
    """Strip form elements while preprocessing."""
File without changes
@@ -0,0 +1,192 @@
1
+ """V1 API compatibility layer.
2
+
3
+ Provides backward compatibility for the v1 convert_to_markdown API
4
+ by translating v1 kwargs to v2 ConversionOptions and PreprocessingOptions.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import warnings
10
+
11
+ from html_to_markdown import ConversionOptions, PreprocessingOptions
12
+ from html_to_markdown import convert as convert_v2
13
+
14
DEPRECATION_MESSAGE = (
    "The v1 compatibility layer is deprecated and will be removed in v3.0. "
    "Use html_to_markdown.convert() with ConversionOptions instead."
)


def _warn_deprecated(api_name: str, *, stacklevel: int = 2) -> None:
    """Emit a ``DeprecationWarning`` for a deprecated v1 entry point.

    Args:
        api_name: Display name of the deprecated API, e.g. ``"markdownify()"``.
        stacklevel: Frame to attribute the warning to, counted from the
            function that calls this helper (1 = that function itself,
            2 = whoever called it). The default therefore points at the user
            code invoking the deprecated API.
    """
    # warnings.warn counts frames from here, so add one to skip this helper's
    # own frame; otherwise the warning is attributed to the library function
    # rather than to the code that called it.
    warnings.warn(
        f"{api_name} is deprecated. {DEPRECATION_MESSAGE}",
        DeprecationWarning,
        stacklevel=stacklevel + 1,
    )
22
+
23
+
24
def convert_to_markdown(
    html: str,
    *,
    heading_style: str = "underlined",
    list_indent_type: str = "spaces",
    list_indent_width: int = 4,
    bullets: str = "*+-",
    strong_em_symbol: str = "*",
    escape_asterisks: bool = True,
    escape_underscores: bool = True,
    escape_misc: bool = True,
    code_language: str = "",
    autolinks: bool = True,
    default_title: bool = False,
    br_in_tables: bool = False,
    hocr_extract_tables: bool = True,
    hocr_table_column_threshold: int = 50,
    hocr_table_row_threshold_ratio: float = 0.5,
    highlight_style: str = "double-equal",
    extract_metadata: bool = True,
    whitespace_mode: str = "normalized",
    strip_newlines: bool = False,
    wrap: bool = False,
    wrap_width: int = 80,
    convert_as_inline: bool = False,
    sub_symbol: str = "",
    sup_symbol: str = "",
    newline_style: str = "spaces",
    keep_inline_images_in: set[str] | None = None,
    preprocess: bool = False,
    preprocessing_preset: str = "standard",
    remove_navigation: bool = True,
    remove_forms: bool = True,
    source_encoding: str = "utf-8",
    code_language_callback: object | None = None,
    strip: list[str] | None = None,
    convert: list[str] | None = None,
    custom_converters: dict[str, object] | None = None,
) -> str:
    """Convert HTML to Markdown (v1 compatibility API).

    This function provides backward compatibility with the v1 API by translating
    v1-style keyword arguments to v2 ConversionOptions and PreprocessingOptions.
    A ``DeprecationWarning`` is emitted on every call.

    Code blocks are rendered indented unless ``code_language`` is set, in which
    case backtick fences are used (the v1 behavior).

    Args:
        html: HTML string to convert.
        heading_style: Style for headings (default: "underlined" for v1 compatibility).
        list_indent_type: Type of indentation for lists.
        list_indent_width: Number of spaces for list indentation (v1 default: 4).
        bullets: Characters to use for unordered list bullets.
        strong_em_symbol: Symbol for strong/emphasis formatting.
        escape_asterisks: Escape asterisk characters (v1 default: True).
        escape_underscores: Escape underscore characters (v1 default: True).
        escape_misc: Escape miscellaneous Markdown characters (v1 default: True).
        code_language: Default language for code blocks.
        autolinks: Convert bare URLs to automatic links.
        default_title: Add a default title if none exists.
        br_in_tables: Use <br> tags for line breaks in table cells.
        hocr_extract_tables: Deprecated. Forwarded as ``hocr_spatial_tables``;
            passing False additionally emits a ``DeprecationWarning``.
        hocr_table_column_threshold: Removed in v2 (built-in heuristics are used);
            any value other than 50 raises ``NotImplementedError``.
        hocr_table_row_threshold_ratio: Removed in v2 (built-in heuristics are used);
            any value other than 0.5 raises ``NotImplementedError``.
        highlight_style: Style for highlighting <mark> elements.
        extract_metadata: Extract metadata from HTML head.
        whitespace_mode: How to handle whitespace.
        strip_newlines: Remove newlines from HTML before processing.
        wrap: Enable text wrapping.
        wrap_width: Column width for text wrapping.
        convert_as_inline: Treat block elements as inline.
        sub_symbol: Symbol for subscript text.
        sup_symbol: Symbol for superscript text.
        newline_style: Style for newlines.
        keep_inline_images_in: Parent tag names where images should remain inline.
        preprocess: Enable HTML preprocessing.
        preprocessing_preset: Preprocessing aggressiveness level.
        remove_navigation: Remove navigation elements during preprocessing.
        remove_forms: Remove form elements during preprocessing.
        source_encoding: Character encoding expected for the HTML input.
        code_language_callback: Removed in v2; any non-None value raises
            ``NotImplementedError``.
        strip: HTML tags to strip from output.
        convert: Removed in v2; any non-None value raises ``NotImplementedError``.
        custom_converters: Not yet implemented in v2; any non-None value raises
            ``NotImplementedError``.

    Returns:
        Converted Markdown string.

    Raises:
        NotImplementedError: If ``code_language_callback``, ``convert`` or
            ``custom_converters`` is supplied, or an hOCR threshold differs
            from its default.

    .. deprecated:: 2.0
        Use :func:`html_to_markdown.convert` with :class:`ConversionOptions` instead.
        The v1 API is provided for backward compatibility only.
    """
    _warn_deprecated("convert_to_markdown()", stacklevel=2)

    if code_language_callback is not None:
        raise NotImplementedError(
            "code_language_callback was removed in v2. Use the code_language option to set a default language."
        )
    if convert is not None:
        raise NotImplementedError("convert option was removed in v2. All supported tags are converted by default.")
    if custom_converters is not None:
        raise NotImplementedError("custom_converters is not yet implemented in v2")
    if not hocr_extract_tables:
        warnings.warn(
            "hocr_extract_tables is deprecated and will be removed in a future release. "
            "Use ConversionOptions(hocr_spatial_tables=False) to disable spatial table reconstruction.",
            DeprecationWarning,
            stacklevel=2,
        )
    if hocr_table_column_threshold != 50 or hocr_table_row_threshold_ratio != 0.5:
        raise NotImplementedError(
            "hOCR table threshold overrides were removed in v2. Table reconstruction now uses built-in heuristics."
        )

    # ~keep: v1 used indented code blocks by default, but switched to backticks when a language was set
    # This maintains v1 behavior for backward compatibility
    code_block_style = "backticks" if code_language else "indented"

    options = ConversionOptions(
        heading_style=heading_style,  # type: ignore[arg-type]
        list_indent_type=list_indent_type,  # type: ignore[arg-type]
        list_indent_width=list_indent_width,
        bullets=bullets,
        strong_em_symbol=strong_em_symbol,  # type: ignore[arg-type]
        escape_asterisks=escape_asterisks,
        escape_underscores=escape_underscores,
        escape_misc=escape_misc,
        code_block_style=code_block_style,  # type: ignore[arg-type]
        code_language=code_language,
        # Passed at construction instead of being patched onto the instance afterwards.
        encoding=source_encoding,
        autolinks=autolinks,
        default_title=default_title,
        br_in_tables=br_in_tables,
        hocr_spatial_tables=hocr_extract_tables,
        highlight_style=highlight_style,  # type: ignore[arg-type]
        extract_metadata=extract_metadata,
        whitespace_mode=whitespace_mode,  # type: ignore[arg-type]
        strip_newlines=strip_newlines,
        wrap=wrap,
        wrap_width=wrap_width,
        convert_as_inline=convert_as_inline,
        sub_symbol=sub_symbol,
        sup_symbol=sup_symbol,
        newline_style=newline_style,  # type: ignore[arg-type]
        keep_inline_images_in=keep_inline_images_in,
        strip_tags=set(strip) if strip else None,
    )

    preprocessing = PreprocessingOptions(
        enabled=preprocess,
        preset=preprocessing_preset,  # type: ignore[arg-type]
        remove_navigation=remove_navigation,
        remove_forms=remove_forms,
    )

    return convert_v2(html, options, preprocessing)
180
+
181
+
182
def markdownify(*args: object, **kwargs: object) -> str:
    """Deprecated alias that forwards every argument to ``convert_to_markdown``.

    Emits its own deprecation warning before delegating.

    .. deprecated:: 2.0
        Use html_to_markdown.convert() instead.
    """
    _warn_deprecated("markdownify()", stacklevel=2)
    markdown = convert_to_markdown(*args, **kwargs)  # type: ignore[arg-type]
    return markdown
190
+
191
+
192
+ __all__ = ["convert_to_markdown", "markdownify"]
@@ -0,0 +1,281 @@
1
+ Metadata-Version: 2.4
2
+ Name: html-to-markdown
3
+ Version: 2.9.2
4
+ Classifier: Development Status :: 5 - Production/Stable
5
+ Classifier: Environment :: Console
6
+ Classifier: Intended Audience :: Developers
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Programming Language :: Python :: 3 :: Only
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
15
+ Classifier: Programming Language :: Rust
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
17
+ Classifier: Topic :: Text Processing
18
+ Classifier: Topic :: Text Processing :: Markup
19
+ Classifier: Topic :: Text Processing :: Markup :: HTML
20
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
21
+ Classifier: Typing :: Typed
22
+ License-File: LICENSE
23
+ Summary: High-performance HTML to Markdown converter powered by Rust with a clean Python API
24
+ Keywords: cli-tool,converter,html,html2markdown,html5,markdown,markup,parser,rust,text-processing
25
+ Home-Page: https://github.com/Goldziher/html-to-markdown
26
+ Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
27
+ Requires-Python: >=3.10
28
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
29
+ Project-URL: Changelog, https://github.com/Goldziher/html-to-markdown/releases
30
+ Project-URL: Homepage, https://github.com/Goldziher/html-to-markdown
31
+ Project-URL: Issues, https://github.com/Goldziher/html-to-markdown/issues
32
+ Project-URL: Repository, https://github.com/Goldziher/html-to-markdown.git
33
+
34
+ # html-to-markdown
35
+
36
+ High-performance HTML to Markdown converter with a clean Python API (powered by a Rust core). The same engine also drives the Node.js, Ruby, PHP, and WebAssembly bindings, so rendered Markdown stays identical across runtimes. Wheels are published for Linux, macOS, and Windows.
37
+
38
+ [![Crates.io](https://img.shields.io/crates/v/html-to-markdown.svg)](https://crates.io/crates/html-to-markdown)
39
+ [![npm (node)](https://badge.fury.io/js/html-to-markdown-node.svg)](https://www.npmjs.com/package/html-to-markdown-node)
40
+ [![npm (wasm)](https://badge.fury.io/js/html-to-markdown-wasm.svg)](https://www.npmjs.com/package/html-to-markdown-wasm)
41
+ [![PyPI](https://badge.fury.io/py/html-to-markdown.svg)](https://pypi.org/project/html-to-markdown/)
42
+ [![Packagist](https://img.shields.io/packagist/v/goldziher/html-to-markdown.svg)](https://packagist.org/packages/goldziher/html-to-markdown)
43
+ [![RubyGems](https://badge.fury.io/rb/html-to-markdown.svg)](https://rubygems.org/gems/html-to-markdown)
44
+ [![Hex.pm](https://img.shields.io/hexpm/v/html_to_markdown.svg)](https://hex.pm/packages/html_to_markdown)
45
+ [![NuGet](https://img.shields.io/nuget/v/Goldziher.HtmlToMarkdown.svg)](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
46
+ [![Maven Central](https://img.shields.io/maven-central/v/io.github.goldziher/html-to-markdown.svg)](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
47
+ [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
48
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
49
+ [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
50
+
51
+ ## Installation
52
+
53
+ ```bash
54
+ pip install html-to-markdown
55
+ ```
56
+
57
+ ## Performance Snapshot
58
+
59
+ Apple M4 • Real Wikipedia documents • `convert()` (Python)
60
+
61
+ | Document | Size | Latency | Throughput | Docs/sec |
62
+ | ------------------- | ----- | ------- | ---------- | -------- |
63
+ | Lists (Timeline) | 129KB | 0.62ms | 208 MB/s | 1,613 |
64
+ | Tables (Countries) | 360KB | 2.02ms | 178 MB/s | 495 |
65
+ | Mixed (Python wiki) | 656KB | 4.56ms | 144 MB/s | 219 |
66
+
67
+ > V1 averaged ~2.5 MB/s (Python/BeautifulSoup). V2's Rust engine delivers 60–80× higher throughput.
68
+
69
+ ### Benchmark Fixtures (Apple M4)
70
+
71
+ These figures are pulled directly from `tools/runtime-bench` (`task bench:bindings -- --language python`) so they stay in lockstep with the Rust core:
72
+
73
+ | Document | Size | ops/sec (Python) |
74
+ | ---------------------- | ------ | ---------------- |
75
+ | Lists (Timeline) | 129 KB | 1,405 |
76
+ | Tables (Countries) | 360 KB | 352 |
77
+ | Medium (Python) | 657 KB | 158 |
78
+ | Large (Rust) | 567 KB | 183 |
79
+ | Small (Intro) | 463 KB | 223 |
80
+ | hOCR German PDF | 44 KB | 2,991 |
81
+ | hOCR Invoice | 4 KB | 23,500 |
82
+ | hOCR Embedded Tables | 37 KB | 3,464 |
83
+
84
+ > Re-run locally with `task bench:bindings -- --language python --output tmp.json` to compare against CI history.
85
+
86
+ ## Quick Start
87
+
88
+ ```python
89
+ from html_to_markdown import convert
90
+
91
+ html = """
92
+ <h1>Welcome</h1>
93
+ <p>This is <strong>fast</strong> Rust-powered conversion!</p>
94
+ <ul>
95
+ <li>Blazing fast</li>
96
+ <li>Type safe</li>
97
+ <li>Easy to use</li>
98
+ </ul>
99
+ """
100
+
101
+ markdown = convert(html)
102
+ print(markdown)
103
+ ```
104
+
105
+ ## Configuration (v2 API)
106
+
107
+ ```python
108
+ from html_to_markdown import ConversionOptions, convert
109
+
110
+ options = ConversionOptions(
111
+ heading_style="atx",
112
+ list_indent_width=2,
113
+ bullets="*+-",
114
+ )
115
+ options.escape_asterisks = True
116
+ options.code_language = "python"
117
+ options.extract_metadata = True
118
+
119
+ markdown = convert(html, options)
120
+ ```
121
+
122
+ ### Reusing Parsed Options
123
+
124
+ Avoid re-parsing the same option dictionaries inside hot loops by building a reusable handle:
125
+
126
+ ```python
127
+ from html_to_markdown import ConversionOptions, convert_with_handle, create_options_handle
128
+
129
+ handle = create_options_handle(ConversionOptions(hocr_spatial_tables=False))
130
+
131
+ for html in documents:
132
+ markdown = convert_with_handle(html, handle)
133
+ ```
134
+
135
+ ### HTML Preprocessing
136
+
137
+ ```python
138
+ from html_to_markdown import ConversionOptions, PreprocessingOptions, convert
139
+
140
+ options = ConversionOptions(
141
+ ...
142
+ )
143
+
144
+ preprocessing = PreprocessingOptions(
145
+ enabled=True,
146
+ preset="aggressive",
147
+ )
148
+
149
+ markdown = convert(scraped_html, options, preprocessing)
150
+ ```
151
+
152
+ ### Inline Image Extraction
153
+
154
+ ```python
155
+ from html_to_markdown import InlineImageConfig, convert_with_inline_images
156
+
157
+ markdown, inline_images, warnings = convert_with_inline_images(
158
+ '<p><img src="data:image/png;base64,...==" alt="Pixel" width="1" height="1"></p>',
159
+ image_config=InlineImageConfig(max_decoded_size_bytes=1024, infer_dimensions=True),
160
+ )
161
+
162
+ if inline_images:
163
+ first = inline_images[0]
164
+ print(first["format"], first["dimensions"], first["attributes"]) # e.g. "png", (1, 1), {"width": "1"}
165
+ ```
166
+
167
+ Each inline image is returned as a typed dictionary (`bytes` payload, metadata, and relevant HTML attributes). Warnings are human-readable skip reasons.
168
+
169
+ ### hOCR (HTML OCR) Support
170
+
171
+ ```python
172
+ from html_to_markdown import ConversionOptions, convert
173
+
174
+ # Default: emit structured Markdown directly
175
+ markdown = convert(hocr_html)
176
+
177
+ # hOCR documents are detected automatically; tables are reconstructed without extra configuration.
178
+ markdown = convert(hocr_html)
179
+ ```
180
+
181
+ ## CLI (same engine)
182
+
183
+ ```bash
184
+ pipx install html-to-markdown # or: pip install html-to-markdown
185
+
186
+ html-to-markdown page.html > page.md
187
+ cat page.html | html-to-markdown --heading-style atx > page.md
188
+ ```
189
+
190
+ ## API Surface
191
+
192
+ ### `ConversionOptions`
193
+
194
+ Key fields (see docstring for full matrix):
195
+
196
+ - `heading_style`: `"underlined" | "atx" | "atx_closed"`
197
+ - `list_indent_width`: spaces per indent level (default 2)
198
+ - `bullets`: cycle of bullet characters (`"*+-"`)
199
+ - `strong_em_symbol`: `"*"` or `"_"`
200
+ - `code_language`: default fenced code block language
201
+ - `wrap`, `wrap_width`: wrap Markdown output
202
+ - `strip_tags`: remove specific HTML tags
203
+ - `preprocessing`: `PreprocessingOptions`
204
+ - `encoding`: input character encoding (informational)
205
+
206
+ ### `PreprocessingOptions`
207
+
208
+ - `enabled`: enable HTML sanitisation (default: `True` since v2.4.2 for robust malformed HTML handling)
209
+ - `preset`: `"minimal" | "standard" | "aggressive"` (default: `"standard"`)
210
+ - `remove_navigation`: remove navigation elements (default: `True`)
211
+ - `remove_forms`: remove form elements (default: `True`)
212
+
213
+ **Note:** As of v2.4.2, preprocessing is enabled by default to ensure robust handling of malformed HTML (e.g., bare angle brackets like `1<2` in content). Set `enabled=False` if you need minimal preprocessing.
214
+
215
+ ### `InlineImageConfig`
216
+
217
+ - `max_decoded_size_bytes`: reject larger payloads
218
+ - `filename_prefix`: generated name prefix (`embedded_image` default)
219
+ - `capture_svg`: collect inline `<svg>` (default `True`)
220
+ - `infer_dimensions`: decode raster images to obtain dimensions (default `False`)
221
+
222
+ ## Performance: V2 vs V1 Compatibility Layer
223
+
224
+ ### ⚠️ Important: Always Use V2 API
225
+
226
+ The v2 API (`convert()`) is **strongly recommended** for all code. The v1 compatibility layer adds significant overhead and should only be used for gradual migration:
227
+
228
+ ```python
229
+ # ✅ RECOMMENDED - V2 Direct API (Fast)
230
+ from html_to_markdown import convert, ConversionOptions
231
+
232
+ markdown = convert(html) # Simple conversion - FAST
233
+ markdown = convert(html, ConversionOptions(heading_style="atx")) # With options - FAST
234
+
235
+ # ❌ AVOID - V1 Compatibility Layer (Slow)
236
+ from html_to_markdown import convert_to_markdown
237
+
238
+ markdown = convert_to_markdown(html, heading_style="atx") # Roughly half the throughput of convert()
239
+ ```
240
+
241
+ ### Performance Comparison
242
+
243
+ Benchmarked on Apple M4 with 25-paragraph HTML document:
244
+
245
+ | API | ops/sec | Relative Performance | Recommendation |
246
+ | ------------------------ | ---------------- | -------------------- | ------------------- |
247
+ | **V2 API** (`convert()`) | **129,822** | baseline | ✅ **Use this** |
248
+ | **V1 Compat Layer** | **67,673** | **~48% slower** | ⚠️ Migration only |
249
+ | **CLI** | **150-210 MB/s** | Fastest | ✅ Batch processing |
250
+
251
+ The v1 compatibility layer creates extra Python objects and performs additional conversions, significantly impacting performance.
252
+
253
+ ### When to Use Each
254
+
255
+ - **V2 API (`convert()`)**: All new code, production systems, performance-critical applications ← **Use this**
256
+ - **V1 Compat (`convert_to_markdown()`)**: Only for gradual migration from legacy codebases
257
+ - **CLI (`html-to-markdown`)**: Batch processing, shell scripts, maximum throughput
258
+
259
+ ## v1 Compatibility
260
+
261
+ A compatibility layer is provided to ease migration from v1.x:
262
+
263
+ - **Compat shim**: `html_to_markdown.v1_compat` exposes `convert_to_markdown` and `markdownify`. Keyword mappings are listed in the [changelog](https://github.com/Goldziher/html-to-markdown/blob/main/CHANGELOG.md#v200).
264
+ - **⚠️ Performance warning**: These compatibility functions run at roughly half the throughput of `convert()` (67,673 vs. 129,822 ops/sec in the benchmark above). Migrate to the v2 API as soon as possible.
265
+ - **CLI**: The Rust CLI replaces the old Python script. New flags are documented via `html-to-markdown --help`.
266
+ - **Removed options**: `code_language_callback` and the streaming APIs are gone, and `strip` has been replaced by `ConversionOptions.strip_tags` (the compat shim maps it for you); use `ConversionOptions`, `PreprocessingOptions`, and the inline-image helpers instead.
267
+
268
+ ## Links
269
+
270
+ - GitHub: [https://github.com/Goldziher/html-to-markdown](https://github.com/Goldziher/html-to-markdown)
271
+ - Discord: [https://discord.gg/pXxagNK2zN](https://discord.gg/pXxagNK2zN)
272
+ - Kreuzberg ecosystem: [https://kreuzberg.dev](https://kreuzberg.dev)
273
+
274
+ ## License
275
+
276
+ MIT License – see [LICENSE](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE).
277
+
278
+ ## Support
279
+
280
+ If you find this library useful, consider [sponsoring the project](https://github.com/sponsors/Goldziher).
281
+
@@ -0,0 +1,17 @@
1
+ html_to_markdown/__init__.py,sha256=aX-YIWAK87DqmkDOx8qMuCLbXqOHbm6sckMk5pWZOIs,1506
2
+ html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
3
+ html_to_markdown/_html_to_markdown.abi3.so,sha256=xdq0A3NaCuMVknVykYmO_WwH2S50gXN5Z1rNl9gKKM4,4401816
4
+ html_to_markdown/_html_to_markdown.pyi,sha256=ke8_jd35wwoSDj5KX1YM5Sj_y_tD7qn4GLgVG4RwZ1E,856
5
+ html_to_markdown/api.py,sha256=EDn838XWA5LSKSFhgtVHc2fqn5rzhsnCAMybPlTooak,5090
6
+ html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
7
+ html_to_markdown/cli_proxy.py,sha256=HPYKH5Mf5OUvkbEQISJvAkxrbjWKxE5GokA44HoQ6z8,3858
8
+ html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
9
+ html_to_markdown/options.py,sha256=vImRfeHAeyAy0Lnt6cTPHGbj7mTdw8AEUgo19u7MAA0,5080
10
+ html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ html_to_markdown/v1_compat.py,sha256=nZN8hVd3u4vacbfyLsPMIGqSmJENZgx1Ya0SpqVLi-g,8061
12
+ html_to_markdown/bin/html-to-markdown,sha256=mhumOD3LUj2YdrVOOEj8Q9H9yU2uKIwCrffBWMgqx1Y,4514832
13
+ html_to_markdown-2.9.2.data/scripts/html-to-markdown,sha256=mhumOD3LUj2YdrVOOEj8Q9H9yU2uKIwCrffBWMgqx1Y,4514832
14
+ html_to_markdown-2.9.2.dist-info/METADATA,sha256=yW-foX4ejruLx3NxZQN4qZJww9UKN1EL6GXp53aarUI,11697
15
+ html_to_markdown-2.9.2.dist-info/WHEEL,sha256=GZ8Bj1_F3gyOkAjh92uwv3D10I5SM_VZJkuZVDmniNg,146
16
+ html_to_markdown-2.9.2.dist-info/RECORD,,
17
+ html_to_markdown-2.9.2.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
@@ -0,0 +1,6 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.10.2)
3
+ Root-Is-Purelib: false
4
+ Tag: cp310-abi3-manylinux_2_17_x86_64
5
+ Tag: cp310-abi3-manylinux2014_x86_64
6
+
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright 2024-2025 Na'aman Hirschfeld
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.