html-to-markdown 2.6.3__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,52 @@
1
+ """html-to-markdown: Convert HTML to Markdown using Rust backend.
2
+
3
+ This package provides high-performance HTML to Markdown conversion
4
+ powered by Rust with a clean Python API.
5
+
6
+ V2 API (current):
7
+ from html_to_markdown import convert, ConversionOptions
8
+
9
+ options = ConversionOptions(heading_style="atx")
10
+ markdown = convert(html, options)
11
+
12
+ V1 API (backward compatibility):
13
+ from html_to_markdown import convert_to_markdown
14
+
15
+ markdown = convert_to_markdown(html, heading_style="atx")
16
+ """
17
+
18
+ from html_to_markdown.api import (
19
+ InlineImage,
20
+ InlineImageConfig,
21
+ InlineImageWarning,
22
+ convert,
23
+ convert_with_inline_images,
24
+ )
25
+ from html_to_markdown.exceptions import (
26
+ ConflictingOptionsError,
27
+ EmptyHtmlError,
28
+ HtmlToMarkdownError,
29
+ InvalidParserError,
30
+ MissingDependencyError,
31
+ )
32
+ from html_to_markdown.options import ConversionOptions, PreprocessingOptions
33
+ from html_to_markdown.v1_compat import convert_to_markdown, markdownify
34
+
35
+ __all__ = [
36
+ "ConflictingOptionsError",
37
+ "ConversionOptions",
38
+ "EmptyHtmlError",
39
+ "HtmlToMarkdownError",
40
+ "InlineImage",
41
+ "InlineImageConfig",
42
+ "InlineImageWarning",
43
+ "InvalidParserError",
44
+ "MissingDependencyError",
45
+ "PreprocessingOptions",
46
+ "convert",
47
+ "convert_to_markdown",
48
+ "convert_with_inline_images",
49
+ "markdownify",
50
+ ]
51
+
52
# Must match the released distribution version (the wheel metadata says 2.6.3).
__version__ = "2.6.3"
@@ -0,0 +1,16 @@
1
+ import sys
2
+
3
+ from html_to_markdown.cli_proxy import main
4
+
5
+
6
def cli() -> None:
    """Run the CLI proxy with the process arguments and echo its output.

    The text returned by ``main`` is printed to stdout without an extra
    trailing newline. A ``ValueError`` or ``FileNotFoundError`` raised while
    doing so is reported on stderr and the process exits with status 1.
    """
    try:
        output = main(sys.argv[1:])
        print(output, end="")  # noqa: T201
    except (ValueError, FileNotFoundError) as exc:
        # print() stringifies the exception itself, so only its message is shown.
        print(exc, file=sys.stderr)  # noqa: T201
        sys.exit(1)
13
+
14
+
15
+ if __name__ == "__main__":
16
+ cli()
@@ -0,0 +1,73 @@
1
class ConversionOptions:
    """Type stub for the conversion-options class of the native module.

    Lists the attributes and constructor arguments known to this stub.
    """

    heading_style: str
    list_indent_type: str
    list_indent_width: int
    bullets: str
    strong_em_symbol: str
    escape_asterisks: bool
    escape_underscores: bool
    escape_misc: bool
    code_language: str
    autolinks: bool
    default_title: bool
    br_in_tables: bool
    hocr_spatial_tables: bool
    highlight_style: str
    extract_metadata: bool
    whitespace_mode: str
    strip_newlines: bool
    wrap: bool
    wrap_width: int
    convert_as_inline: bool
    sub_symbol: str
    sup_symbol: str
    newline_style: str
    keep_inline_images_in: list[str]
    preprocessing: PreprocessingOptions
    encoding: str

    def __init__(
        self,
        heading_style: str = "underlined",
        list_indent_type: str = "spaces",
        list_indent_width: int = 4,
        bullets: str = "*+-",
        strong_em_symbol: str = "*",
        escape_asterisks: bool = True,
        escape_underscores: bool = True,
        escape_misc: bool = True,
        code_language: str = "",
        autolinks: bool = True,
        default_title: bool = False,
        br_in_tables: bool = False,
        hocr_spatial_tables: bool = True,
        highlight_style: str = "double-equal",
        extract_metadata: bool = True,
        whitespace_mode: str = "normalized",
        strip_newlines: bool = False,
        wrap: bool = False,
        wrap_width: int = 80,
        convert_as_inline: bool = False,
        sub_symbol: str = "",
        sup_symbol: str = "",
        newline_style: str = "spaces",
        keep_inline_images_in: list[str] | None = None,
        preprocessing: PreprocessingOptions | None = None,
        encoding: str = "utf-8",
        *,
        # The Python wrapper (``_to_rust_options``) passes the keywords below
        # by name. Their positional order and native defaults are not visible
        # here, so they are declared keyword-only with opaque defaults.
        # NOTE(review): confirm against the native signature.
        escape_ascii: bool = ...,
        code_block_style: str = ...,
        debug: bool = ...,
        strip_tags: list[str] = ...,
        preserve_tags: list[str] = ...,
    ) -> None: ...
58
+
59
class PreprocessingOptions:
    """Type stub for the preprocessing-options class of the native module."""

    # Attributes declared on instances.
    enabled: bool
    preset: str
    remove_navigation: bool
    remove_forms: bool

    def __init__(
        self,
        enabled: bool = False,
        preset: str = "standard",
        remove_navigation: bool = True,
        remove_forms: bool = True,
    ) -> None: ...
72
+
73
def convert(
    html: str,
    options: ConversionOptions | None = None,
) -> str:
    """Type stub: take an HTML string and optional options, return a string."""
    ...
@@ -0,0 +1,144 @@
1
+ """New v2 functional API for HTML to Markdown conversion.
2
+
3
+ This module provides the new functional API with dataclass-based options,
4
+ using the Rust backend for conversion.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING, Literal, TypedDict, cast
10
+
11
+ import html_to_markdown._html_to_markdown as _rust # type: ignore[import-not-found]
12
+ from html_to_markdown.options import ConversionOptions, PreprocessingOptions
13
+
14
+ if TYPE_CHECKING:
15
+ from html_to_markdown._html_to_markdown import InlineImageConfig
16
+ else:
17
+ InlineImageConfig = _rust.InlineImageConfig # type: ignore[misc, assignment]
18
+
19
+
20
class InlineImage(TypedDict):
    """Dictionary describing one inline image captured during a conversion."""

    data: bytes
    format: str
    filename: str | None
    description: str | None
    dimensions: tuple[int, int] | None
    # Only these two origins are declared for an extracted image.
    source: Literal["img_data_uri", "svg_element"]
    attributes: dict[str, str]
30
+
31
+
32
class InlineImageWarning(TypedDict):
    """Dictionary describing one warning raised while extracting inline images."""

    index: int
    message: str
37
+
38
+
39
def _to_rust_preprocessing(options: PreprocessingOptions) -> _rust.PreprocessingOptions:
    """Build the native preprocessing options from the Python-level ones.

    Each field is copied by name into the matching keyword of the native
    constructor.
    """
    field_names = ("enabled", "preset", "remove_navigation", "remove_forms")
    native_kwargs = {name: getattr(options, name) for name in field_names}
    return _rust.PreprocessingOptions(**native_kwargs)
47
+
48
+
49
def _to_rust_options(
    options: ConversionOptions,
    preprocessing: PreprocessingOptions,
) -> _rust.ConversionOptions:
    """Build the native conversion options from the Python-level ones.

    Scalar fields are forwarded unchanged under the same keyword. The three
    tag collections are handed over as lists (an empty list when unset or
    empty), and ``preprocessing`` is converted with ``_to_rust_preprocessing``.
    """
    # Fields whose value is passed through as-is.
    passthrough_fields = (
        "heading_style",
        "list_indent_type",
        "list_indent_width",
        "bullets",
        "strong_em_symbol",
        "escape_asterisks",
        "escape_underscores",
        "escape_misc",
        "escape_ascii",
        "code_language",
        "autolinks",
        "default_title",
        "br_in_tables",
        "hocr_spatial_tables",
        "highlight_style",
        "extract_metadata",
        "whitespace_mode",
        "strip_newlines",
        "wrap",
        "wrap_width",
        "convert_as_inline",
        "sub_symbol",
        "sup_symbol",
        "newline_style",
        "code_block_style",
        "encoding",
        "debug",
    )
    native_kwargs = {name: getattr(options, name) for name in passthrough_fields}

    # Optional collections: a falsy value (None or empty) becomes [].
    for name in ("keep_inline_images_in", "strip_tags", "preserve_tags"):
        native_kwargs[name] = list(getattr(options, name) or ())

    native_kwargs["preprocessing"] = _to_rust_preprocessing(preprocessing)
    return _rust.ConversionOptions(**native_kwargs)
87
+
88
+
89
def convert(
    html: str,
    options: ConversionOptions | None = None,
    preprocessing: PreprocessingOptions | None = None,
) -> str:
    """Convert an HTML string to Markdown through the Rust backend.

    Args:
        html: The HTML to convert.
        options: Conversion settings; a default ``ConversionOptions()`` is used when omitted.
        preprocessing: Preprocessing settings; a default ``PreprocessingOptions()`` is used when omitted.

    Returns:
        The Markdown produced by the backend.
    """
    effective_options = ConversionOptions() if options is None else options
    effective_preprocessing = PreprocessingOptions() if preprocessing is None else preprocessing

    native_options = _to_rust_options(effective_options, effective_preprocessing)
    markdown = _rust.convert(html, native_options)
    return cast("str", markdown)
111
+
112
+
113
def convert_with_inline_images(
    html: str,
    options: ConversionOptions | None = None,
    preprocessing: PreprocessingOptions | None = None,
    image_config: InlineImageConfig | None = None,
) -> tuple[str, list[InlineImage], list[InlineImageWarning]]:
    """Convert HTML to Markdown and collect the inline images found on the way.

    Any argument left as ``None`` is replaced by a default-constructed instance
    of its type.

    Returns:
        A ``(markdown, images, warnings)`` tuple; ``images`` and ``warnings``
        are returned as new lists.
    """
    effective_options = ConversionOptions() if options is None else options
    effective_preprocessing = PreprocessingOptions() if preprocessing is None else preprocessing
    effective_image_config = InlineImageConfig() if image_config is None else image_config

    native_options = _to_rust_options(effective_options, effective_preprocessing)
    raw_result = _rust.convert_with_inline_images(html, native_options, effective_image_config)
    markdown, images, warnings = cast(
        "tuple[str, list[InlineImage], list[InlineImageWarning]]",
        raw_result,
    )
    return markdown, list(images), list(warnings)
136
+
137
+
138
+ __all__ = [
139
+ "InlineImage",
140
+ "InlineImageConfig",
141
+ "InlineImageWarning",
142
+ "convert",
143
+ "convert_with_inline_images",
144
+ ]
Binary file
@@ -0,0 +1,3 @@
1
+ from html_to_markdown.cli_proxy import main
2
+
3
+ __all__ = ["main"]
@@ -0,0 +1,142 @@
1
+ import subprocess
2
+ import sys
3
+ import warnings
4
+ from pathlib import Path
5
+
6
+ from html_to_markdown.exceptions import RedundantV1FlagError, RemovedV1FlagError
7
+
8
+
9
def find_cli_binary() -> Path:
    """Find the html-to-markdown CLI binary in expected locations.

    Candidates are probed in this order and the first regular file wins:

    1. ``<ancestor>/target/release/<binary>`` for every ancestor of this
       module's directory, nearest ancestor first.
    2. ``<module dir>/bin/<binary>``.
    3. ``<module dir>/<binary>``.

    Returns:
        Path to the CLI binary.

    Raises:
        FileNotFoundError: If the binary cannot be found.
    """
    binary_name = "html-to-markdown.exe" if sys.platform == "win32" else "html-to-markdown"
    module_dir = Path(__file__).resolve().parent

    candidates = [
        *(ancestor / "target" / "release" / binary_name for ancestor in module_dir.parents),
        module_dir / "bin" / binary_name,
        module_dir / binary_name,
    ]

    for candidate in candidates:
        # is_file() is already False for a missing path, so a separate
        # exists() check is not needed.
        if candidate.is_file():
            return candidate

    msg = "html-to-markdown CLI binary not found. Please install or build the package."
    raise FileNotFoundError(msg)
40
+
41
+
42
def translate_v1_args_to_v2(argv: list[str]) -> list[str]:
    """Translate v1 CLI arguments to v2 format.

    Every argument is handled independently:

    * ``--strip`` and ``--convert`` abort the translation with an error.
    * The ``--no-*`` flags listed below are dropped after a
      ``DeprecationWarning``.
    * ``--preprocess-html`` is replaced by ``--preprocess`` after a
      ``DeprecationWarning``.
    * Anything else is forwarded unchanged.

    Args:
        argv: List of command-line arguments.

    Returns:
        Translated list of arguments compatible with v2.

    Raises:
        RemovedV1FlagError: If a v1 flag has been removed in v2.
    """
    removed_flags = ("--strip", "--convert")
    redundant_flags = (
        "--no-escape-asterisks",
        "--no-escape-underscores",
        "--no-escape-misc",
        "--no-wrap",
        "--no-autolinks",
        "--no-extract-metadata",
    )

    translated: list[str] = []
    for arg in argv:
        if arg in removed_flags:
            raise RemovedV1FlagError(
                flag=arg,
                reason=f"{arg} option has been removed in v2.",
                migration="Remove this flag from your command. The feature is no longer available.",
            )

        if arg in redundant_flags:
            # Warn and drop: the flag is not forwarded to the v2 binary.
            warnings.warn(
                f"'{arg}' is deprecated and redundant in v2. "
                f"These options are now disabled by default. Remove this flag.",
                DeprecationWarning,
                stacklevel=2,
            )
            continue

        if arg == "--preprocess-html":
            warnings.warn(
                "'--preprocess-html' is deprecated. Use '--preprocess' instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            translated.append("--preprocess")
            continue

        translated.append(arg)

    return translated
105
+
106
+
107
def main(argv: list[str]) -> str:
    """Run the native CLI binary on behalf of the Python entry point.

    The binary is located first, then the arguments are translated from v1 to
    v2 form and the binary is run with its output captured.

    Args:
        argv: Command-line arguments.

    Returns:
        Stdout from the CLI binary.

    Exits the process with status 1 when the arguments cannot be translated,
    or with the binary's own return code (after relaying its stderr) when the
    binary fails.
    """
    binary_path = find_cli_binary()

    try:
        cli_args = translate_v1_args_to_v2(argv)
    except (RemovedV1FlagError, RedundantV1FlagError) as exc:
        sys.stderr.writelines(
            [
                f"\n❌ Error: {exc.flag}\n\n",
                f" {exc.reason}\n\n",
                f" 💡 {exc.migration}\n\n",
            ]
        )
        sys.exit(1)
    except ValueError as exc:
        sys.stderr.write(f"Error: {exc}\n")
        sys.exit(1)

    command = [str(binary_path), *cli_args]
    completed = subprocess.run(  # noqa: S603
        command,
        capture_output=True,
        text=True,
        check=False,
    )

    exit_code = completed.returncode
    if exit_code != 0:
        sys.stderr.write(completed.stderr)
        sys.exit(exit_code)

    return completed.stdout
@@ -0,0 +1,73 @@
1
+ from __future__ import annotations
2
+
3
+
4
class HtmlToMarkdownError(Exception):
    """Root of the html-to-markdown exception hierarchy."""
6
+
7
+
8
class MissingDependencyError(HtmlToMarkdownError):
    """Signals that a dependency the library needs is not installed."""

    def __init__(self, dependency: str, install_command: str | None = None) -> None:
        self.dependency = dependency
        self.install_command = install_command

        # The install hint is appended only when a command was supplied.
        hint = f" Install with: {install_command}" if install_command else ""
        super().__init__(f"{dependency} is not installed.{hint}")
20
+
21
+
22
class InvalidParserError(HtmlToMarkdownError):
    """Signals that the requested parser is not one of the available ones."""

    def __init__(self, parser: str, available_parsers: list[str]) -> None:
        self.parser = parser
        self.available_parsers = available_parsers

        choices = ", ".join(available_parsers)
        super().__init__(f"Invalid parser '{parser}'. Available parsers: {choices}")
31
+
32
+
33
class EmptyHtmlError(HtmlToMarkdownError):
    """Signals that the HTML given as input is empty."""

    def __init__(self) -> None:
        message = "The input HTML is empty."
        super().__init__(message)
38
+
39
+
40
class ConflictingOptionsError(HtmlToMarkdownError):
    """Signals that two mutually exclusive options were both specified."""

    def __init__(self, option1: str, option2: str) -> None:
        self.option1, self.option2 = option1, option2

        message = f"Only one of '{option1}' and '{option2}' can be specified."
        super().__init__(message)
48
+
49
+
50
class InvalidEncodingError(HtmlToMarkdownError):
    """Raised when an invalid character encoding is specified.

    Attributes:
        encoding: The encoding name that was rejected.
    """

    def __init__(self, encoding: str) -> None:
        # Keep the offending value on the instance so handlers can read it
        # without parsing the message.
        self.encoding = encoding

        super().__init__(f"The specified encoding ({encoding}) is not valid.")
55
+
56
+
57
class UnsupportedV1FeatureError(HtmlToMarkdownError):
    """Signals use of a v1 feature that v2 does not support.

    The flag, the reason and the migration advice are kept as attributes and
    also combined into the exception message.
    """

    def __init__(self, flag: str, reason: str, migration: str) -> None:
        self.flag = flag
        self.reason = reason
        self.migration = migration

        super().__init__(f"'{flag}' is not supported in v2.\n\nReason: {reason}\n\nMigration: {migration}")
66
+
67
+
68
class RemovedV1FlagError(UnsupportedV1FeatureError):
    """Signals use of a v1 flag that no longer exists in v2."""
70
+
71
+
72
class RedundantV1FlagError(UnsupportedV1FeatureError):
    """Signals use of a v1 flag that has become redundant in v2."""
@@ -0,0 +1,144 @@
1
+ """Configuration options for HTML to Markdown conversion.
2
+
3
+ This module provides dataclass-based configuration for the v2 API.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Literal
10
+
11
+
12
@dataclass
class ConversionOptions:
    """Settings that control how HTML is rendered as Markdown.

    All conversion-related options live on this one object instead of being
    spread over the many keyword arguments used by the v1 API.

    Example:
        >>> from html_to_markdown import convert
        >>> options = ConversionOptions(heading_style="atx", list_indent_width=2)
        >>> markdown = convert("<h1>Title</h1>", options)
    """

    heading_style: Literal["underlined", "atx", "atx_closed"] = "atx"
    """Heading syntax: 'atx' (#, the CommonMark default), 'underlined' (===) or 'atx_closed' (# #)."""

    list_indent_type: Literal["spaces", "tabs"] = "spaces"
    """Whether lists are indented with spaces or tabs."""

    list_indent_width: int = 2
    """Spaces per list indentation level (CommonMark uses 2); ignored when list_indent_type='tabs'."""

    bullets: str = "-*+"
    """Bullet characters for unordered lists, cycled through -, *, + for nested levels (CommonMark compliant)."""

    strong_em_symbol: Literal["*", "_"] = "*"
    """Character used for strong/emphasis markup."""

    escape_asterisks: bool = False
    """Escape asterisks in text so they cannot trigger formatting. Off by default for minimal escaping (CommonMark)."""

    escape_underscores: bool = False
    """Escape underscores in text so they cannot trigger formatting. Off by default for minimal escaping (CommonMark)."""

    escape_misc: bool = False
    """Escape miscellaneous Markdown characters. Off by default for minimal escaping (CommonMark)."""

    escape_ascii: bool = False
    """Escape every ASCII punctuation character (for CommonMark spec compliance tests). Off by default."""

    code_language: str = ""
    """Language assumed for code blocks by default."""

    encoding: str = "utf-8"
    """Character encoding the HTML input is expected to use."""

    autolinks: bool = True
    """Turn bare URLs into automatic links."""

    default_title: bool = False
    """Add a default title when none exists."""

    keep_inline_images_in: set[str] | None = None
    """Names of parent tags inside which images should stay inline."""

    br_in_tables: bool = False
    """Render line breaks in table cells as <br> tags rather than spaces."""

    hocr_spatial_tables: bool = True
    """Rebuild tables in hOCR documents using spatial heuristics."""

    highlight_style: Literal["double-equal", "html", "bold"] = "double-equal"
    """How <mark> elements are highlighted."""

    extract_metadata: bool = True
    """Pull metadata from the HTML head and include it as a comment."""

    whitespace_mode: Literal["normalized", "strict"] = "normalized"
    """Whitespace handling: 'normalized' or 'strict'."""

    strip_newlines: bool = False
    """Strip newlines from the HTML before it is processed."""

    wrap: bool = False
    """Turn text wrapping on."""

    wrap_width: int = 80
    """Column at which text is wrapped."""

    strip_tags: set[str] | None = None
    """HTML tags to strip from the output (only their text content is emitted, with no Markdown conversion)."""

    preserve_tags: set[str] | None = None
    """HTML tags kept as-is in the output (original HTML retained). Useful for complex elements like tables."""

    convert_as_inline: bool = False
    """Handle block elements as if they were inline."""

    sub_symbol: str = ""
    """Symbol used for subscript text."""

    sup_symbol: str = ""
    """Symbol used for superscript text."""

    newline_style: Literal["spaces", "backslash"] = "spaces"
    """Line-break syntax: 'spaces' (two trailing spaces, CommonMark default) or 'backslash' (\\). Both are CommonMark compliant."""

    code_block_style: Literal["indented", "backticks", "tildes"] = "backticks"
    """Code-block syntax: 'backticks' (```, better whitespace preservation), 'indented' (4 spaces) or 'tildes' (~~~). All are CommonMark compliant."""

    debug: bool = False
    """Turn on debug mode, with diagnostic warnings about unhandled elements and hOCR processing."""
118
+
119
+
120
@dataclass
class PreprocessingOptions:
    """Settings for the HTML clean-up pass that runs before conversion.

    Example:
        >>> options = PreprocessingOptions(
        ...     enabled=True,
        ...     preset="aggressive",
        ...     remove_navigation=True,
        ... )
    """

    enabled: bool = True
    """Turn HTML preprocessing on or off (on by default for robust handling of malformed HTML)."""

    preset: Literal["minimal", "standard", "aggressive"] = "standard"
    """How aggressive the preprocessing is."""

    remove_navigation: bool = True
    """Drop navigation elements while preprocessing."""

    remove_forms: bool = True
    """Drop form elements while preprocessing."""
File without changes
@@ -0,0 +1,193 @@
1
+ """V1 API compatibility layer.
2
+
3
+ Provides backward compatibility for the v1 convert_to_markdown API
4
+ by translating v1 kwargs to v2 ConversionOptions and PreprocessingOptions.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import warnings
10
+
11
+ from html_to_markdown import ConversionOptions, PreprocessingOptions
12
+ from html_to_markdown import convert as convert_v2
13
+
14
+
15
def convert_to_markdown(
    html: str,
    *,
    heading_style: str = "underlined",
    list_indent_type: str = "spaces",
    list_indent_width: int = 4,
    bullets: str = "*+-",
    strong_em_symbol: str = "*",
    escape_asterisks: bool = True,
    escape_underscores: bool = True,
    escape_misc: bool = True,
    code_language: str = "",
    autolinks: bool = True,
    default_title: bool = False,
    br_in_tables: bool = False,
    hocr_extract_tables: bool = True,
    hocr_table_column_threshold: int = 50,
    hocr_table_row_threshold_ratio: float = 0.5,
    highlight_style: str = "double-equal",
    extract_metadata: bool = True,
    whitespace_mode: str = "normalized",
    strip_newlines: bool = False,
    wrap: bool = False,
    wrap_width: int = 80,
    convert_as_inline: bool = False,
    sub_symbol: str = "",
    sup_symbol: str = "",
    newline_style: str = "spaces",
    keep_inline_images_in: set[str] | None = None,
    preprocess: bool = False,
    preprocessing_preset: str = "standard",
    remove_navigation: bool = True,
    remove_forms: bool = True,
    source_encoding: str = "utf-8",
    code_language_callback: object | None = None,
    strip: list[str] | None = None,
    convert: list[str] | None = None,
    custom_converters: dict[str, object] | None = None,
) -> str:
    """Convert HTML to Markdown (v1 compatibility API).

    This function provides backward compatibility with the v1 API by translating
    v1-style keyword arguments to v2 ConversionOptions and PreprocessingOptions.
    Every call emits a ``DeprecationWarning``.

    Args:
        html: HTML string to convert.
        heading_style: Style for headings (default: "underlined" for v1 compatibility).
        list_indent_type: Type of indentation for lists.
        list_indent_width: Number of spaces for list indentation (v1 default: 4).
        bullets: Characters to use for unordered list bullets.
        strong_em_symbol: Symbol for strong/emphasis formatting.
        escape_asterisks: Escape asterisk characters (v1 default: True).
        escape_underscores: Escape underscore characters (v1 default: True).
        escape_misc: Escape miscellaneous Markdown characters (v1 default: True).
        code_language: Default language for code blocks. Setting it also switches
            the code block style from "indented" to "backticks".
        autolinks: Convert bare URLs to automatic links.
        default_title: Add a default title if none exists.
        br_in_tables: Use <br> tags for line breaks in table cells.
        hocr_extract_tables: Deprecated. The value is forwarded as
            ``hocr_spatial_tables``; passing False also emits a DeprecationWarning.
        hocr_table_column_threshold: Removed in v2 - any value other than the
            default (50) raises NotImplementedError.
        hocr_table_row_threshold_ratio: Removed in v2 - any value other than the
            default (0.5) raises NotImplementedError.
        highlight_style: Style for highlighting <mark> elements.
        extract_metadata: Extract metadata from HTML head.
        whitespace_mode: How to handle whitespace.
        strip_newlines: Remove newlines from HTML before processing.
        wrap: Enable text wrapping.
        wrap_width: Column width for text wrapping.
        convert_as_inline: Treat block elements as inline.
        sub_symbol: Symbol for subscript text.
        sup_symbol: Symbol for superscript text.
        newline_style: Style for newlines.
        keep_inline_images_in: Parent tag names where images should remain inline.
        preprocess: Enable HTML preprocessing.
        preprocessing_preset: Preprocessing aggressiveness level.
        remove_navigation: Remove navigation elements during preprocessing.
        remove_forms: Remove form elements during preprocessing.
        source_encoding: Character encoding expected for the HTML input.
        code_language_callback: Removed in v2 - any non-None value raises NotImplementedError.
        strip: HTML tags to strip from output.
        convert: Removed in v2 - any non-None value raises NotImplementedError.
        custom_converters: Not yet implemented in v2 - any non-None value raises NotImplementedError.

    Returns:
        Converted Markdown string.

    Raises:
        NotImplementedError: If deprecated v1 features are used.

    .. deprecated:: 2.0
        Use :func:`html_to_markdown.convert` with :class:`ConversionOptions` instead.
        The v1 API is provided for backward compatibility only.
    """
    warnings.warn(
        "convert_to_markdown() is deprecated and will be removed in v3.0. "
        "Use html_to_markdown.convert() with ConversionOptions instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Reject v1 features that have no v2 counterpart before doing any work.
    if code_language_callback is not None:
        raise NotImplementedError(
            "code_language_callback was removed in v2. Use the code_language option to set a default language."
        )
    if convert is not None:
        raise NotImplementedError("convert option was removed in v2. All supported tags are converted by default.")
    if custom_converters is not None:
        raise NotImplementedError("custom_converters is not yet implemented in v2")
    if not hocr_extract_tables:
        warnings.warn(
            "hocr_extract_tables is deprecated and will be removed in a future release. "
            "Use ConversionOptions(hocr_spatial_tables=False) to disable spatial table reconstruction.",
            DeprecationWarning,
            stacklevel=2,
        )
    if hocr_table_column_threshold != 50 or hocr_table_row_threshold_ratio != 0.5:
        raise NotImplementedError(
            "hOCR table threshold overrides were removed in v2. Table reconstruction now uses built-in heuristics."
        )

    # ~keep: v1 used indented code blocks by default, but switched to backticks when a language was set
    # This maintains v1 behavior for backward compatibility
    code_block_style = "backticks" if code_language else "indented"

    options = ConversionOptions(
        heading_style=heading_style,  # type: ignore[arg-type]
        list_indent_type=list_indent_type,  # type: ignore[arg-type]
        list_indent_width=list_indent_width,
        bullets=bullets,
        strong_em_symbol=strong_em_symbol,  # type: ignore[arg-type]
        escape_asterisks=escape_asterisks,
        escape_underscores=escape_underscores,
        escape_misc=escape_misc,
        code_block_style=code_block_style,  # type: ignore[arg-type]
        code_language=code_language,
        encoding=source_encoding,
        autolinks=autolinks,
        default_title=default_title,
        br_in_tables=br_in_tables,
        hocr_spatial_tables=hocr_extract_tables,
        highlight_style=highlight_style,  # type: ignore[arg-type]
        extract_metadata=extract_metadata,
        whitespace_mode=whitespace_mode,  # type: ignore[arg-type]
        strip_newlines=strip_newlines,
        wrap=wrap,
        wrap_width=wrap_width,
        convert_as_inline=convert_as_inline,
        sub_symbol=sub_symbol,
        sup_symbol=sup_symbol,
        newline_style=newline_style,  # type: ignore[arg-type]
        keep_inline_images_in=keep_inline_images_in,
        strip_tags=set(strip) if strip else None,
    )

    preprocessing = PreprocessingOptions(
        enabled=preprocess,
        preset=preprocessing_preset,  # type: ignore[arg-type]
        remove_navigation=remove_navigation,
        remove_forms=remove_forms,
    )

    return convert_v2(html, options, preprocessing)
176
+
177
+
178
def markdownify(*args: object, **kwargs: object) -> str:
    """Deprecated alias that forwards every argument to ``convert_to_markdown``.

    .. deprecated:: 2.0
        Use html_to_markdown.convert() instead.
    """
    deprecation_message = (
        "markdownify() is deprecated and will be removed in v3.0. "
        "Use html_to_markdown.convert() with ConversionOptions instead."
    )
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    return convert_to_markdown(*args, **kwargs)  # type: ignore[arg-type]
191
+
192
+
193
+ __all__ = ["convert_to_markdown", "markdownify"]
@@ -0,0 +1,242 @@
1
+ Metadata-Version: 2.4
2
+ Name: html-to-markdown
3
+ Version: 2.6.3
4
+ Classifier: Development Status :: 5 - Production/Stable
5
+ Classifier: Environment :: Console
6
+ Classifier: Intended Audience :: Developers
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Programming Language :: Python :: 3 :: Only
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
15
+ Classifier: Programming Language :: Rust
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
17
+ Classifier: Topic :: Text Processing
18
+ Classifier: Topic :: Text Processing :: Markup
19
+ Classifier: Topic :: Text Processing :: Markup :: HTML
20
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
21
+ Classifier: Typing :: Typed
22
+ License-File: LICENSE
23
+ Summary: High-performance HTML to Markdown converter powered by Rust with a clean Python API
24
+ Keywords: cli-tool,converter,html,html2markdown,html5,markdown,markup,parser,rust,text-processing
25
+ Home-Page: https://github.com/Goldziher/html-to-markdown
26
+ Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
27
+ Requires-Python: >=3.10
28
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
29
+ Project-URL: Changelog, https://github.com/Goldziher/html-to-markdown/releases
30
+ Project-URL: Homepage, https://github.com/Goldziher/html-to-markdown
31
+ Project-URL: Issues, https://github.com/Goldziher/html-to-markdown/issues
32
+ Project-URL: Repository, https://github.com/Goldziher/html-to-markdown.git
33
+
34
+ # html-to-markdown
35
+
36
+ High-performance HTML to Markdown converter with a clean Python API (powered by a Rust core). The same engine also drives the Node.js, Ruby, and WebAssembly bindings, so rendered Markdown stays identical across runtimes. Wheels are published for Linux, macOS, and Windows.
37
+
38
+ [![Crates.io](https://img.shields.io/crates/v/html-to-markdown-rs.svg)](https://crates.io/crates/html-to-markdown-rs)
39
+ [![npm (node)](https://badge.fury.io/js/html-to-markdown-node.svg)](https://www.npmjs.com/package/html-to-markdown-node)
40
+ [![npm (wasm)](https://badge.fury.io/js/html-to-markdown-wasm.svg)](https://www.npmjs.com/package/html-to-markdown-wasm)
41
+ [![npm (typescript)](https://badge.fury.io/js/html-to-markdown.svg)](https://www.npmjs.com/package/html-to-markdown)
42
+ [![PyPI](https://badge.fury.io/py/html-to-markdown.svg)](https://pypi.org/project/html-to-markdown/)
43
+ [![Packagist](https://img.shields.io/packagist/v/goldziher/html-to-markdown.svg)](https://packagist.org/packages/goldziher/html-to-markdown)
44
+ [![RubyGems](https://badge.fury.io/rb/html-to-markdown.svg)](https://rubygems.org/gems/html-to-markdown)
45
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
46
+
47
+ ## Installation
48
+
49
+ ```bash
50
+ pip install html-to-markdown
51
+ ```
52
+
53
+ ## Performance Snapshot
54
+
55
+ Apple M4 • Real Wikipedia documents • `convert()` (Python)
56
+
57
+ | Document | Size | Latency | Throughput | Docs/sec |
58
+ | ------------------- | ----- | ------- | ---------- | -------- |
59
+ | Lists (Timeline) | 129KB | 0.62ms | 208 MB/s | 1,613 |
60
+ | Tables (Countries) | 360KB | 2.02ms | 178 MB/s | 495 |
61
+ | Mixed (Python wiki) | 656KB | 4.56ms | 144 MB/s | 219 |
62
+
63
+ > V1 averaged ~2.5 MB/s (Python/BeautifulSoup). V2's Rust engine delivers 60–80× higher throughput.
64
+
65
+ ## Quick Start
66
+
67
+ ```python
68
+ from html_to_markdown import convert
69
+
70
+ html = """
71
+ <h1>Welcome</h1>
72
+ <p>This is <strong>fast</strong> Rust-powered conversion!</p>
73
+ <ul>
74
+ <li>Blazing fast</li>
75
+ <li>Type safe</li>
76
+ <li>Easy to use</li>
77
+ </ul>
78
+ """
79
+
80
+ markdown = convert(html)
81
+ print(markdown)
82
+ ```
83
+
84
+ ## Configuration (v2 API)
85
+
86
+ ```python
87
+ from html_to_markdown import ConversionOptions, convert
88
+
89
+ options = ConversionOptions(
90
+ heading_style="atx",
91
+ list_indent_width=2,
92
+ bullets="*+-",
93
+ )
94
+ options.escape_asterisks = True
95
+ options.code_language = "python"
96
+ options.extract_metadata = True
97
+
98
+ markdown = convert(html, options)
99
+ ```
100
+
101
+ ### HTML Preprocessing
102
+
103
+ ```python
104
+ from html_to_markdown import ConversionOptions, PreprocessingOptions, convert
105
+
106
+ options = ConversionOptions(
107
+ preprocessing=PreprocessingOptions(enabled=True, preset="aggressive"),
108
+ )
109
+
110
+ markdown = convert(scraped_html, options)
111
+ ```
112
+
113
+ ### Inline Image Extraction
114
+
115
+ ```python
116
+ from html_to_markdown import InlineImageConfig, convert_with_inline_images
117
+
118
+ markdown, inline_images, warnings = convert_with_inline_images(
119
+ '<p><img src="data:image/png;base64,...==" alt="Pixel" width="1" height="1"></p>',
120
+ image_config=InlineImageConfig(max_decoded_size_bytes=1024, infer_dimensions=True),
121
+ )
122
+
123
+ if inline_images:
124
+ first = inline_images[0]
125
+ print(first["format"], first["dimensions"], first["attributes"]) # e.g. "png", (1, 1), {"width": "1"}
126
+ ```
127
+
128
+ Each inline image is returned as a typed dictionary (`bytes` payload, metadata, and relevant HTML attributes). Warnings are human-readable skip reasons.
129
+
130
+ ### hOCR (HTML OCR) Support
131
+
132
+ ```python
133
+ from html_to_markdown import ConversionOptions, convert
134
+
135
+ # Default: emit structured Markdown directly
136
+ markdown = convert(hocr_html)
137
+
138
+ # hOCR documents are detected automatically; tables are reconstructed without extra configuration.
139
+ markdown = convert(hocr_html)
140
+ ```
141
+
142
+ ## CLI (same engine)
143
+
144
+ ```bash
145
+ pipx install html-to-markdown # or: pip install html-to-markdown
146
+
147
+ html-to-markdown page.html > page.md
148
+ cat page.html | html-to-markdown --heading-style atx > page.md
149
+ ```
150
+
151
+ ## API Surface
152
+
153
+ ### `ConversionOptions`
154
+
155
+ Key fields (see docstring for full matrix):
156
+
157
+ - `heading_style`: `"underlined" | "atx" | "atx_closed"`
158
+ - `list_indent_width`: spaces per indent level (default 2)
159
+ - `bullets`: cycle of bullet characters (`"*+-"`)
160
+ - `strong_em_symbol`: `"*"` or `"_"`
161
+ - `code_language`: default fenced code block language
162
+ - `wrap`, `wrap_width`: wrap Markdown output
163
+ - `strip_tags`: remove specific HTML tags
164
+ - `preprocessing`: `PreprocessingOptions`
165
+ - `encoding`: input character encoding (informational)
166
+
167
+ ### `PreprocessingOptions`
168
+
169
+ - `enabled`: enable HTML sanitisation (default: `True` since v2.4.2 for robust malformed HTML handling)
170
+ - `preset`: `"minimal" | "standard" | "aggressive"` (default: `"standard"`)
171
+ - `remove_navigation`: remove navigation elements (default: `True`)
172
+ - `remove_forms`: remove form elements (default: `True`)
173
+
174
+ **Note:** As of v2.4.2, preprocessing is enabled by default to ensure robust handling of malformed HTML (e.g., bare angle brackets like `1<2` in content). Set `enabled=False` if you need minimal preprocessing.
175
+
176
+ ### `InlineImageConfig`
177
+
178
+ - `max_decoded_size_bytes`: reject larger payloads
179
+ - `filename_prefix`: generated name prefix (`embedded_image` default)
180
+ - `capture_svg`: collect inline `<svg>` (default `True`)
181
+ - `infer_dimensions`: decode raster images to obtain dimensions (default `False`)
182
+
183
+ ## Performance: V2 vs V1 Compatibility Layer
184
+
185
+ ### ⚠️ Important: Always Use V2 API
186
+
187
+ The v2 API (`convert()`) is **strongly recommended** for all code. The v1 compatibility layer adds significant overhead and should only be used for gradual migration:
188
+
189
+ ```python
190
+ # ✅ RECOMMENDED - V2 Direct API (Fast)
191
+ from html_to_markdown import convert, ConversionOptions
192
+
193
+ markdown = convert(html) # Simple conversion - FAST
194
+ markdown = convert(html, ConversionOptions(heading_style="atx")) # With options - FAST
195
+
196
+ # ❌ AVOID - V1 Compatibility Layer (Slow)
197
+ from html_to_markdown import convert_to_markdown
198
+
199
+ markdown = convert_to_markdown(html, heading_style="atx") # Adds 77% overhead
200
+ ```
201
+
202
+ ### Performance Comparison
203
+
204
+ Benchmarked on Apple M4 with 25-paragraph HTML document:
205
+
206
+ | API | ops/sec | Relative Performance | Recommendation |
207
+ | ------------------------ | ---------------- | -------------------- | ------------------- |
208
+ | **V2 API** (`convert()`) | **129,822** | baseline | ✅ **Use this** |
209
+ | **V1 Compat Layer** | **67,673** | **77% slower** | ⚠️ Migration only |
210
+ | **CLI** | **150-210 MB/s** | Fastest | ✅ Batch processing |
211
+
212
+ The v1 compatibility layer creates extra Python objects and performs additional conversions, significantly impacting performance.
213
+
214
+ ### When to Use Each
215
+
216
+ - **V2 API (`convert()`)**: All new code, production systems, performance-critical applications ← **Use this**
217
+ - **V1 Compat (`convert_to_markdown()`)**: Only for gradual migration from legacy codebases
218
+ - **CLI (`html-to-markdown`)**: Batch processing, shell scripts, maximum throughput
219
+
220
+ ## v1 Compatibility
221
+
222
+ A compatibility layer is provided to ease migration from v1.x:
223
+
224
+ - **Compat shim**: `html_to_markdown.v1_compat` exposes `convert_to_markdown` and `markdownify`. Keyword mappings are listed in the [changelog](CHANGELOG.md#v200).
225
+ - **⚠️ Performance warning**: These compatibility functions add 77% overhead. Migrate to v2 API as soon as possible.
226
+ - **CLI**: The Rust CLI replaces the old Python script. New flags are documented via `html-to-markdown --help`.
227
+ - **Removed options**: `code_language_callback`, `strip`, and the streaming APIs were removed from the v2 API; use `ConversionOptions` (e.g. `strip_tags` in place of `strip`), `PreprocessingOptions`, and the inline-image helpers instead.
228
+
229
+ ## Links
230
+
231
+ - GitHub: [https://github.com/Goldziher/html-to-markdown](https://github.com/Goldziher/html-to-markdown)
232
+ - Discord: [https://discord.gg/pXxagNK2zN](https://discord.gg/pXxagNK2zN)
233
+ - Kreuzberg ecosystem: [https://kreuzberg.dev](https://kreuzberg.dev)
234
+
235
+ ## License
236
+
237
+ MIT License – see [LICENSE](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE).
238
+
239
+ ## Support
240
+
241
+ If you find this library useful, consider [sponsoring the project](https://github.com/sponsors/Goldziher).
242
+
@@ -0,0 +1,17 @@
1
+ html_to_markdown-2.6.3.dist-info/RECORD,,
2
+ html_to_markdown-2.6.3.dist-info/WHEEL,sha256=HtAbUhtjhH1WdiDuIy2CapdoAiKCwe6bij_Tlxr1lEg,131
3
+ html_to_markdown-2.6.3.dist-info/METADATA,sha256=2jZEsKKkOxN4Bn8ID11oyONT7_hvHm23pmVvpgyAVK0,9789
4
+ html_to_markdown-2.6.3.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
5
+ html_to_markdown-2.6.3.data/scripts/html-to-markdown,sha256=bRYI80buxtoHkQ5BaaFLRnONic_sG0ODud44rqh7TX0,3884128
6
+ html_to_markdown/options.py,sha256=vImRfeHAeyAy0Lnt6cTPHGbj7mTdw8AEUgo19u7MAA0,5080
7
+ html_to_markdown/_html_to_markdown.abi3.so,sha256=5nhsd3DFA0t6C_UbeZrT99KNUSTkRwrBEsR6ySQV5eA,3701616
8
+ html_to_markdown/__init__.py,sha256=bXngQAyZfyVClaa1YyVAUlgLYXcOFki_eaEpD42yuvM,1358
9
+ html_to_markdown/api.py,sha256=uiNoieNXrcXTJI2_vV7ruDv9HKD7XFuosCAeqZL-C_Q,4944
10
+ html_to_markdown/_rust.pyi,sha256=pi6C_qAdB81qUlC89Dy1ZKC1JrpdnqVce-caJx3ekPA,2098
11
+ html_to_markdown/v1_compat.py,sha256=5DZA-fPMqZ5hYiA43rFaOAqshLS8MScbBnivDXuvQII,8034
12
+ html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
13
+ html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
15
+ html_to_markdown/cli_proxy.py,sha256=HPYKH5Mf5OUvkbEQISJvAkxrbjWKxE5GokA44HoQ6z8,3858
16
+ html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
17
+ html_to_markdown/bin/html-to-markdown,sha256=bRYI80buxtoHkQ5BaaFLRnONic_sG0ODud44rqh7TX0,3884128
@@ -0,0 +1,6 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.9.6)
3
+ Root-Is-Purelib: false
4
+ Tag: cp310-abi3-macosx_11_0_arm64
5
+ Generator: delocate 0.13.0
6
+
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright 2024-2025 Na'aman Hirschfeld
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.