html-to-markdown 2.14.4__cp310-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of html-to-markdown might be problematic. Click here for more details.

@@ -0,0 +1,62 @@
1
+ """html-to-markdown: Convert HTML to Markdown using Rust backend.
2
+
3
+ This package provides high-performance HTML to Markdown conversion
4
+ powered by Rust with a clean Python API.
5
+
6
+ V2 API (current):
7
+ from html_to_markdown import convert, ConversionOptions
8
+
9
+ options = ConversionOptions(heading_style="atx")
10
+ markdown = convert(html, options)
11
+
12
+ V1 API (backward compatibility):
13
+ from html_to_markdown import convert_to_markdown
14
+
15
+ markdown = convert_to_markdown(html, heading_style="atx")
16
+ """
17
+
18
+ from html_to_markdown.api import (
19
+ InlineImage,
20
+ InlineImageConfig,
21
+ InlineImageWarning,
22
+ MetadataConfig,
23
+ OptionsHandle,
24
+ convert,
25
+ convert_with_handle,
26
+ convert_with_inline_images,
27
+ convert_with_metadata,
28
+ create_options_handle,
29
+ )
30
+ from html_to_markdown.exceptions import (
31
+ ConflictingOptionsError,
32
+ EmptyHtmlError,
33
+ HtmlToMarkdownError,
34
+ InvalidParserError,
35
+ MissingDependencyError,
36
+ )
37
+ from html_to_markdown.options import ConversionOptions, PreprocessingOptions
38
+ from html_to_markdown.v1_compat import convert_to_markdown, markdownify
39
+
40
+ __all__ = [
41
+ "ConflictingOptionsError",
42
+ "ConversionOptions",
43
+ "EmptyHtmlError",
44
+ "HtmlToMarkdownError",
45
+ "InlineImage",
46
+ "InlineImageConfig",
47
+ "InlineImageWarning",
48
+ "InvalidParserError",
49
+ "MetadataConfig",
50
+ "MissingDependencyError",
51
+ "OptionsHandle",
52
+ "PreprocessingOptions",
53
+ "convert",
54
+ "convert_to_markdown",
55
+ "convert_with_handle",
56
+ "convert_with_inline_images",
57
+ "convert_with_metadata",
58
+ "create_options_handle",
59
+ "markdownify",
60
+ ]
61
+
62
+ __version__ = "2.14.4"
@@ -0,0 +1,16 @@
1
+ import sys
2
+
3
+ from html_to_markdown.cli_proxy import main
4
+
5
+
6
def cli() -> None:
    """Run the CLI proxy with the arguments of the current process.

    Everything after the program name in ``sys.argv`` is handed to ``main``;
    the string it returns is printed to stdout without an extra trailing
    newline. A ``ValueError`` or ``FileNotFoundError`` raised while doing so is
    printed to stderr and the process exits with status 1.
    """
    arguments = sys.argv[1:]
    try:
        output = main(arguments)
        print(output, end="")  # noqa: T201
    except (ValueError, FileNotFoundError) as exc:
        print(str(exc), file=sys.stderr)  # noqa: T201
        sys.exit(1)


if __name__ == "__main__":
    cli()
@@ -0,0 +1,196 @@
1
+ from typing import Literal, TypedDict
2
+
3
# Stub for the preprocessing settings object; all constructor arguments are
# keyword-only and each one has a same-named instance attribute.
class PreprocessingOptions:
    enabled: bool
    preset: Literal["minimal", "standard", "aggressive"]
    remove_navigation: bool
    remove_forms: bool

    def __init__(
        self,
        *,
        enabled: bool = False,
        preset: Literal["minimal", "standard", "aggressive"] = "standard",
        remove_navigation: bool = True,
        remove_forms: bool = True,
    ) -> None: ...
17
+
18
# Stub for the conversion settings object. Every constructor argument is
# keyword-only, has a default, and is mirrored by a same-named attribute.
class ConversionOptions:
    # --- attributes ---
    heading_style: Literal["underlined", "atx", "atx_closed"]
    list_indent_type: Literal["spaces", "tabs"]
    list_indent_width: int
    bullets: str
    strong_em_symbol: str
    escape_asterisks: bool
    escape_underscores: bool
    escape_misc: bool
    escape_ascii: bool
    code_language: str
    autolinks: bool
    default_title: bool
    br_in_tables: bool
    hocr_spatial_tables: bool
    highlight_style: Literal["double-equal", "html", "bold", "none"]
    extract_metadata: bool
    whitespace_mode: Literal["normalized", "strict"]
    strip_newlines: bool
    wrap: bool
    wrap_width: int
    convert_as_inline: bool
    sub_symbol: str
    sup_symbol: str
    newline_style: Literal["spaces", "backslash"]
    code_block_style: Literal["indented", "backticks", "tildes"]
    keep_inline_images_in: list[str]
    # Attribute is always a PreprocessingOptions even though the constructor
    # also accepts None for it.
    preprocessing: PreprocessingOptions
    encoding: str
    debug: bool
    strip_tags: list[str]
    preserve_tags: list[str]

    # --- constructor ---
    def __init__(
        self,
        *,
        heading_style: Literal["underlined", "atx", "atx_closed"] = "underlined",
        list_indent_type: Literal["spaces", "tabs"] = "spaces",
        list_indent_width: int = 4,
        bullets: str = "*+-",
        strong_em_symbol: str = "*",
        escape_asterisks: bool = False,
        escape_underscores: bool = False,
        escape_misc: bool = False,
        escape_ascii: bool = False,
        code_language: str = "",
        autolinks: bool = True,
        default_title: bool = False,
        br_in_tables: bool = False,
        hocr_spatial_tables: bool = True,
        highlight_style: Literal["double-equal", "html", "bold", "none"] = "double-equal",
        extract_metadata: bool = True,
        whitespace_mode: Literal["normalized", "strict"] = "normalized",
        strip_newlines: bool = False,
        wrap: bool = False,
        wrap_width: int = 80,
        convert_as_inline: bool = False,
        sub_symbol: str = "",
        sup_symbol: str = "",
        newline_style: Literal["spaces", "backslash"] = "spaces",
        code_block_style: Literal["indented", "backticks", "tildes"] = "indented",
        keep_inline_images_in: list[str] = [],
        preprocessing: PreprocessingOptions | None = None,
        encoding: str = "utf-8",
        debug: bool = False,
        strip_tags: list[str] = [],
        preserve_tags: list[str] = [],
    ) -> None: ...
86
+
87
# Stub for the inline-image extraction settings. Unlike the other option
# classes in this stub, the constructor arguments may be passed positionally.
class InlineImageConfig:
    max_decoded_size_bytes: int
    filename_prefix: str | None
    capture_svg: bool
    infer_dimensions: bool

    def __init__(
        self,
        # The concrete default is not spelled out in the stub (`...`).
        max_decoded_size_bytes: int = ...,
        filename_prefix: str | None = None,
        capture_svg: bool = True,
        infer_dimensions: bool = False,
    ) -> None: ...
100
+
101
# Stub for the options-handle class; it is constructed from an optional
# ConversionOptions and exposes no other members here.
class ConversionOptionsHandle:
    def __init__(self, options: ConversionOptions | None = None) -> None: ...
103
+
104
# Dictionary shape of one inline image entry; all keys are required.
class InlineImage(TypedDict):
    data: bytes
    format: str
    filename: str | None
    description: str | None
    dimensions: tuple[int, int] | None
    # Restricted to the two literal tags below.
    source: Literal["img_data_uri", "svg_element"]
    attributes: dict[str, str]
112
+
113
# Dictionary shape of one inline-image warning: an integer index plus a message.
class InlineImageWarning(TypedDict):
    index: int
    message: str
116
+
117
# Stub for the metadata-extraction settings object. Constructor arguments are
# keyword-only; every extract_* switch defaults to True.
class MetadataConfig:
    extract_document: bool
    extract_headers: bool
    extract_links: bool
    extract_images: bool
    extract_structured_data: bool
    max_structured_data_size: int

    def __init__(
        self,
        *,
        extract_document: bool = True,
        extract_headers: bool = True,
        extract_links: bool = True,
        extract_images: bool = True,
        extract_structured_data: bool = True,
        max_structured_data_size: int = 1_000_000,
    ) -> None: ...
135
+
136
# Dictionary shape of the document-level metadata entry; all keys are required,
# scalar values may be None.
class DocumentMetadata(TypedDict):
    title: str | None
    description: str | None
    keywords: list[str]
    author: str | None
    canonical_url: str | None
    base_href: str | None
    language: str | None
    text_direction: str | None
    # String-to-string mappings.
    open_graph: dict[str, str]
    twitter_card: dict[str, str]
    meta_tags: dict[str, str]
148
+
149
# Dictionary shape of one header entry; `id` is the only key that may be None.
class HeaderMetadata(TypedDict):
    level: int
    text: str
    id: str | None
    depth: int
    html_offset: int
155
+
156
# Dictionary shape of one link entry; `title` is the only key that may be None.
class LinkMetadata(TypedDict):
    href: str
    text: str
    title: str | None
    link_type: str
    rel: list[str]
    attributes: dict[str, str]
163
+
164
# Dictionary shape of one image entry; alt, title and dimensions may be None.
class ImageMetadata(TypedDict):
    src: str
    alt: str | None
    title: str | None
    dimensions: tuple[int, int] | None
    image_type: str
    attributes: dict[str, str]
171
+
172
# Dictionary shape of one structured-data entry; `schema_type` may be None.
class StructuredData(TypedDict):
    data_type: str
    raw_json: str
    schema_type: str | None
176
+
177
# Top-level metadata dictionary: one document entry plus four lists of
# per-item entries. All keys are required.
class ExtendedMetadata(TypedDict):
    document: DocumentMetadata
    headers: list[HeaderMetadata]
    links: list[LinkMetadata]
    images: list[ImageMetadata]
    structured_data: list[StructuredData]
183
+
184
# Takes the HTML text and optional ConversionOptions; returns a str.
def convert(
    html: str,
    options: ConversionOptions | None = None,
) -> str: ...
185
# Returns a 3-tuple: a str, a list of InlineImage dicts and a list of
# InlineImageWarning dicts.
def convert_with_inline_images(
    html: str,
    options: ConversionOptions | None = None,
    image_config: InlineImageConfig | None = None,
) -> tuple[str, list[InlineImage], list[InlineImageWarning]]: ...
190
# Returns a 2-tuple: a str and an ExtendedMetadata dict.
def convert_with_metadata(
    html: str,
    options: ConversionOptions | None = None,
    metadata_config: MetadataConfig | None = None,
) -> tuple[str, ExtendedMetadata]: ...
195
# Builds a ConversionOptionsHandle from optional ConversionOptions.
def create_options_handle(
    options: ConversionOptions | None = None,
) -> ConversionOptionsHandle: ...
196
# Takes the HTML text and a ConversionOptionsHandle (both required); returns a str.
def convert_with_options_handle(
    html: str,
    handle: ConversionOptionsHandle,
) -> str: ...
@@ -0,0 +1,195 @@
1
+ """High-level Python API backed by the Rust core."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, Literal, TypedDict
6
+
7
+ import html_to_markdown._html_to_markdown as _rust
8
+ from html_to_markdown._html_to_markdown import (
9
+ ConversionOptionsHandle as OptionsHandle,
10
+ )
11
+ from html_to_markdown._html_to_markdown import (
12
+ InlineImageConfig,
13
+ MetadataConfig,
14
+ )
15
+ from html_to_markdown.options import ConversionOptions, PreprocessingOptions
16
+
17
+ if TYPE_CHECKING:
18
+ from html_to_markdown._html_to_markdown import ExtendedMetadata # pragma: no cover
19
+ else:
20
+ ExtendedMetadata = dict[str, object] # type: ignore[assignment]
21
+
22
+
23
class InlineImage(TypedDict):
    """Dictionary describing one inline image returned alongside the Markdown.

    All keys are required. ``filename``, ``description`` and ``dimensions``
    may be ``None``; ``source`` is one of ``"img_data_uri"`` or
    ``"svg_element"``.
    """

    data: bytes
    format: str
    filename: str | None
    description: str | None
    dimensions: tuple[int, int] | None
    source: Literal["img_data_uri", "svg_element"]
    attributes: dict[str, str]
33
+
34
+
35
class InlineImageWarning(TypedDict):
    """Dictionary describing one warning raised while extracting inline images.

    Carries an integer ``index`` and a human-readable ``message``.
    """

    index: int
    message: str
40
+
41
+
42
def _to_rust_preprocessing(options: PreprocessingOptions) -> _rust.PreprocessingOptions:
    """Build the Rust-side preprocessing options from the Python ``options``.

    Each of the four preprocessing fields is read from ``options`` and passed
    to ``_rust.PreprocessingOptions`` as a keyword argument of the same name.
    """
    field_names = ("enabled", "preset", "remove_navigation", "remove_forms")
    keyword_args = {name: getattr(options, name) for name in field_names}
    return _rust.PreprocessingOptions(**keyword_args)
49
+
50
+
51
def _to_rust_options(
    options: ConversionOptions,
    preprocessing: PreprocessingOptions,
) -> _rust.ConversionOptions:
    """Build the Rust-side conversion options from the Python option objects.

    Args:
        options: Source of every conversion field.
        preprocessing: Converted with ``_to_rust_preprocessing`` and passed as
            the ``preprocessing`` keyword.

    Returns:
        A ``_rust.ConversionOptions`` built from keyword arguments that share
        their names with the attributes read from ``options``.
    """

    def _copy_or_empty(values):
        # A falsy value (None or an empty sequence) becomes a fresh empty
        # list; anything else is shallow-copied into a new list.
        return list(values) if values else []

    # Fields that are forwarded to the Rust constructor unchanged.
    passthrough_fields = (
        "heading_style",
        "list_indent_type",
        "list_indent_width",
        "bullets",
        "strong_em_symbol",
        "escape_asterisks",
        "escape_underscores",
        "escape_misc",
        "escape_ascii",
        "code_language",
        "autolinks",
        "default_title",
        "br_in_tables",
        "hocr_spatial_tables",
        "highlight_style",
        "extract_metadata",
        "whitespace_mode",
        "strip_newlines",
        "wrap",
        "wrap_width",
        "convert_as_inline",
        "sub_symbol",
        "sup_symbol",
        "newline_style",
        "code_block_style",
    )
    keyword_args = {name: getattr(options, name) for name in passthrough_fields}
    keyword_args["keep_inline_images_in"] = _copy_or_empty(options.keep_inline_images_in)
    keyword_args["preprocessing"] = _to_rust_preprocessing(preprocessing)
    keyword_args["encoding"] = options.encoding
    keyword_args["debug"] = options.debug
    keyword_args["strip_tags"] = _copy_or_empty(options.strip_tags)
    keyword_args["preserve_tags"] = _copy_or_empty(options.preserve_tags)
    return _rust.ConversionOptions(**keyword_args)
88
+
89
+
90
def convert(
    html: str,
    options: ConversionOptions | None = None,
    preprocessing: PreprocessingOptions | None = None,
) -> str:
    """Convert an HTML string to Markdown through the Rust backend.

    Args:
        html: HTML text to convert.
        options: Conversion settings; a default ``ConversionOptions()`` is
            used when omitted but ``preprocessing`` is given.
        preprocessing: Preprocessing settings; a default
            ``PreprocessingOptions()`` is used when omitted but ``options`` is
            given.

    Returns:
        The value returned by ``_rust.convert``.
    """
    if options is not None or preprocessing is not None:
        effective_options = ConversionOptions() if options is None else options
        effective_preprocessing = PreprocessingOptions() if preprocessing is None else preprocessing
        return _rust.convert(html, _to_rust_options(effective_options, effective_preprocessing))

    # Neither argument was supplied: skip building options and pass None through.
    return _rust.convert(html, None)
106
+
107
+
108
def convert_with_inline_images(
    html: str,
    options: ConversionOptions | None = None,
    preprocessing: PreprocessingOptions | None = None,
    image_config: InlineImageConfig | None = None,
) -> tuple[str, list[InlineImage], list[InlineImageWarning]]:
    """Convert HTML to Markdown and return the inline images found on the way.

    Args:
        html: HTML text to convert.
        options: Conversion settings; defaults to ``ConversionOptions()``.
        preprocessing: Preprocessing settings; defaults to
            ``PreprocessingOptions()``.
        image_config: Inline-image settings; defaults to ``InlineImageConfig()``.

    Returns:
        A ``(markdown, images, warnings)`` tuple in which the images and
        warnings returned by the Rust call are copied into plain lists.
    """
    effective_options = ConversionOptions() if options is None else options
    effective_preprocessing = PreprocessingOptions() if preprocessing is None else preprocessing
    effective_image_config = InlineImageConfig() if image_config is None else image_config

    markdown, images, warnings = _rust.convert_with_inline_images(
        html,
        _to_rust_options(effective_options, effective_preprocessing),
        effective_image_config,
    )
    return markdown, list(images), list(warnings)
125
+
126
+
127
def create_options_handle(
    options: ConversionOptions | None = None,
    preprocessing: PreprocessingOptions | None = None,
) -> OptionsHandle:
    """Create an options handle from the given (or default) settings.

    Args:
        options: Conversion settings; defaults to ``ConversionOptions()``.
        preprocessing: Preprocessing settings; defaults to
            ``PreprocessingOptions()``.

    Returns:
        The handle produced by ``_rust.create_options_handle``, suitable for
        ``convert_with_handle``.
    """
    effective_options = ConversionOptions() if options is None else options
    effective_preprocessing = PreprocessingOptions() if preprocessing is None else preprocessing
    return _rust.create_options_handle(_to_rust_options(effective_options, effective_preprocessing))
138
+
139
+
140
def convert_with_handle(html: str, handle: OptionsHandle) -> str:
    """Convert HTML to Markdown using an existing options handle.

    Args:
        html: HTML text to convert.
        handle: Handle such as the one returned by ``create_options_handle``.

    Returns:
        The value returned by ``_rust.convert_with_options_handle``.
    """
    markdown = _rust.convert_with_options_handle(html, handle)
    return markdown
143
+
144
+
145
def convert_with_metadata(
    html: str,
    options: ConversionOptions | None = None,
    preprocessing: PreprocessingOptions | None = None,
    metadata_config: MetadataConfig | None = None,
) -> tuple[str, ExtendedMetadata]:
    """Convert HTML to Markdown and collect metadata about the document.

    Args:
        html: HTML string to convert.
        options: Conversion settings; defaults to ``ConversionOptions()``.
        preprocessing: Preprocessing settings; defaults to
            ``PreprocessingOptions()``.
        metadata_config: Metadata extraction settings; defaults to
            ``MetadataConfig()``.

    Returns:
        A ``(markdown, metadata)`` tuple. ``metadata`` is a dictionary with:
            - document: Document-level metadata (title, description, lang, etc.)
            - headers: List of header elements with hierarchy
            - links: List of extracted hyperlinks with classification
            - images: List of extracted images with metadata
            - structured_data: List of JSON-LD, Microdata, or RDFa blocks

    Raises:
        ImportError: If the native extension does not expose
            ``convert_with_metadata``.
    """
    # Fail early, before any option objects are built, when the entry point is absent.
    if not hasattr(_rust, "convert_with_metadata"):
        raise ImportError(
            "convert_with_metadata is missing from the native extension; this indicates a broken/partial installation."
        )

    effective_options = ConversionOptions() if options is None else options
    effective_preprocessing = PreprocessingOptions() if preprocessing is None else preprocessing
    effective_metadata_config = MetadataConfig() if metadata_config is None else metadata_config

    markdown, metadata = _rust.convert_with_metadata(
        html,
        _to_rust_options(effective_options, effective_preprocessing),
        effective_metadata_config,
    )
    return markdown, metadata
182
+
183
+
184
+ __all__ = [
185
+ "InlineImage",
186
+ "InlineImageConfig",
187
+ "InlineImageWarning",
188
+ "MetadataConfig",
189
+ "OptionsHandle",
190
+ "convert",
191
+ "convert_with_handle",
192
+ "convert_with_inline_images",
193
+ "convert_with_metadata",
194
+ "create_options_handle",
195
+ ]
Binary file
@@ -0,0 +1,3 @@
1
+ from html_to_markdown.cli_proxy import main
2
+
3
+ __all__ = ["main"]
@@ -0,0 +1,142 @@
1
+ import subprocess
2
+ import sys
3
+ import warnings
4
+ from pathlib import Path
5
+
6
+ from html_to_markdown.exceptions import RedundantV1FlagError, RemovedV1FlagError
7
+
8
+
9
def find_cli_binary() -> Path:
    """Locate the html-to-markdown CLI executable.

    Candidates are tried in this order and the first regular file wins:

    1. ``target/release/<binary>`` under each ancestor directory of this
       module's directory, nearest ancestor first;
    2. ``bin/<binary>`` inside this module's directory;
    3. ``<binary>`` directly inside this module's directory.

    ``<binary>`` is ``html-to-markdown.exe`` on Windows and
    ``html-to-markdown`` everywhere else.

    Returns:
        Path to the CLI binary.

    Raises:
        FileNotFoundError: If none of the candidates is an existing file.
    """
    executable = "html-to-markdown" + (".exe" if sys.platform == "win32" else "")
    package_dir = Path(__file__).resolve().parent

    candidates = [ancestor / "target" / "release" / executable for ancestor in package_dir.parents]
    candidates.append(package_dir / "bin" / executable)
    candidates.append(package_dir / executable)

    for candidate in candidates:
        # is_file() is already False for paths that do not exist.
        if candidate.is_file():
            return candidate

    msg = "html-to-markdown CLI binary not found. Please install or build the package."
    raise FileNotFoundError(msg)
40
+
41
+
42
# v1 flags that no longer exist in v2; encountering one aborts translation.
_REMOVED_V1_FLAGS = ("--strip", "--convert")

# v1 "--no-*" flags that are dropped with a deprecation warning.
_REDUNDANT_V1_FLAGS = (
    "--no-escape-asterisks",
    "--no-escape-underscores",
    "--no-escape-misc",
    "--no-wrap",
    "--no-autolinks",
    "--no-extract-metadata",
)


def translate_v1_args_to_v2(argv: list[str]) -> list[str]:
    """Translate v1 CLI arguments to v2 format.

    Arguments are processed left to right:

    - ``--strip`` / ``--convert`` raise ``RemovedV1FlagError``;
    - the redundant ``--no-*`` flags are dropped with a ``DeprecationWarning``;
    - ``--preprocess-html`` is replaced by ``--preprocess`` with a
      ``DeprecationWarning``;
    - every other argument is passed through unchanged.

    Args:
        argv: List of command-line arguments. It is not modified.

    Returns:
        A new list of arguments compatible with v2.

    Raises:
        RemovedV1FlagError: If a v1 flag has been removed in v2.
    """
    translated: list[str] = []

    for arg in argv:
        if arg in _REMOVED_V1_FLAGS:
            raise RemovedV1FlagError(
                flag=arg,
                reason=f"{arg} option has been removed in v2.",
                migration="Remove this flag from your command. The feature is no longer available.",
            )

        if arg in _REDUNDANT_V1_FLAGS:
            # Warn and drop: the flag is not forwarded to the v2 binary.
            warnings.warn(
                f"'{arg}' is deprecated and redundant in v2. "
                f"These options are now disabled by default. Remove this flag.",
                DeprecationWarning,
                stacklevel=2,
            )
            continue

        if arg == "--preprocess-html":
            warnings.warn(
                "'--preprocess-html' is deprecated. Use '--preprocess' instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            translated.append("--preprocess")
            continue

        translated.append(arg)

    return translated
105
+
106
+
107
def main(argv: list[str]) -> str:
    """Execute the CLI proxy.

    Translates v1 arguments to v2 and invokes the native Rust CLI binary,
    capturing its output.

    Args:
        argv: Command-line arguments (without the program name).

    Returns:
        Stdout from the CLI binary, decoded as UTF-8.

    Raises:
        FileNotFoundError: If the CLI binary cannot be located.
        SystemExit: With status 1 when argument translation fails, or with
            the binary's own return code when it exits non-zero (its stderr is
            forwarded to this process's stderr first).
    """
    cli_binary = find_cli_binary()

    try:
        translated_args = translate_v1_args_to_v2(argv)
    except (RemovedV1FlagError, RedundantV1FlagError) as e:
        sys.stderr.write(f"\n❌ Error: {e.flag}\n\n")
        sys.stderr.write(f"   {e.reason}\n\n")
        sys.stderr.write(f"   💡 {e.migration}\n\n")
        sys.exit(1)
    except ValueError as e:
        sys.stderr.write(f"Error: {e}\n")
        sys.exit(1)

    # Decode the child's output explicitly as UTF-8. With text=True alone,
    # Python falls back to the locale's preferred encoding, which corrupts or
    # rejects non-ASCII Markdown on systems whose locale is not UTF-8
    # (e.g. Windows code pages). The native binary is a Rust program, so its
    # text output is expected to be UTF-8.
    result = subprocess.run(  # noqa: S603
        [str(cli_binary), *translated_args],
        capture_output=True,
        text=True,
        encoding="utf-8",
        check=False,
    )

    if result.returncode != 0:
        sys.stderr.write(result.stderr)
        sys.exit(result.returncode)

    return result.stdout
@@ -0,0 +1,73 @@
1
+ from __future__ import annotations
2
+
3
+
4
class HtmlToMarkdownError(Exception):
    """Root of the html-to-markdown exception hierarchy.

    Catch this type to handle any error defined by the package.
    """
6
+
7
+
8
class MissingDependencyError(HtmlToMarkdownError):
    """Signals that a dependency needed for an operation is not installed.

    Attributes:
        dependency: Name of the missing dependency.
        install_command: Suggested install command, or ``None``.
    """

    def __init__(self, dependency: str, install_command: str | None = None) -> None:
        # The install hint is appended only when a non-empty command was given.
        install_hint = f" Install with: {install_command}" if install_command else ""
        super().__init__(f"{dependency} is not installed.{install_hint}")

        self.dependency = dependency
        self.install_command = install_command
20
+
21
+
22
class InvalidParserError(HtmlToMarkdownError):
    """Signals that the requested parser is not one of the available ones.

    Attributes:
        parser: The parser name that was rejected.
        available_parsers: The parser names that would have been accepted.
    """

    def __init__(self, parser: str, available_parsers: list[str]) -> None:
        choices = ", ".join(available_parsers)
        super().__init__(f"Invalid parser '{parser}'. Available parsers: {choices}")

        self.parser = parser
        self.available_parsers = available_parsers
31
+
32
+
33
class EmptyHtmlError(HtmlToMarkdownError):
    """Signals that the HTML input was empty.

    Takes no arguments; the message is fixed.
    """

    def __init__(self) -> None:
        message = "The input HTML is empty."
        super().__init__(message)
38
+
39
+
40
class ConflictingOptionsError(HtmlToMarkdownError):
    """Signals that two mutually exclusive options were both specified.

    Attributes:
        option1: Name of the first conflicting option.
        option2: Name of the second conflicting option.
    """

    def __init__(self, option1: str, option2: str) -> None:
        message = f"Only one of '{option1}' and '{option2}' can be specified."
        super().__init__(message)

        self.option1 = option1
        self.option2 = option2
48
+
49
+
50
class InvalidEncodingError(HtmlToMarkdownError):
    """Raised when an invalid character encoding is specified.

    Attributes:
        encoding: The encoding name that was rejected.
    """

    def __init__(self, encoding: str) -> None:
        # Keep the offending value on the instance so handlers can inspect it
        # without parsing the message, as the other exceptions in this module do
        # for their own arguments.
        self.encoding = encoding

        super().__init__(f"The specified encoding ({encoding}) is not valid.")
55
+
56
+
57
class UnsupportedV1FeatureError(HtmlToMarkdownError):
    """Signals that a v1 feature has no v2 equivalent.

    Attributes:
        flag: The v1 flag or feature that was used.
        reason: Why it is not supported in v2.
        migration: What the caller should do instead.
    """

    def __init__(self, flag: str, reason: str, migration: str) -> None:
        super().__init__(f"'{flag}' is not supported in v2.\n\nReason: {reason}\n\nMigration: {migration}")

        self.flag = flag
        self.reason = reason
        self.migration = migration
66
+
67
+
68
class RemovedV1FlagError(UnsupportedV1FeatureError):
    """Signals use of a v1 flag that no longer exists in v2.

    Inherits the ``flag``, ``reason`` and ``migration`` attributes and the
    constructor of ``UnsupportedV1FeatureError`` unchanged.
    """
70
+
71
+
72
class RedundantV1FlagError(UnsupportedV1FeatureError):
    """Signals use of a v1 flag that is redundant in v2.

    Inherits the ``flag``, ``reason`` and ``migration`` attributes and the
    constructor of ``UnsupportedV1FeatureError`` unchanged.
    """