html-to-markdown 1.2.1__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of html-to-markdown might be problematic. Click here for more details.
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/PKG-INFO +26 -4
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/README.md +21 -0
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/html_to_markdown/converters.py +6 -2
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/html_to_markdown/processing.py +11 -3
- html_to_markdown-1.3.0/html_to_markdown.egg-info/PKG-INFO +242 -0
- html_to_markdown-1.3.0/html_to_markdown.egg-info/SOURCES.txt +16 -0
- html_to_markdown-1.3.0/html_to_markdown.egg-info/dependency_links.txt +1 -0
- html_to_markdown-1.3.0/html_to_markdown.egg-info/requires.txt +1 -0
- html_to_markdown-1.3.0/html_to_markdown.egg-info/top_level.txt +1 -0
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/pyproject.toml +15 -9
- html_to_markdown-1.3.0/setup.cfg +4 -0
- html_to_markdown-1.2.1/.gitignore +0 -22
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/LICENSE +0 -0
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/html_to_markdown/__init__.py +0 -0
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/html_to_markdown/__main__.py +0 -0
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/html_to_markdown/cli.py +0 -0
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/html_to_markdown/constants.py +0 -0
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/html_to_markdown/py.typed +0 -0
- {html_to_markdown-1.2.1 → html_to_markdown-1.3.0}/html_to_markdown/utils.py +0 -0
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: html-to-markdown
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Convert HTML to markdown
|
|
5
|
-
Project-URL: homepage, https://github.com/Goldziher/html-to-markdown
|
|
6
5
|
Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
|
|
7
6
|
License: MIT
|
|
8
|
-
|
|
7
|
+
Project-URL: homepage, https://github.com/Goldziher/html-to-markdown
|
|
9
8
|
Keywords: converter,html,markdown,text-extraction,text-processing
|
|
10
9
|
Classifier: Intended Audience :: Developers
|
|
11
10
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -23,8 +22,10 @@ Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
|
23
22
|
Classifier: Topic :: Utilities
|
|
24
23
|
Classifier: Typing :: Typed
|
|
25
24
|
Requires-Python: >=3.9
|
|
26
|
-
Requires-Dist: beautifulsoup4>=4.12.3
|
|
27
25
|
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: beautifulsoup4>=4.12.3
|
|
28
|
+
Dynamic: license-file
|
|
28
29
|
|
|
29
30
|
# html-to-markdown
|
|
30
31
|
|
|
@@ -116,6 +117,26 @@ markdown = convert_to_markdown(
|
|
|
116
117
|
)
|
|
117
118
|
```
|
|
118
119
|
|
|
120
|
+
### Custom Converters
|
|
121
|
+
|
|
122
|
+
You can provide your own conversion functions for specific HTML tags:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from bs4.element import Tag
|
|
126
|
+
from html_to_markdown import convert_to_markdown
|
|
127
|
+
|
|
128
|
+
# Define a custom converter for the <b> tag
|
|
129
|
+
def custom_bold_converter(*, tag: Tag, text: str, **kwargs) -> str:
|
|
130
|
+
return f"IMPORTANT: {text}"
|
|
131
|
+
|
|
132
|
+
html = "<p>This is a <b>bold statement</b>.</p>"
|
|
133
|
+
markdown = convert_to_markdown(html, custom_converters={"b": custom_bold_converter})
|
|
134
|
+
print(markdown)
|
|
135
|
+
# Output: This is a IMPORTANT: bold statement.
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Custom converters take precedence over the built-in converters and can be used alongside other configuration options.
|
|
139
|
+
|
|
119
140
|
### Configuration Options
|
|
120
141
|
|
|
121
142
|
| Option | Type | Default | Description |
|
|
@@ -189,6 +210,7 @@ Full list of configuration options:
|
|
|
189
210
|
- `wrap`: Enable text wrapping
|
|
190
211
|
- `wrap_width`: Width for text wrapping
|
|
191
212
|
- `convert_as_inline`: Treat content as inline elements
|
|
213
|
+
- `custom_converters`: A mapping of HTML tag names to custom converter functions
|
|
192
214
|
|
|
193
215
|
## Contribution
|
|
194
216
|
|
|
@@ -88,6 +88,26 @@ markdown = convert_to_markdown(
|
|
|
88
88
|
)
|
|
89
89
|
```
|
|
90
90
|
|
|
91
|
+
### Custom Converters
|
|
92
|
+
|
|
93
|
+
You can provide your own conversion functions for specific HTML tags:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from bs4.element import Tag
|
|
97
|
+
from html_to_markdown import convert_to_markdown
|
|
98
|
+
|
|
99
|
+
# Define a custom converter for the <b> tag
|
|
100
|
+
def custom_bold_converter(*, tag: Tag, text: str, **kwargs) -> str:
|
|
101
|
+
return f"IMPORTANT: {text}"
|
|
102
|
+
|
|
103
|
+
html = "<p>This is a <b>bold statement</b>.</p>"
|
|
104
|
+
markdown = convert_to_markdown(html, custom_converters={"b": custom_bold_converter})
|
|
105
|
+
print(markdown)
|
|
106
|
+
# Output: This is a IMPORTANT: bold statement.
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Custom converters take precedence over the built-in converters and can be used alongside other configuration options.
|
|
110
|
+
|
|
91
111
|
### Configuration Options
|
|
92
112
|
|
|
93
113
|
| Option | Type | Default | Description |
|
|
@@ -161,6 +181,7 @@ Full list of configuration options:
|
|
|
161
181
|
- `wrap`: Enable text wrapping
|
|
162
182
|
- `wrap_width`: Width for text wrapping
|
|
163
183
|
- `convert_as_inline`: Treat content as inline elements
|
|
184
|
+
- `custom_converters`: A mapping of HTML tag names to custom converter functions
|
|
164
185
|
|
|
165
186
|
## Contribution
|
|
166
187
|
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from collections.abc import Iterable
|
|
4
7
|
from functools import partial
|
|
5
8
|
from inspect import getfullargspec
|
|
6
9
|
from textwrap import fill
|
|
@@ -55,7 +58,8 @@ SupportedElements = Literal[
|
|
|
55
58
|
"kbd",
|
|
56
59
|
]
|
|
57
60
|
|
|
58
|
-
|
|
61
|
+
Converter = Callable[[str, Tag], str]
|
|
62
|
+
ConvertersMap = dict[SupportedElements, Converter]
|
|
59
63
|
|
|
60
64
|
T = TypeVar("T")
|
|
61
65
|
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from collections.abc import Mapping
|
|
3
7
|
from itertools import chain
|
|
4
8
|
from typing import TYPE_CHECKING, Any, Callable, Literal, cast
|
|
5
9
|
|
|
@@ -12,7 +16,7 @@ from html_to_markdown.constants import (
|
|
|
12
16
|
html_heading_re,
|
|
13
17
|
whitespace_re,
|
|
14
18
|
)
|
|
15
|
-
from html_to_markdown.converters import ConvertersMap, create_converters_map
|
|
19
|
+
from html_to_markdown.converters import Converter, ConvertersMap, SupportedElements, create_converters_map
|
|
16
20
|
from html_to_markdown.utils import escape
|
|
17
21
|
|
|
18
22
|
if TYPE_CHECKING:
|
|
@@ -189,6 +193,8 @@ def convert_to_markdown(
|
|
|
189
193
|
code_language: str = "",
|
|
190
194
|
code_language_callback: Callable[[Any], str] | None = None,
|
|
191
195
|
convert: str | Iterable[str] | None = None,
|
|
196
|
+
convert_as_inline: bool = False,
|
|
197
|
+
custom_converters: Mapping[SupportedElements, Converter] | None = None,
|
|
192
198
|
default_title: bool = False,
|
|
193
199
|
escape_asterisks: bool = True,
|
|
194
200
|
escape_misc: bool = True,
|
|
@@ -202,7 +208,6 @@ def convert_to_markdown(
|
|
|
202
208
|
sup_symbol: str = "",
|
|
203
209
|
wrap: bool = False,
|
|
204
210
|
wrap_width: int = 80,
|
|
205
|
-
convert_as_inline: bool = False,
|
|
206
211
|
) -> str:
|
|
207
212
|
"""Convert HTML to Markdown.
|
|
208
213
|
|
|
@@ -213,6 +218,8 @@ def convert_to_markdown(
|
|
|
213
218
|
code_language: Default language identifier for fenced code blocks. Defaults to an empty string.
|
|
214
219
|
code_language_callback: Function to dynamically determine the language for code blocks.
|
|
215
220
|
convert: A list of tag names to convert to Markdown. If None, all supported tags are converted.
|
|
221
|
+
convert_as_inline: Treat the content as inline elements (no block elements like paragraphs). Defaults to False.
|
|
222
|
+
custom_converters: A mapping of custom converters for specific HTML tags. Defaults to None.
|
|
216
223
|
default_title: Use the default title when converting certain elements (e.g., links). Defaults to False.
|
|
217
224
|
escape_asterisks: Escape asterisks (*) to prevent unintended Markdown formatting. Defaults to True.
|
|
218
225
|
escape_misc: Escape miscellaneous characters to prevent conflicts in Markdown. Defaults to True.
|
|
@@ -226,7 +233,6 @@ def convert_to_markdown(
|
|
|
226
233
|
sup_symbol: Custom symbol for superscript text. Defaults to an empty string.
|
|
227
234
|
wrap: Wrap text to the specified width. Defaults to False.
|
|
228
235
|
wrap_width: The number of characters at which to wrap text. Defaults to 80.
|
|
229
|
-
convert_as_inline: Treat the content as inline elements (no block elements like paragraphs). Defaults to False.
|
|
230
236
|
|
|
231
237
|
Raises:
|
|
232
238
|
ValueError: If both 'strip' and 'convert' are specified, or when the input HTML is empty.
|
|
@@ -260,6 +266,8 @@ def convert_to_markdown(
|
|
|
260
266
|
wrap=wrap,
|
|
261
267
|
wrap_width=wrap_width,
|
|
262
268
|
)
|
|
269
|
+
if custom_converters:
|
|
270
|
+
converters_map.update(cast("ConvertersMap", custom_converters))
|
|
263
271
|
|
|
264
272
|
return _process_tag(
|
|
265
273
|
source,
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: html-to-markdown
|
|
3
|
+
Version: 1.3.0
|
|
4
|
+
Summary: Convert HTML to markdown
|
|
5
|
+
Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: homepage, https://github.com/Goldziher/html-to-markdown
|
|
8
|
+
Keywords: converter,html,markdown,text-extraction,text-processing
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Text Processing
|
|
19
|
+
Classifier: Topic :: Text Processing :: Markup
|
|
20
|
+
Classifier: Topic :: Text Processing :: Markup :: HTML
|
|
21
|
+
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
22
|
+
Classifier: Topic :: Utilities
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Python: >=3.9
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: beautifulsoup4>=4.12.3
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# html-to-markdown
|
|
31
|
+
|
|
32
|
+
A modern, fully typed Python library for converting HTML to Markdown. This library is a completely rewritten fork
|
|
33
|
+
of [markdownify](https://pypi.org/project/markdownify/) with a modernized codebase, strict type safety and support for
|
|
34
|
+
Python 3.9+.
|
|
35
|
+
|
|
36
|
+
## Features
|
|
37
|
+
|
|
38
|
+
- Full type safety with strict MyPy adherence
|
|
39
|
+
- Functional API design
|
|
40
|
+
- Extensive test coverage
|
|
41
|
+
- Configurable conversion options
|
|
42
|
+
- CLI tool for easy conversions
|
|
43
|
+
- Support for pre-configured BeautifulSoup instances
|
|
44
|
+
- Strict semver versioning
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
```shell
|
|
49
|
+
pip install html-to-markdown
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Quick Start
|
|
53
|
+
|
|
54
|
+
Convert HTML to Markdown with a single function call:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from html_to_markdown import convert_to_markdown
|
|
58
|
+
|
|
59
|
+
html = """
|
|
60
|
+
<article>
|
|
61
|
+
<h1>Welcome</h1>
|
|
62
|
+
<p>This is a <strong>sample</strong> with a <a href="https://example.com">link</a>.</p>
|
|
63
|
+
<ul>
|
|
64
|
+
<li>Item 1</li>
|
|
65
|
+
<li>Item 2</li>
|
|
66
|
+
</ul>
|
|
67
|
+
</article>
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
markdown = convert_to_markdown(html)
|
|
71
|
+
print(markdown)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Output:
|
|
75
|
+
|
|
76
|
+
```markdown
|
|
77
|
+
# Welcome
|
|
78
|
+
|
|
79
|
+
This is a **sample** with a [link](https://example.com).
|
|
80
|
+
|
|
81
|
+
* Item 1
|
|
82
|
+
* Item 2
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Working with BeautifulSoup
|
|
86
|
+
|
|
87
|
+
If you need more control over HTML parsing, you can pass a pre-configured BeautifulSoup instance:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from bs4 import BeautifulSoup
|
|
91
|
+
from html_to_markdown import convert_to_markdown
|
|
92
|
+
|
|
93
|
+
# Configure BeautifulSoup with your preferred parser
|
|
94
|
+
soup = BeautifulSoup(html, "lxml") # Note: lxml requires additional installation
|
|
95
|
+
markdown = convert_to_markdown(soup)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Advanced Usage
|
|
99
|
+
|
|
100
|
+
### Customizing Conversion Options
|
|
101
|
+
|
|
102
|
+
The library offers extensive customization through various options:
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from html_to_markdown import convert_to_markdown
|
|
106
|
+
|
|
107
|
+
html = "<div>Your content here...</div>"
|
|
108
|
+
markdown = convert_to_markdown(
|
|
109
|
+
html,
|
|
110
|
+
heading_style="atx", # Use # style headers
|
|
111
|
+
strong_em_symbol="*", # Use * for bold/italic
|
|
112
|
+
bullets="*+-", # Define bullet point characters
|
|
113
|
+
wrap=True, # Enable text wrapping
|
|
114
|
+
wrap_width=100, # Set wrap width
|
|
115
|
+
escape_asterisks=True, # Escape * characters
|
|
116
|
+
code_language="python", # Default code block language
|
|
117
|
+
)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Custom Converters
|
|
121
|
+
|
|
122
|
+
You can provide your own conversion functions for specific HTML tags:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from bs4.element import Tag
|
|
126
|
+
from html_to_markdown import convert_to_markdown
|
|
127
|
+
|
|
128
|
+
# Define a custom converter for the <b> tag
|
|
129
|
+
def custom_bold_converter(*, tag: Tag, text: str, **kwargs) -> str:
|
|
130
|
+
return f"IMPORTANT: {text}"
|
|
131
|
+
|
|
132
|
+
html = "<p>This is a <b>bold statement</b>.</p>"
|
|
133
|
+
markdown = convert_to_markdown(html, custom_converters={"b": custom_bold_converter})
|
|
134
|
+
print(markdown)
|
|
135
|
+
# Output: This is a IMPORTANT: bold statement.
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Custom converters take precedence over the built-in converters and can be used alongside other configuration options.
|
|
139
|
+
|
|
140
|
+
### Configuration Options
|
|
141
|
+
|
|
142
|
+
| Option | Type | Default | Description |
|
|
143
|
+
| -------------------- | ---- | -------------- | ------------------------------------------------------ |
|
|
144
|
+
| `autolinks` | bool | `True` | Auto-convert URLs to Markdown links |
|
|
145
|
+
| `bullets` | str | `'*+-'` | Characters to use for bullet points |
|
|
146
|
+
| `code_language` | str | `''` | Default language for code blocks |
|
|
147
|
+
| `heading_style` | str | `'underlined'` | Header style (`'underlined'`, `'atx'`, `'atx_closed'`) |
|
|
148
|
+
| `escape_asterisks` | bool | `True` | Escape * characters |
|
|
149
|
+
| `escape_underscores` | bool | `True` | Escape _ characters |
|
|
150
|
+
| `wrap` | bool | `False` | Enable text wrapping |
|
|
151
|
+
| `wrap_width` | int | `80` | Text wrap width |
|
|
152
|
+
|
|
153
|
+
For a complete list of options, see the [Configuration](#configuration) section below.
|
|
154
|
+
|
|
155
|
+
## CLI Usage
|
|
156
|
+
|
|
157
|
+
Convert HTML files directly from the command line:
|
|
158
|
+
|
|
159
|
+
```shell
|
|
160
|
+
# Convert a file
|
|
161
|
+
html_to_markdown input.html > output.md
|
|
162
|
+
|
|
163
|
+
# Process stdin
|
|
164
|
+
cat input.html | html_to_markdown > output.md
|
|
165
|
+
|
|
166
|
+
# Use custom options
|
|
167
|
+
html_to_markdown --heading-style atx --wrap --wrap-width 100 input.html > output.md
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
View all available options:
|
|
171
|
+
|
|
172
|
+
```shell
|
|
173
|
+
html_to_markdown --help
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
## Migration from Markdownify
|
|
177
|
+
|
|
178
|
+
For existing projects using Markdownify, a compatibility layer is provided:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
# Old code
|
|
182
|
+
from markdownify import markdownify as md
|
|
183
|
+
|
|
184
|
+
# New code - works the same way
|
|
185
|
+
from html_to_markdown import markdownify as md
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
The `markdownify` function is an alias for `convert_to_markdown` and provides identical functionality.
|
|
189
|
+
|
|
190
|
+
## Configuration
|
|
191
|
+
|
|
192
|
+
Full list of configuration options:
|
|
193
|
+
|
|
194
|
+
- `autolinks`: Convert valid URLs to Markdown links automatically
|
|
195
|
+
- `bullets`: Characters to use for bullet points in lists
|
|
196
|
+
- `code_language`: Default language for fenced code blocks
|
|
197
|
+
- `code_language_callback`: Function to determine code block language
|
|
198
|
+
- `convert`: List of HTML tags to convert (None = all supported tags)
|
|
199
|
+
- `default_title`: Use default titles for elements like links
|
|
200
|
+
- `escape_asterisks`: Escape * characters
|
|
201
|
+
- `escape_misc`: Escape miscellaneous Markdown characters
|
|
202
|
+
- `escape_underscores`: Escape _ characters
|
|
203
|
+
- `heading_style`: Header style (underlined/atx/atx_closed)
|
|
204
|
+
- `keep_inline_images_in`: Tags where inline images should be kept
|
|
205
|
+
- `newline_style`: Style for handling newlines (spaces/backslash)
|
|
206
|
+
- `strip`: Tags to remove from output
|
|
207
|
+
- `strong_em_symbol`: Symbol for strong/emphasized text (\* or \_)
|
|
208
|
+
- `sub_symbol`: Symbol for subscript text
|
|
209
|
+
- `sup_symbol`: Symbol for superscript text
|
|
210
|
+
- `wrap`: Enable text wrapping
|
|
211
|
+
- `wrap_width`: Width for text wrapping
|
|
212
|
+
- `convert_as_inline`: Treat content as inline elements
|
|
213
|
+
- `custom_converters`: A mapping of HTML tag names to custom converter functions
|
|
214
|
+
|
|
215
|
+
## Contribution
|
|
216
|
+
|
|
217
|
+
This library is open to contribution. Feel free to open issues or submit PRs. It's better to discuss issues before
|
|
218
|
+
submitting PRs to avoid disappointment.
|
|
219
|
+
|
|
220
|
+
### Local Development
|
|
221
|
+
|
|
222
|
+
1. Clone the repo
|
|
223
|
+
|
|
224
|
+
1. Install the system dependencies
|
|
225
|
+
|
|
226
|
+
1. Install the full dependencies with `uv sync`
|
|
227
|
+
|
|
228
|
+
1. Install the pre-commit hooks with:
|
|
229
|
+
|
|
230
|
+
```shell
|
|
231
|
+
pre-commit install && pre-commit install --hook-type commit-msg
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
1. Make your changes and submit a PR
|
|
235
|
+
|
|
236
|
+
## License
|
|
237
|
+
|
|
238
|
+
This library uses the MIT license.
|
|
239
|
+
|
|
240
|
+
## Acknowledgments
|
|
241
|
+
|
|
242
|
+
Special thanks to the original [markdownify](https://pypi.org/project/markdownify/) project creators and contributors.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
html_to_markdown/__init__.py
|
|
5
|
+
html_to_markdown/__main__.py
|
|
6
|
+
html_to_markdown/cli.py
|
|
7
|
+
html_to_markdown/constants.py
|
|
8
|
+
html_to_markdown/converters.py
|
|
9
|
+
html_to_markdown/processing.py
|
|
10
|
+
html_to_markdown/py.typed
|
|
11
|
+
html_to_markdown/utils.py
|
|
12
|
+
html_to_markdown.egg-info/PKG-INFO
|
|
13
|
+
html_to_markdown.egg-info/SOURCES.txt
|
|
14
|
+
html_to_markdown.egg-info/dependency_links.txt
|
|
15
|
+
html_to_markdown.egg-info/requires.txt
|
|
16
|
+
html_to_markdown.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
beautifulsoup4>=4.12.3
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
html_to_markdown
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
[build-system]
|
|
2
|
-
build-backend = "hatchling.build"
|
|
2
|
+
build-backend = "setuptools.build_meta"
|
|
3
3
|
|
|
4
|
-
requires = [ "
|
|
4
|
+
requires = [ "setuptools>=78.1" ]
|
|
5
5
|
|
|
6
6
|
[project]
|
|
7
7
|
name = "html-to-markdown"
|
|
8
|
-
version = "1.2.1"
|
|
8
|
+
version = "1.3.0"
|
|
9
9
|
description = "Convert HTML to markdown"
|
|
10
10
|
readme = "README.md"
|
|
11
11
|
keywords = [ "converter", "html", "markdown", "text-extraction", "text-processing" ]
|
|
@@ -42,20 +42,23 @@ dev = [
|
|
|
42
42
|
"mypy>=1.14.1",
|
|
43
43
|
"pre-commit>=4.1",
|
|
44
44
|
"pytest>=8.3.4",
|
|
45
|
-
"pytest-cov>=6",
|
|
45
|
+
"pytest-cov>=6.1",
|
|
46
46
|
"pytest-mock>=3.14",
|
|
47
47
|
"ruff>=0.9.3",
|
|
48
48
|
"types-beautifulsoup4>=4.12.0.20241020",
|
|
49
|
+
"uv-bump",
|
|
49
50
|
]
|
|
50
51
|
|
|
52
|
+
[tool.setuptools.packages.find]
|
|
53
|
+
include = [ "html_to_markdown" ]
|
|
54
|
+
|
|
55
|
+
[tool.setuptools.package-data]
|
|
56
|
+
html_to_markdown = [ "py.typed" ]
|
|
57
|
+
|
|
51
58
|
[tool.hatch.build]
|
|
52
59
|
skip-excluded-dirs = true
|
|
53
60
|
|
|
54
|
-
|
|
55
|
-
only-include = [ "html_to_markdown" ]
|
|
56
|
-
|
|
57
|
-
[tool.hatch.build.targets.wheel]
|
|
58
|
-
only-include = [ "html_to_markdown" ]
|
|
61
|
+
scripts.html_to_markdown = "html_to_markdown.__main__:cli"
|
|
59
62
|
|
|
60
63
|
[tool.ruff]
|
|
61
64
|
target-version = "py39"
|
|
@@ -113,3 +116,6 @@ disallow_untyped_decorators = false
|
|
|
113
116
|
|
|
114
117
|
[tool.uv]
|
|
115
118
|
default-groups = [ "dev" ]
|
|
119
|
+
|
|
120
|
+
[tool.uv.sources]
|
|
121
|
+
uv-bump = { git = "https://github.com/Goldziher/uv-bump" }
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
*$py.class
|
|
2
|
-
*.iml
|
|
3
|
-
*.log
|
|
4
|
-
*.py[cod]
|
|
5
|
-
.coverage
|
|
6
|
-
.env
|
|
7
|
-
.idea/
|
|
8
|
-
.run/
|
|
9
|
-
.mypy_cache/
|
|
10
|
-
.pdm-build/
|
|
11
|
-
.pdm-python
|
|
12
|
-
.pdm.toml
|
|
13
|
-
.pytest_cache/
|
|
14
|
-
.python-version
|
|
15
|
-
.ruff_cache/
|
|
16
|
-
.tox/
|
|
17
|
-
.venv/
|
|
18
|
-
.vscode/
|
|
19
|
-
__pycache__/
|
|
20
|
-
__pypackages__/
|
|
21
|
-
coverage.xml
|
|
22
|
-
dist/
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|