html-to-markdown 1.3.1__py3-none-any.whl → 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of html-to-markdown might be problematic. Click here for more details.
- html_to_markdown/__main__.py +4 -1
- html_to_markdown/processing.py +33 -11
- {html_to_markdown-1.3.1.dist-info → html_to_markdown-1.3.3.dist-info}/METADATA +1 -1
- html_to_markdown-1.3.3.dist-info/RECORD +14 -0
- {html_to_markdown-1.3.1.dist-info → html_to_markdown-1.3.3.dist-info}/WHEEL +1 -1
- html_to_markdown-1.3.3.dist-info/entry_points.txt +2 -0
- html_to_markdown-1.3.1.dist-info/RECORD +0 -13
- {html_to_markdown-1.3.1.dist-info → html_to_markdown-1.3.3.dist-info}/licenses/LICENSE +0 -0
- {html_to_markdown-1.3.1.dist-info → html_to_markdown-1.3.3.dist-info}/top_level.txt +0 -0
html_to_markdown/__main__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
def cli():
|
|
4
4
|
from html_to_markdown.cli import main
|
|
5
5
|
|
|
6
6
|
try:
|
|
@@ -9,3 +9,6 @@ if __name__ == "__main__":
|
|
|
9
9
|
except ValueError as e:
|
|
10
10
|
print(str(e), file=sys.stderr) # noqa: T201
|
|
11
11
|
sys.exit(1)
|
|
12
|
+
|
|
13
|
+
if __name__ == "__main__":
|
|
14
|
+
cli()
|
html_to_markdown/processing.py
CHANGED
|
@@ -89,6 +89,7 @@ def _process_tag(
|
|
|
89
89
|
escape_misc: bool,
|
|
90
90
|
escape_underscores: bool,
|
|
91
91
|
strip: set[str] | None,
|
|
92
|
+
context_before: str = "",
|
|
92
93
|
) -> str:
|
|
93
94
|
should_convert_tag = _should_convert_tag(tag_name=tag.name, strip=strip, convert=convert)
|
|
94
95
|
tag_name: SupportedTag | None = (
|
|
@@ -129,12 +130,21 @@ def _process_tag(
|
|
|
129
130
|
escape_misc=escape_misc,
|
|
130
131
|
escape_underscores=escape_underscores,
|
|
131
132
|
strip=strip,
|
|
133
|
+
context_before=(context_before + text)[-2:],
|
|
132
134
|
)
|
|
133
135
|
|
|
134
136
|
if tag_name and should_convert_tag:
|
|
135
|
-
|
|
137
|
+
rendered = converters_map[tag_name]( # type: ignore[call-arg]
|
|
136
138
|
tag=tag, text=text, convert_as_inline=convert_as_inline
|
|
137
139
|
)
|
|
140
|
+
# For headings, ensure two newlines before if not already present
|
|
141
|
+
# Edge case where the document starts with a \n and then a heading
|
|
142
|
+
if is_heading and context_before not in {"", "\n"}:
|
|
143
|
+
n_eol_to_add = 2 - (len(context_before) - len(context_before.rstrip("\n")))
|
|
144
|
+
if n_eol_to_add > 0:
|
|
145
|
+
prefix = "\n" * n_eol_to_add
|
|
146
|
+
return f"{prefix}{rendered}"
|
|
147
|
+
return rendered
|
|
138
148
|
|
|
139
149
|
return text
|
|
140
150
|
|
|
@@ -275,13 +285,25 @@ def convert_to_markdown(
|
|
|
275
285
|
if custom_converters:
|
|
276
286
|
converters_map.update(cast("ConvertersMap", custom_converters))
|
|
277
287
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
+
text = ""
|
|
289
|
+
for el in filter(lambda value: not isinstance(value, (Comment, Doctype)), source.children):
|
|
290
|
+
if isinstance(el, NavigableString):
|
|
291
|
+
text += _process_text(
|
|
292
|
+
el=el,
|
|
293
|
+
escape_misc=escape_misc,
|
|
294
|
+
escape_asterisks=escape_asterisks,
|
|
295
|
+
escape_underscores=escape_underscores,
|
|
296
|
+
)
|
|
297
|
+
elif isinstance(el, Tag):
|
|
298
|
+
text += _process_tag(
|
|
299
|
+
el,
|
|
300
|
+
converters_map,
|
|
301
|
+
convert_as_inline=convert_as_inline,
|
|
302
|
+
convert=_as_optional_set(convert),
|
|
303
|
+
escape_asterisks=escape_asterisks,
|
|
304
|
+
escape_misc=escape_misc,
|
|
305
|
+
escape_underscores=escape_underscores,
|
|
306
|
+
strip=_as_optional_set(strip),
|
|
307
|
+
context_before=text[-2:],
|
|
308
|
+
)
|
|
309
|
+
return text
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
html_to_markdown/__init__.py,sha256=95S7_7mR_g88uTnFI0FaRNykrtAaSKb6sJbwSea2zjk,145
|
|
2
|
+
html_to_markdown/__main__.py,sha256=_EeKI8veMWZO7xsl-mBHBE-OmH1vnkVyXuExsOfduFI,286
|
|
3
|
+
html_to_markdown/cli.py,sha256=HVnzmcyrYwah_yWhZ87mZcG0VgnKYp6y89fJh2R-Rlw,4532
|
|
4
|
+
html_to_markdown/constants.py,sha256=Usk67k18tuRovJpKDsiEXdgH20KgqI9KOnK4Fbx-M5c,547
|
|
5
|
+
html_to_markdown/converters.py,sha256=p8arBdejEeuAp9_wIYvp5PuWNBB0M699CgLSEkW3v88,11910
|
|
6
|
+
html_to_markdown/processing.py,sha256=JNCjDgbfuW3YI7mfsj9aHlk2-KriQXJHU8Eo5D9Qj1E,10280
|
|
7
|
+
html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
html_to_markdown/utils.py,sha256=HJUDej5HSpXRtYv-OkCyD0hwnPnVfQCwY6rBRlIOt9s,1989
|
|
9
|
+
html_to_markdown-1.3.3.dist-info/licenses/LICENSE,sha256=3J_HR5BWvUM1mlIrlkF32-uC1FM64gy8JfG17LBuheQ,1122
|
|
10
|
+
html_to_markdown-1.3.3.dist-info/METADATA,sha256=BcfsHs0cSG8Y1BScMsGFOGfN5mrxiu-HA_fJC6DrtFg,7653
|
|
11
|
+
html_to_markdown-1.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
12
|
+
html_to_markdown-1.3.3.dist-info/entry_points.txt,sha256=jhMqXDYvIyzQDLKjCn4xCyzCCbAMl94tzQx_HiG5Qi0,67
|
|
13
|
+
html_to_markdown-1.3.3.dist-info/top_level.txt,sha256=Ev6djb1c4dSKr_-n4K-FpEGDkzBigXY6LuZ5onqS7AE,17
|
|
14
|
+
html_to_markdown-1.3.3.dist-info/RECORD,,
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
html_to_markdown/__init__.py,sha256=95S7_7mR_g88uTnFI0FaRNykrtAaSKb6sJbwSea2zjk,145
|
|
2
|
-
html_to_markdown/__main__.py,sha256=u5xevySlT5eIGyLUaethdDQIKJygaKnc3F2sHWoz75g,264
|
|
3
|
-
html_to_markdown/cli.py,sha256=HVnzmcyrYwah_yWhZ87mZcG0VgnKYp6y89fJh2R-Rlw,4532
|
|
4
|
-
html_to_markdown/constants.py,sha256=Usk67k18tuRovJpKDsiEXdgH20KgqI9KOnK4Fbx-M5c,547
|
|
5
|
-
html_to_markdown/converters.py,sha256=p8arBdejEeuAp9_wIYvp5PuWNBB0M699CgLSEkW3v88,11910
|
|
6
|
-
html_to_markdown/processing.py,sha256=ZYp4sMsC2Plb0iyGTFmyCKWc7lSHHFYc3S46UrlfOHw,9199
|
|
7
|
-
html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
html_to_markdown/utils.py,sha256=HJUDej5HSpXRtYv-OkCyD0hwnPnVfQCwY6rBRlIOt9s,1989
|
|
9
|
-
html_to_markdown-1.3.1.dist-info/licenses/LICENSE,sha256=3J_HR5BWvUM1mlIrlkF32-uC1FM64gy8JfG17LBuheQ,1122
|
|
10
|
-
html_to_markdown-1.3.1.dist-info/METADATA,sha256=CR__rjsnqp1XncpI9oWUTHxetKQY1wX6sxVfl_U1fEo,7653
|
|
11
|
-
html_to_markdown-1.3.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
12
|
-
html_to_markdown-1.3.1.dist-info/top_level.txt,sha256=Ev6djb1c4dSKr_-n4K-FpEGDkzBigXY6LuZ5onqS7AE,17
|
|
13
|
-
html_to_markdown-1.3.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|