docs2epub-0.1.7-py3-none-any.whl → docs2epub-0.1.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs2epub/docusaurus_next.py +20 -0
- {docs2epub-0.1.7.dist-info → docs2epub-0.1.8.dist-info}/METADATA +1 -1
- {docs2epub-0.1.7.dist-info → docs2epub-0.1.8.dist-info}/RECORD +5 -5
- {docs2epub-0.1.7.dist-info → docs2epub-0.1.8.dist-info}/WHEEL +0 -0
- {docs2epub-0.1.7.dist-info → docs2epub-0.1.8.dist-info}/entry_points.txt +0 -0
docs2epub/docusaurus_next.py
CHANGED
|
@@ -85,6 +85,22 @@ def _extract_article(soup: BeautifulSoup) -> Tag:
|
|
|
85
85
|
role_main = soup.find(attrs={"role": "main"})
|
|
86
86
|
if role_main:
|
|
87
87
|
return role_main
|
|
88
|
+
for selector in [
|
|
89
|
+
"div#content",
|
|
90
|
+
"div.content",
|
|
91
|
+
"div#main",
|
|
92
|
+
"div.main",
|
|
93
|
+
"div#page",
|
|
94
|
+
"div.page",
|
|
95
|
+
"div.document",
|
|
96
|
+
"div#document",
|
|
97
|
+
]:
|
|
98
|
+
candidate = soup.select_one(selector)
|
|
99
|
+
if candidate:
|
|
100
|
+
return candidate
|
|
101
|
+
body = soup.find("body")
|
|
102
|
+
if body:
|
|
103
|
+
return body
|
|
88
104
|
raise RuntimeError("Could not find <article> in page HTML")
|
|
89
105
|
|
|
90
106
|
|
|
@@ -386,6 +402,10 @@ def iter_docusaurus_next(options: DocusaurusNextOptions) -> list[Chapter]:
|
|
|
386
402
|
if title_el
|
|
387
403
|
else f"Chapter {len(chapters) + 1}"
|
|
388
404
|
)
|
|
405
|
+
if title_el is None and article.name == "body":
|
|
406
|
+
body_text = " ".join(article.get_text(" ", strip=True).split())
|
|
407
|
+
if len(body_text) < 200:
|
|
408
|
+
return None
|
|
389
409
|
|
|
390
410
|
_remove_unwanted(article)
|
|
391
411
|
_absolutize_urls(article, base_url=target_url)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
docs2epub/__init__.py,sha256=iccyEu4zlubhvd6pM7Z2Gjwn8tPw9IhZ4ABKhbiFjUY,54
|
|
2
2
|
docs2epub/cli.py,sha256=pt1crvrkr2k1ybf_p0m4xSYyoZVluFsDNGuwJ7CykYM,3863
|
|
3
|
-
docs2epub/docusaurus_next.py,sha256=
|
|
3
|
+
docs2epub/docusaurus_next.py,sha256=nQYkNecXgh4TsxaTydoiC1tVmIqjYiLiyEtYlpXGmXg,12507
|
|
4
4
|
docs2epub/epub.py,sha256=OsPWcPGTgazAeNpWASIE6e4HQ5ILQr2VFO1-Aj3y1kg,2986
|
|
5
5
|
docs2epub/kindle_html.py,sha256=LN0CGj9ap9b8iC_MlZcQLuhJ7FehZr_VbIfMOz78E5c,2297
|
|
6
6
|
docs2epub/model.py,sha256=uL7uwbG6yU0bEGpSFxxIv2pcZHQR9cs2prfqk5iNQwc,160
|
|
7
7
|
docs2epub/pandoc_epub2.py,sha256=l22-QAQcCgJyl7HF0_b5weC3qEGVQLwOhxdbAvd8C2o,3610
|
|
8
|
-
docs2epub-0.1.
|
|
9
|
-
docs2epub-0.1.
|
|
10
|
-
docs2epub-0.1.
|
|
11
|
-
docs2epub-0.1.
|
|
8
|
+
docs2epub-0.1.8.dist-info/METADATA,sha256=KdwbHGiBRLuXLQKlTypnDH8eOogD5bDoSGDIJgNriZs,1886
|
|
9
|
+
docs2epub-0.1.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
10
|
+
docs2epub-0.1.8.dist-info/entry_points.txt,sha256=DHK4mzthrIXUvM8Y8Vo_3jG2IhegEDDM7T9CvCkUtvw,49
|
|
11
|
+
docs2epub-0.1.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|