novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +1 -3
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +26 -21
- novel_downloader/cli/download.py +77 -64
- novel_downloader/cli/export.py +16 -20
- novel_downloader/cli/main.py +1 -1
- novel_downloader/cli/search.py +62 -65
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +8 -5
- novel_downloader/config/adapter.py +65 -105
- novel_downloader/config/{loader.py → file_io.py} +53 -26
- novel_downloader/core/__init__.py +1 -0
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +3 -24
- novel_downloader/core/downloaders/base.py +49 -23
- novel_downloader/core/downloaders/common.py +191 -137
- novel_downloader/core/downloaders/qianbi.py +187 -146
- novel_downloader/core/downloaders/qidian.py +187 -141
- novel_downloader/core/downloaders/registry.py +4 -2
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +3 -20
- novel_downloader/core/exporters/base.py +33 -37
- novel_downloader/core/exporters/common/__init__.py +1 -2
- novel_downloader/core/exporters/common/epub.py +15 -10
- novel_downloader/core/exporters/common/main_exporter.py +19 -12
- novel_downloader/core/exporters/common/txt.py +14 -9
- novel_downloader/core/exporters/epub_util.py +59 -29
- novel_downloader/core/exporters/linovelib/__init__.py +1 -0
- novel_downloader/core/exporters/linovelib/epub.py +23 -25
- novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
- novel_downloader/core/exporters/linovelib/txt.py +17 -11
- novel_downloader/core/exporters/qidian.py +2 -8
- novel_downloader/core/exporters/registry.py +4 -2
- novel_downloader/core/exporters/txt_util.py +7 -7
- novel_downloader/core/fetchers/__init__.py +54 -48
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
- novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
- novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/lewenn.py +83 -0
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +5 -16
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/shuhaige.py +84 -0
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/wanbengo.py +83 -0
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +1 -9
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +9 -1
- novel_downloader/core/parsers/__init__.py +49 -12
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/esjzone.py +61 -66
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/linovelib.py +48 -64
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/qianbi.py +48 -50
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
- novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
- novel_downloader/core/parsers/qidian/main_parser.py +11 -38
- novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +5 -16
- novel_downloader/core/parsers/sfacg.py +38 -45
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +87 -131
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +34 -3
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
- novel_downloader/core/searchers/base.py +112 -36
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +43 -25
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +74 -40
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +24 -8
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +31 -82
- novel_downloader/locales/zh.json +32 -83
- novel_downloader/models/__init__.py +21 -22
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +4 -37
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +5 -0
- novel_downloader/resources/config/settings.toml +8 -70
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +13 -22
- novel_downloader/utils/chapter_storage.py +3 -2
- novel_downloader/utils/constants.py +4 -29
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +1 -1
- novel_downloader/utils/epub/constants.py +57 -16
- novel_downloader/utils/epub/documents.py +88 -194
- novel_downloader/utils/epub/models.py +0 -14
- novel_downloader/utils/epub/utils.py +63 -96
- novel_downloader/utils/file_utils/__init__.py +2 -23
- novel_downloader/utils/file_utils/io.py +3 -113
- novel_downloader/utils/file_utils/sanitize.py +0 -4
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/logger.py +8 -16
- novel_downloader/utils/network.py +2 -2
- novel_downloader/utils/state.py +4 -90
- novel_downloader/utils/text_utils/__init__.py +1 -7
- novel_downloader/utils/text_utils/diff_display.py +5 -7
- novel_downloader/utils/time_utils/__init__.py +5 -11
- novel_downloader/utils/time_utils/datetime_utils.py +20 -29
- novel_downloader/utils/time_utils/sleep_utils.py +4 -8
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/core/downloaders/biquge.py +0 -29
- novel_downloader/core/downloaders/esjzone.py +0 -29
- novel_downloader/core/downloaders/linovelib.py +0 -29
- novel_downloader/core/downloaders/sfacg.py +0 -29
- novel_downloader/core/downloaders/yamibo.py +0 -29
- novel_downloader/core/exporters/biquge.py +0 -22
- novel_downloader/core/exporters/esjzone.py +0 -22
- novel_downloader/core/exporters/qianbi.py +0 -22
- novel_downloader/core/exporters/sfacg.py +0 -22
- novel_downloader/core/exporters/yamibo.py +0 -22
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -422
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -209
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -198
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -326
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -194
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -234
- novel_downloader/core/parsers/biquge.py +0 -139
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/types.py +0 -13
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/hash_store.py +0 -280
- novel_downloader/utils/fontocr/hash_utils.py +0 -103
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -315
- novel_downloader/utils/fontocr/ocr_v2.py +0 -764
- novel_downloader/utils/fontocr/ocr_v3.py +0 -744
- novel_downloader-1.5.0.dist-info/METADATA +0 -196
- novel_downloader-1.5.0.dist-info/RECORD +0 -164
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,134 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.parsers.xshbook
|
4
|
+
-------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.parsers.base import BaseParser
|
13
|
+
from novel_downloader.core.parsers.registry import register_parser
|
14
|
+
from novel_downloader.models import (
|
15
|
+
BookInfoDict,
|
16
|
+
ChapterDict,
|
17
|
+
ChapterInfoDict,
|
18
|
+
VolumeInfoDict,
|
19
|
+
)
|
20
|
+
|
21
|
+
|
22
|
+
@register_parser(
    site_keys=["xshbook"],
)
class XshbookParser(BaseParser):
    """
    Parser for xshbook (小说虎) book pages.

    Relies on the ``_first_str`` and ``_norm_space`` helpers inherited from
    ``BaseParser``; their exact semantics are not visible here (presumably
    "first non-empty cleaned string" and "collapse whitespace" - verify
    against the base class).
    """

    BASE = "http://www.xshbook.com"

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse the book info page into metadata plus a single-volume catalog.

        :param html_list: Raw HTML pages; only the first entry is read.
        :param kwargs: Accepted for interface compatibility; unused here.
        :return: Book metadata and chapter list, or ``None`` when
                 ``html_list`` is empty.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        book_name = self._first_str(tree.xpath("//div[@id='info']/h1/text()"))

        author = self._first_str(
            tree.xpath("//div[@id='info']/p[1]/text()"),
            replaces=[("\xa0", ""), ("作者:", "")],
        )

        update_time = self._first_str(
            tree.xpath("//meta[@property='og:novel:update_time']/@content")
        )

        # One summary line per <p> under #intro.
        summary = "\n".join(
            self._first_str(p.xpath("string()").splitlines())
            for p in tree.xpath("//div[@id='intro']//p")
        ).strip()
        # Everything from the site notice marker onwards is not part of the summary.
        summary = summary.split("本站提示", 1)[0].strip()

        cover_url = self._first_str(tree.xpath("//div[@id='fmimg']//img/@src"))

        book_type = self._first_str(tree.xpath("//div[@class='con_top']/a[2]/text()"))
        tags: list[str] = [book_type] if book_type else []

        chapters: list[ChapterInfoDict] = []
        for a in tree.xpath("//div[@id='list']//dd/a"):
            href = a.get("href", "")
            if not href:
                # An anchor without a target yields an empty url / chapterId,
                # which cannot be fetched later; skip it.
                continue
            title = self._norm_space(a.text_content())
            # /95071/95071941/389027455.html -> "389027455"
            chapter_id = href.rsplit("/", 1)[-1].split(".", 1)[0]
            chapters.append({"title": title, "url": href, "chapterId": chapter_id})

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "summary": summary,
            "tags": tags,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a chapter page into its title and cleaned text content.

        :param html_list: Raw HTML pages; only the first entry is read.
        :param chapter_id: Identifier stored as ``id`` in the result.
        :param kwargs: Accepted for interface compatibility; unused here.
        :return: Chapter dict, or ``None`` when the input is empty, the
                 ``#content`` node is missing, or no text survives filtering.
        """
        if not html_list:
            return None
        tree = html.fromstring(html_list[0])

        title = self._first_str(tree.xpath("//div[@class='bookname']/h1/text()"))
        if not title:
            # Fall back to the trailing text node of the breadcrumb bar.
            title = self._first_str(
                tree.xpath("//div[@class='con_top']/text()[last()]")
            )

        cont_nodes = tree.xpath("//div[@id='content']")
        if not cont_nodes:
            return None
        cont = cont_nodes[0]

        # Remove scripts under content. drop_tree() is used instead of
        # getparent().remove() because the latter also discards the element's
        # tail text, i.e. any paragraph text that follows an inline <script>.
        for s in cont.xpath(".//script"):
            s.drop_tree()

        paragraphs: list[str] = []
        for p in cont.xpath(".//p"):
            text = html.tostring(p, method="text", encoding="unicode")
            text = text.replace("\xa0", " ")
            if not self._is_boilerplate(text):
                paragraphs.append(text)

        content = "\n".join(self._norm_space(p) for p in paragraphs if p.strip())
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "xshbook"},
        }

    @staticmethod
    def _is_boilerplate(text: str) -> bool:
        """
        Return True when a paragraph matches one of the site-notice patterns.

        The three rules are spelled out separately so the grouping does not
        depend on ``and`` binding tighter than ``or``.
        """
        # Unconditional markers.
        if "谨记我们的网址" in text or "温馨提示" in text:
            return True
        # Short "notice" lines, unless they mention the text itself.
        if "提示" in text and "本文" not in text and len(text) < 60:
            return True
        # Short "share" prompts.
        return "分享" in text and len(text) < 40
|
@@ -11,148 +11,104 @@ from lxml import html
|
|
11
11
|
|
12
12
|
from novel_downloader.core.parsers.base import BaseParser
|
13
13
|
from novel_downloader.core.parsers.registry import register_parser
|
14
|
-
from novel_downloader.models import
|
14
|
+
from novel_downloader.models import (
|
15
|
+
BookInfoDict,
|
16
|
+
ChapterDict,
|
17
|
+
ChapterInfoDict,
|
18
|
+
VolumeInfoDict,
|
19
|
+
)
|
15
20
|
|
16
21
|
|
17
22
|
@register_parser(
|
18
23
|
site_keys=["yamibo"],
|
19
|
-
backends=["session", "browser"],
|
20
24
|
)
|
21
25
|
class YamiboParser(BaseParser):
|
22
|
-
"""
|
26
|
+
"""
|
27
|
+
Parser for 百合会 book pages.
|
28
|
+
"""
|
23
29
|
|
24
30
|
BASE_URL = "https://www.yamibo.com"
|
25
|
-
# Book info XPaths
|
26
|
-
_BOOK_NAME_XPATH = 'string(//h3[contains(@class, "col-md-12")])'
|
27
|
-
_AUTHOR_XPATH = 'string(//h5[contains(@class, "text-warning")])'
|
28
|
-
_COVER_URL_XPATH = '//img[contains(@class, "img-responsive")]/@src'
|
29
|
-
_UPDATE_TIME_XPATH = '//p[contains(text(), "更新时间:")]'
|
30
|
-
_SERIAL_STATUS_XPATH = '//p[contains(text(), "作品状态:")]'
|
31
|
-
_TYPE_XPATH = '//p[contains(text(), "作品分类:")]'
|
32
|
-
_SUMMARY_XPATH = 'string(//div[@id="w0-collapse1"]/div)'
|
33
|
-
|
34
|
-
_VOLUME_NODE_XPATH = (
|
35
|
-
'//div[contains(@class, "panel-info") and contains(@class, "panel-default")]'
|
36
|
-
)
|
37
|
-
_VOLUME_TITLE_XPATH = './/div[contains(@class, "panel-heading")]//a/text()'
|
38
|
-
_CHAPTER_NODE_XPATH = (
|
39
|
-
'.//div[contains(@class, "panel-body")]//a[contains(@href, "view-chapter")]'
|
40
|
-
)
|
41
|
-
_CHAPTER_FLAT_XPATH = (
|
42
|
-
'//div[@class="panel-body"]//a[contains(@href, "view-chapter")]'
|
43
|
-
)
|
44
|
-
|
45
|
-
# Chapter field XPaths
|
46
|
-
_CHAPTER_TITLE_XPATH = "string(//section[contains(@class, 'col-md-9')]//h3)"
|
47
|
-
_CHAPTER_TIME_XPATH = (
|
48
|
-
"//div[contains(@class, 'row')]//div[contains(text(), '更新时间')]"
|
49
|
-
)
|
50
|
-
_CHAPTER_WORD_COUNT_XPATH = (
|
51
|
-
"//div[contains(@class, 'row')]//div[contains(text(), '章节字数')]"
|
52
|
-
)
|
53
|
-
_CHAPTER_CONTENT_XPATH = "//div[@id='w0-collapse1']//p//text()"
|
54
31
|
|
55
32
|
def parse_book_info(
|
56
33
|
self,
|
57
34
|
html_list: list[str],
|
58
35
|
**kwargs: Any,
|
59
|
-
) ->
|
60
|
-
"""
|
61
|
-
Parse a book info page and extract metadata and chapter structure.
|
62
|
-
|
63
|
-
:param html_list: Raw HTML of the book info page.
|
64
|
-
:return: Parsed metadata and chapter structure as a dictionary.
|
65
|
-
"""
|
36
|
+
) -> BookInfoDict | None:
|
66
37
|
if not html_list:
|
67
|
-
return
|
38
|
+
return None
|
68
39
|
|
69
40
|
tree = html.fromstring(html_list[0])
|
70
|
-
result: dict[str, Any] = {}
|
71
41
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
cover = tree.xpath(self._COVER_URL_XPATH)
|
76
|
-
result["cover_url"] = f"{self.BASE_URL}{cover[0]}" if cover else ""
|
77
|
-
|
78
|
-
update_node = tree.xpath(self._UPDATE_TIME_XPATH)
|
79
|
-
result["update_time"] = (
|
80
|
-
update_node[0].xpath("string()").replace("更新时间:", "").strip()
|
81
|
-
if update_node
|
82
|
-
else ""
|
42
|
+
book_name = self._first_str(
|
43
|
+
tree.xpath('//h3[contains(@class,"col-md-12")]/text()')
|
83
44
|
)
|
84
|
-
|
85
|
-
|
86
|
-
result["serial_status"] = (
|
87
|
-
serial_node[0].xpath("string()").replace("作品状态:", "").strip()
|
88
|
-
if serial_node
|
89
|
-
else ""
|
45
|
+
author = self._first_str(
|
46
|
+
tree.xpath('//h5[contains(@class,"text-warning")]/text()')
|
90
47
|
)
|
91
|
-
|
92
|
-
|
93
|
-
result["type"] = (
|
94
|
-
type_node[0].xpath("string()").replace("作品分类:", "").strip()
|
95
|
-
if type_node
|
96
|
-
else ""
|
48
|
+
cover_url = self.BASE_URL + self._first_str(
|
49
|
+
tree.xpath('//img[contains(@class,"img-responsive")]/@src')
|
97
50
|
)
|
98
51
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
52
|
+
update_time = self._first_str(
|
53
|
+
tree.xpath('//p[contains(text(),"更新时间:")]/text()'),
|
54
|
+
replaces=[("更新时间:", "")],
|
55
|
+
)
|
56
|
+
serial_status = self._first_str(
|
57
|
+
tree.xpath('//p[contains(text(),"作品状态:")]/text()'),
|
58
|
+
replaces=[("作品状态:", "")],
|
59
|
+
)
|
60
|
+
book_type = self._first_str(
|
61
|
+
tree.xpath('//p[contains(text(),"作品分类:")]/text()'),
|
62
|
+
replaces=[("作品分类:", "")],
|
63
|
+
)
|
64
|
+
summary = self._first_str([tree.xpath('string(//div[@id="w0-collapse1"]/div)')])
|
65
|
+
|
66
|
+
# volumes & chapters
|
67
|
+
volumes: list[VolumeInfoDict] = []
|
68
|
+
for volume_node in tree.xpath(
|
69
|
+
'//div[contains(@class,"panel-info") and contains(@class,"panel-default")]'
|
70
|
+
):
|
71
|
+
volume_name = (
|
72
|
+
self._first_str(
|
73
|
+
volume_node.xpath(
|
74
|
+
'.//div[contains(@class,"panel-heading")]//a/text()'
|
121
75
|
)
|
122
|
-
|
123
|
-
volumes.append(
|
124
|
-
{
|
125
|
-
"volume_name": volume_name,
|
126
|
-
"chapters": chapters,
|
127
|
-
}
|
128
76
|
)
|
77
|
+
or "未命名卷"
|
78
|
+
)
|
79
|
+
chapters: list[ChapterInfoDict] = []
|
80
|
+
for chap in volume_node.xpath(
|
81
|
+
'.//div[contains(@class,"panel-body")]//a[contains(@href,"view-chapter")]'
|
82
|
+
):
|
83
|
+
title = self._first_str([chap.xpath("string()")])
|
84
|
+
url = chap.get("href", "")
|
85
|
+
chapter_id = url.split("id=")[-1]
|
86
|
+
chapters.append({"title": title, "url": url, "chapterId": chapter_id})
|
87
|
+
volumes.append({"volume_name": volume_name, "chapters": chapters})
|
129
88
|
|
130
|
-
|
131
|
-
|
132
|
-
chapter_nodes = tree.xpath(self._CHAPTER_FLAT_XPATH)
|
89
|
+
# fallback: flat chapter list
|
90
|
+
if not volumes:
|
133
91
|
chapters = []
|
134
|
-
for chap in
|
135
|
-
|
92
|
+
for chap in tree.xpath(
|
93
|
+
'//div[@class="panel-body"]//a[contains(@href,"view-chapter")]'
|
94
|
+
):
|
95
|
+
title = self._first_str([chap.xpath("string()")])
|
136
96
|
url = chap.get("href", "")
|
137
97
|
chapter_id = url.split("id=")[-1] if "id=" in url else ""
|
138
|
-
chapters.append(
|
139
|
-
|
140
|
-
"title": title,
|
141
|
-
"url": url,
|
142
|
-
"chapterId": chapter_id,
|
143
|
-
}
|
144
|
-
)
|
145
|
-
|
146
|
-
volumes = [
|
147
|
-
{
|
148
|
-
"volume_name": "单卷",
|
149
|
-
"chapters": chapters,
|
150
|
-
}
|
151
|
-
]
|
98
|
+
chapters.append({"title": title, "url": url, "chapterId": chapter_id})
|
99
|
+
volumes = [{"volume_name": "单卷", "chapters": chapters}]
|
152
100
|
|
153
|
-
|
154
|
-
|
155
|
-
|
101
|
+
return {
|
102
|
+
"book_name": book_name,
|
103
|
+
"author": author,
|
104
|
+
"cover_url": cover_url,
|
105
|
+
"update_time": update_time,
|
106
|
+
"serial_status": serial_status,
|
107
|
+
"tags": [book_type],
|
108
|
+
"summary": summary,
|
109
|
+
"volumes": volumes,
|
110
|
+
"extra": {},
|
111
|
+
}
|
156
112
|
|
157
113
|
def parse_chapter(
|
158
114
|
self,
|
@@ -160,32 +116,32 @@ class YamiboParser(BaseParser):
|
|
160
116
|
chapter_id: str,
|
161
117
|
**kwargs: Any,
|
162
118
|
) -> ChapterDict | None:
|
163
|
-
"""
|
164
|
-
Parse a single chapter page and extract clean text or simplified HTML.
|
165
|
-
|
166
|
-
:param html_list: Raw HTML of the chapter page.
|
167
|
-
:param chapter_id: Identifier of the chapter being parsed.
|
168
|
-
:return: Cleaned chapter content as plain text or minimal HTML.
|
169
|
-
"""
|
170
119
|
if not html_list:
|
171
120
|
return None
|
172
121
|
tree = html.fromstring(html_list[0])
|
173
122
|
|
174
|
-
content_lines = tree.xpath(
|
175
|
-
content = "\n
|
123
|
+
content_lines = tree.xpath("//div[@id='w0-collapse1']//p//text()")
|
124
|
+
content = "\n".join(line.strip() for line in content_lines if line.strip())
|
176
125
|
if not content:
|
177
126
|
return None
|
178
127
|
|
179
|
-
title =
|
180
|
-
|
181
|
-
update_node = tree.xpath(self._CHAPTER_TIME_XPATH)
|
182
|
-
updated_at = (
|
183
|
-
update_node[0].text.strip().replace("更新时间:", "") if update_node else ""
|
128
|
+
title = self._first_str(
|
129
|
+
[tree.xpath("string(//section[contains(@class,'col-md-9')]//h3)")]
|
184
130
|
)
|
185
131
|
|
186
|
-
|
187
|
-
|
188
|
-
|
132
|
+
updated_at = self._first_str(
|
133
|
+
tree.xpath(
|
134
|
+
"//div[contains(@class,'row')]//div[contains(text(),'更新时间')]/text()"
|
135
|
+
),
|
136
|
+
replaces=[("更新时间:", "")],
|
137
|
+
)
|
138
|
+
word_str = self._first_str(
|
139
|
+
tree.xpath(
|
140
|
+
"//div[contains(@class,'row')]//div[contains(text(),'章节字数')]/text()"
|
141
|
+
),
|
142
|
+
replaces=[("章节字数:", "")],
|
143
|
+
)
|
144
|
+
word_count = int(word_str) if word_str.isdigit() else 0
|
189
145
|
|
190
146
|
return {
|
191
147
|
"id": chapter_id,
|
@@ -0,0 +1,166 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.parsers.yibige
|
4
|
+
------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.parsers.base import BaseParser
|
13
|
+
from novel_downloader.core.parsers.registry import register_parser
|
14
|
+
from novel_downloader.models import (
|
15
|
+
BookInfoDict,
|
16
|
+
ChapterDict,
|
17
|
+
ChapterInfoDict,
|
18
|
+
VolumeInfoDict,
|
19
|
+
)
|
20
|
+
|
21
|
+
|
22
|
+
@register_parser(
    site_keys=["yibige"],
)
class YibigeParser(BaseParser):
    """
    Parser for yibige (一笔阁) book pages.

    Relies on ``_first_str``, ``_norm_space`` and ``_is_ad_line`` inherited
    from ``BaseParser``; their implementations are not visible here.
    """

    # Substrings that mark advertisement lines. Not referenced directly in
    # this class - presumably consumed by BaseParser._is_ad_line (verify).
    ADS = {
        "首发无广告",
        "请分享",
        "读之阁",
        "小说网",
        "首发地址",
        "手机阅读",
        "一笔阁",
        "site_con_ad(",
        "chapter_content(",
    }

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse the info page and the catalog page into book metadata.

        :param html_list: ``[info_page_html, catalog_page_html]``.
        :param kwargs: Accepted for interface compatibility; unused here.
        :return: Book metadata with a single volume of chapters, or ``None``
                 when fewer than two pages are supplied.
        """
        if len(html_list) < 2:
            return None

        # Parse trees
        info_tree = html.fromstring(html_list[0])
        catalog_tree = html.fromstring(html_list[1])

        # --- From <meta> data, falling back to visible page elements ---
        book_name = self._meta(info_tree, "og:novel:book_name") or self._first_str(
            info_tree.xpath("//div[@id='info']/h1/text()")
        )

        author = self._meta(info_tree, "og:novel:author") or self._first_str(
            info_tree.xpath("//div[@id='info']/p[a]/a/text()")
        )

        cover_url = self._meta(info_tree, "og:image") or self._first_str(
            info_tree.xpath("//div[@id='fmimg']//img/@src")
        )

        update_time = self._meta(info_tree, "og:novel:update_time").replace("T", " ")
        serial_status = self._meta(info_tree, "og:novel:status") or "连载中"

        word_count = self._first_str(
            info_tree.xpath("//div[@id='info']/p[contains(., '字数:')]/text()[1]"),
            replaces=[("字数:", "")],
        )

        # Summary: first paragraph under #intro
        summary = self._first_str(info_tree.xpath("//div[@id='intro']//p[1]/text()"))

        # Category and tags. De-duplicate while keeping document order so the
        # result is deterministic; list(set(...)) order varies between runs
        # because of string hash randomization.
        book_type = self._meta(info_tree, "og:novel:category")
        tag_values = list(self._meta_all(info_tree, "book:tag"))
        if book_type:
            tag_values.append(book_type)
        tags = list(dict.fromkeys(tag_values))

        # --- Chapters from the catalog page ---
        chapters: list[ChapterInfoDict] = []
        for a in catalog_tree.xpath("//div[@id='list']/dl/dd/a"):
            href = (a.get("href") or "").strip()
            if not href:
                continue
            title = (a.text_content() or "").strip()
            if not title:
                continue
            # /6238/2496.html -> 2496
            chap_id = href.split("/")[-1].split(".")[0]
            chapters.append({"title": title, "url": href, "chapterId": chap_id})

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "serial_status": serial_status,
            "word_count": word_count,
            "summary": summary,
            "tags": tags,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a chapter page into its title and ad-free text content.

        :param html_list: Raw HTML pages; only the first entry is read.
        :param chapter_id: Identifier stored as ``id`` in the result.
        :param kwargs: Accepted for interface compatibility; unused here.
        :return: Chapter dict, or ``None`` when the input is empty or no
                 paragraph survives filtering.
        """
        if not html_list:
            return None
        tree = html.fromstring(html_list[0])

        title = self._first_str(tree.xpath("//div[@class='bookname']/h1/text()"))

        paragraphs: list[str] = []
        for p in tree.xpath("//div[@id='content']//p"):
            txt = self._norm_space(p.text_content())
            if not txt or self._is_ad(txt):
                continue
            paragraphs.append(txt)

        content = "\n".join(paragraphs).strip()
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "yibige"},
        }

    def _is_ad(self, s: str) -> bool:
        """
        Filter for footer junk inside #content.

        The line is checked as-is and again with spaces removed, so markers
        broken up by inserted spaces are still caught.
        """
        return self._is_ad_line(s) or self._is_ad_line(s.replace(" ", ""))

    @classmethod
    def _meta(cls, tree: html.HtmlElement, prop: str) -> str:
        """
        Get a single meta property content value via ``_first_str``.
        """
        return cls._first_str(tree.xpath(f"//meta[@property='{prop}']/@content"))

    @staticmethod
    def _meta_all(tree: html.HtmlElement, prop: str) -> list[str]:
        """
        Get all meta property content values (empty list when none match).
        """
        return tree.xpath(f"//meta[@property='{prop}']/@content") or []
|
@@ -3,18 +3,49 @@
|
|
3
3
|
novel_downloader.core.searchers
|
4
4
|
-------------------------------
|
5
5
|
|
6
|
+
Site-specific searcher implementations for discovering novels across multiple sources
|
6
7
|
"""
|
7
8
|
|
8
9
|
__all__ = [
|
9
10
|
"search",
|
11
|
+
"AaatxtSearcher",
|
10
12
|
"BiqugeSearcher",
|
13
|
+
"DxmwxSearcher",
|
14
|
+
"EightnovelSearcher",
|
11
15
|
"EsjzoneSearcher",
|
16
|
+
"HetushuSearcher",
|
17
|
+
"I25zwSearcher",
|
18
|
+
"Ixdzs8Searcher",
|
19
|
+
"Jpxs123Searcher",
|
20
|
+
"PiaotiaSearcher",
|
21
|
+
"QbtrSearcher",
|
12
22
|
"QianbiSearcher",
|
13
|
-
"
|
23
|
+
"Quanben5Searcher",
|
24
|
+
"ShuhaigeSearcher",
|
25
|
+
"TongrenquanSearcher",
|
26
|
+
"TtkanSearcher",
|
27
|
+
"XiaoshuowuSearcher",
|
28
|
+
"XiguashuwuSearcher",
|
29
|
+
"Xs63bSearcher",
|
14
30
|
]
|
15
31
|
|
16
|
-
from .
|
32
|
+
from .aaatxt import AaatxtSearcher
|
33
|
+
from .b520 import BiqugeSearcher
|
34
|
+
from .dxmwx import DxmwxSearcher
|
35
|
+
from .eightnovel import EightnovelSearcher
|
17
36
|
from .esjzone import EsjzoneSearcher
|
37
|
+
from .hetushu import HetushuSearcher
|
38
|
+
from .i25zw import I25zwSearcher
|
39
|
+
from .ixdzs8 import Ixdzs8Searcher
|
40
|
+
from .jpxs123 import Jpxs123Searcher
|
41
|
+
from .piaotia import PiaotiaSearcher
|
42
|
+
from .qbtr import QbtrSearcher
|
18
43
|
from .qianbi import QianbiSearcher
|
19
|
-
from .
|
44
|
+
from .quanben5 import Quanben5Searcher
|
20
45
|
from .registry import search
|
46
|
+
from .shuhaige import ShuhaigeSearcher
|
47
|
+
from .tongrenquan import TongrenquanSearcher
|
48
|
+
from .ttkan import TtkanSearcher
|
49
|
+
from .xiaoshuowu import XiaoshuowuSearcher
|
50
|
+
from .xiguashuwu import XiguashuwuSearcher
|
51
|
+
from .xs63b import Xs63bSearcher
|