novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +1 -3
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +26 -21
- novel_downloader/cli/download.py +77 -64
- novel_downloader/cli/export.py +16 -20
- novel_downloader/cli/main.py +1 -1
- novel_downloader/cli/search.py +62 -65
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +8 -5
- novel_downloader/config/adapter.py +65 -105
- novel_downloader/config/{loader.py → file_io.py} +53 -26
- novel_downloader/core/__init__.py +1 -0
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +3 -24
- novel_downloader/core/downloaders/base.py +49 -23
- novel_downloader/core/downloaders/common.py +191 -137
- novel_downloader/core/downloaders/qianbi.py +187 -146
- novel_downloader/core/downloaders/qidian.py +187 -141
- novel_downloader/core/downloaders/registry.py +4 -2
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +3 -20
- novel_downloader/core/exporters/base.py +33 -37
- novel_downloader/core/exporters/common/__init__.py +1 -2
- novel_downloader/core/exporters/common/epub.py +15 -10
- novel_downloader/core/exporters/common/main_exporter.py +19 -12
- novel_downloader/core/exporters/common/txt.py +14 -9
- novel_downloader/core/exporters/epub_util.py +59 -29
- novel_downloader/core/exporters/linovelib/__init__.py +1 -0
- novel_downloader/core/exporters/linovelib/epub.py +23 -25
- novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
- novel_downloader/core/exporters/linovelib/txt.py +17 -11
- novel_downloader/core/exporters/qidian.py +2 -8
- novel_downloader/core/exporters/registry.py +4 -2
- novel_downloader/core/exporters/txt_util.py +7 -7
- novel_downloader/core/fetchers/__init__.py +54 -48
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
- novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
- novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/lewenn.py +83 -0
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +5 -16
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/shuhaige.py +84 -0
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/wanbengo.py +83 -0
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +1 -9
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +9 -1
- novel_downloader/core/parsers/__init__.py +49 -12
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/esjzone.py +61 -66
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/linovelib.py +48 -64
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/qianbi.py +48 -50
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
- novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
- novel_downloader/core/parsers/qidian/main_parser.py +11 -38
- novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +5 -16
- novel_downloader/core/parsers/sfacg.py +38 -45
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +87 -131
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +34 -3
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
- novel_downloader/core/searchers/base.py +112 -36
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +43 -25
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +74 -40
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +24 -8
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +31 -82
- novel_downloader/locales/zh.json +32 -83
- novel_downloader/models/__init__.py +21 -22
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +4 -37
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +5 -0
- novel_downloader/resources/config/settings.toml +8 -70
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +13 -22
- novel_downloader/utils/chapter_storage.py +3 -2
- novel_downloader/utils/constants.py +4 -29
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +1 -1
- novel_downloader/utils/epub/constants.py +57 -16
- novel_downloader/utils/epub/documents.py +88 -194
- novel_downloader/utils/epub/models.py +0 -14
- novel_downloader/utils/epub/utils.py +63 -96
- novel_downloader/utils/file_utils/__init__.py +2 -23
- novel_downloader/utils/file_utils/io.py +3 -113
- novel_downloader/utils/file_utils/sanitize.py +0 -4
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/logger.py +8 -16
- novel_downloader/utils/network.py +2 -2
- novel_downloader/utils/state.py +4 -90
- novel_downloader/utils/text_utils/__init__.py +1 -7
- novel_downloader/utils/text_utils/diff_display.py +5 -7
- novel_downloader/utils/time_utils/__init__.py +5 -11
- novel_downloader/utils/time_utils/datetime_utils.py +20 -29
- novel_downloader/utils/time_utils/sleep_utils.py +4 -8
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/core/downloaders/biquge.py +0 -29
- novel_downloader/core/downloaders/esjzone.py +0 -29
- novel_downloader/core/downloaders/linovelib.py +0 -29
- novel_downloader/core/downloaders/sfacg.py +0 -29
- novel_downloader/core/downloaders/yamibo.py +0 -29
- novel_downloader/core/exporters/biquge.py +0 -22
- novel_downloader/core/exporters/esjzone.py +0 -22
- novel_downloader/core/exporters/qianbi.py +0 -22
- novel_downloader/core/exporters/sfacg.py +0 -22
- novel_downloader/core/exporters/yamibo.py +0 -22
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -422
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -209
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -198
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -326
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -194
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -234
- novel_downloader/core/parsers/biquge.py +0 -139
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/types.py +0 -13
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/hash_store.py +0 -280
- novel_downloader/utils/fontocr/hash_utils.py +0 -103
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -315
- novel_downloader/utils/fontocr/ocr_v2.py +0 -764
- novel_downloader/utils/fontocr/ocr_v3.py +0 -744
- novel_downloader-1.5.0.dist-info/METADATA +0 -196
- novel_downloader-1.5.0.dist-info/RECORD +0 -164
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.parsers.qbtr
|
4
|
+
----------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import re
|
9
|
+
from typing import Any
|
10
|
+
|
11
|
+
from lxml import html
|
12
|
+
|
13
|
+
from novel_downloader.core.parsers.base import BaseParser
|
14
|
+
from novel_downloader.core.parsers.registry import register_parser
|
15
|
+
from novel_downloader.models import (
|
16
|
+
BookInfoDict,
|
17
|
+
ChapterDict,
|
18
|
+
ChapterInfoDict,
|
19
|
+
VolumeInfoDict,
|
20
|
+
)
|
21
|
+
|
22
|
+
|
23
|
+
@register_parser(
    site_keys=["qbtr"],
)
class QbtrParser(BaseParser):
    """
    Parser for 全本同人小说 (qbtr) book pages.

    ``parse_book_info`` reads the book info page (and, when supplied, the
    download page) and returns the book metadata plus a single-volume
    chapter list. ``parse_chapter`` extracts the title and paragraph text
    of one chapter page.
    """

    BASE_URL = "https://www.qbtr.cc"

    # The author value runs up to the "日期" label (or the end of the text).
    # Anchoring on the label instead of excluding the single character "日"
    # keeps author names that themselves contain "日" intact. Both ASCII and
    # full-width colons are accepted after the labels.
    _AUTHOR_RE = re.compile(r"作者[::]\s*(.*?)\s*(?=日期[::]|$)", re.S)
    _DATE_RE = re.compile(r"日期[::]\s*([\d-]+)")
    # General chapter URL shape: /{category}/{bookId}/{chapterId}.html
    _CHAPTER_URL_RE = re.compile(r"^/[^/]+/\d+/(\d+)\.html$")

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse the book info page and the optional download page.

        :param html_list: ``html_list[0]`` is the raw HTML of the book info
            page; ``html_list[1]``, when present and non-empty, is the raw
            HTML of the download page.
        :return: Book metadata with one volume holding every chapter, or
            ``None`` when no info page HTML is supplied.
        """
        # An empty string cannot be parsed by lxml, so treat it as "no page".
        if not html_list or not html_list[0]:
            return None

        # Parse the main info page
        tree = html.fromstring(html_list[0])

        # Book name
        book_name = self._first_str(tree.xpath('//div[@class="infos"]/h1/text()'))

        # Tags: the second breadcrumb (e.g., "同人小说")
        tag = self._first_str(
            tree.xpath('//div[contains(@class,"menNav")]/a[2]/text()')
        )
        tags = [tag] if tag else []

        # Author & update_time from the date div; both stay empty when the
        # div is missing instead of failing on an empty XPath result.
        author = ""
        update_time = ""
        date_div = tree.xpath('//div[@class="date"]')
        if date_div:
            date_text = html.tostring(date_div[0], encoding="unicode", method="text")
            author_match = self._AUTHOR_RE.search(date_text)
            if author_match:
                author = author_match.group(1).strip()
            date_match = self._DATE_RE.search(date_text)
            if date_match:
                update_time = date_match.group(1)

        # Summary from the <p> inside infos
        paras = tree.xpath('//div[@class="infos"]/p//text()')
        summary = "\n".join(p.strip() for p in paras if p.strip())

        # Chapters from the book_list
        chapters: list[ChapterInfoDict] = []
        for a in tree.xpath('//div[contains(@class,"book_list")]//li/a'):
            url = a.get("href", "").strip()
            title = a.text_content().strip()
            m = self._CHAPTER_URL_RE.search(url)
            # Links that do not follow the chapter URL shape keep an empty id.
            cid = m.group(1) if m else ""
            chapters.append({"title": title, "url": url, "chapterId": cid})

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        # Parse the download page (second HTML)
        download_url = ""
        if len(html_list) > 1 and html_list[1]:
            dtree = html.fromstring(html_list[1])
            anchors = dtree.xpath('//a[@id="dowloadnUrl"]')
            if anchors:
                link = anchors[0].get("link") or anchors[0].get("href") or ""
                # Only build a URL when the anchor actually carries a link;
                # otherwise the result would be the bare site root.
                if link:
                    download_url = self._fix_download_link(link)

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": "",
            "update_time": update_time,
            "tags": tags,
            "summary": summary,
            "volumes": volumes,
            "extra": {"download_url": download_url},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a single chapter page.

        :param html_list: ``html_list[0]`` is the raw HTML of the chapter page.
        :param chapter_id: Identifier stored as ``id`` in the result.
        :return: Chapter id, title and newline-joined paragraph text, or
            ``None`` when no HTML is supplied or no paragraph text is found.
        """
        # An empty string cannot be parsed by lxml, so treat it as "no page".
        if not html_list or not html_list[0]:
            return None

        tree = html.fromstring(html_list[0])

        raw_title = self._first_str(
            tree.xpath('//div[contains(@class,"read_chapterName")]//h1/text()')
        )

        # The last breadcrumb link text is used as the book name and removed
        # from the heading to leave only the chapter title.
        crumbs = tree.xpath('//div[contains(@class,"readTop")]//a/text()')
        book_name = crumbs[-1].strip() if crumbs else ""

        title = raw_title.replace(book_name, "").strip()

        paragraphs = tree.xpath('//div[contains(@class,"read_chapterDetail")]/p')
        texts = [txt for p in paragraphs if (txt := p.text_content().strip())]

        content = "\n".join(texts)
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "qbtr"},
        }

    @classmethod
    def _fix_download_link(cls, link: str) -> str:
        """
        Turn the download anchor's link into an absolute URL.

        Replaces the ``qb../`` segment with ``/e/DownSys/`` and prefixes the
        result with ``BASE_URL``.

        :param link: Link value read from the download anchor.
        :return: ``BASE_URL`` followed by the rewritten link.
        """
        true_link = link.replace("qb../", "/e/DownSys/")
        return f"{cls.BASE_URL}{true_link}"
|
@@ -12,63 +12,60 @@ from lxml import html
|
|
12
12
|
|
13
13
|
from novel_downloader.core.parsers.base import BaseParser
|
14
14
|
from novel_downloader.core.parsers.registry import register_parser
|
15
|
-
from novel_downloader.models import
|
15
|
+
from novel_downloader.models import (
|
16
|
+
BookInfoDict,
|
17
|
+
ChapterDict,
|
18
|
+
VolumeInfoDict,
|
19
|
+
)
|
16
20
|
|
17
21
|
|
18
22
|
@register_parser(
|
19
23
|
site_keys=["qianbi"],
|
20
|
-
backends=["session", "browser"],
|
21
24
|
)
|
22
25
|
class QianbiParser(BaseParser):
|
23
|
-
"""
|
26
|
+
"""
|
27
|
+
Parser for 铅笔小说 book pages.
|
28
|
+
"""
|
24
29
|
|
25
30
|
def parse_book_info(
|
26
31
|
self,
|
27
32
|
html_list: list[str],
|
28
33
|
**kwargs: Any,
|
29
|
-
) ->
|
30
|
-
"""
|
31
|
-
Parse a book info page and extract metadata and chapter structure.
|
32
|
-
|
33
|
-
:param html_list: Raw HTML of the book info pages.
|
34
|
-
:return: Parsed metadata and chapter structure as a dictionary.
|
35
|
-
"""
|
34
|
+
) -> BookInfoDict | None:
|
36
35
|
if len(html_list) < 2:
|
37
|
-
return
|
36
|
+
return None
|
38
37
|
|
39
38
|
info_tree = html.fromstring(html_list[0])
|
40
39
|
catalog_tree = html.fromstring(html_list[1])
|
41
|
-
result: dict[str, Any] = {}
|
42
|
-
|
43
|
-
title = info_tree.xpath('//h1[@class="page-title"]/text()')
|
44
|
-
result["book_name"] = title[0].strip() if title else ""
|
45
40
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
41
|
+
book_name = self._first_str(info_tree.xpath('//h1[@class="page-title"]/text()'))
|
42
|
+
author = self._first_str(
|
43
|
+
info_tree.xpath('//a[contains(@href,"/author/")]/@title')
|
44
|
+
)
|
45
|
+
cover_url = self._first_str(
|
46
|
+
info_tree.xpath('//div[@class="novel-cover"]//img/@data-src')
|
47
|
+
)
|
48
|
+
serial_status = self._first_str(
|
49
|
+
info_tree.xpath(
|
50
|
+
'//a[@class="tag-link" and (text()="完结" or text()="连载")]/text()'
|
51
|
+
)
|
52
|
+
)
|
53
|
+
word_count = self._first_str(
|
54
|
+
info_tree.xpath('//span[contains(text(), "字")]/text()')
|
54
55
|
)
|
55
|
-
result["serial_status"] = status[0] if status else ""
|
56
|
-
|
57
|
-
word_count_raw = info_tree.xpath('//span[contains(text(), "万字")]/text()')
|
58
|
-
result["word_count"] = word_count_raw[0].strip() if word_count_raw else ""
|
59
56
|
|
60
57
|
summary_node = info_tree.xpath(
|
61
58
|
'//div[@class="novel-info-item novel-info-content"]/span'
|
62
59
|
)
|
63
60
|
if summary_node and summary_node[0] is not None:
|
64
|
-
|
61
|
+
summary = str(summary_node[0].text_content()).strip()
|
65
62
|
else:
|
66
|
-
|
63
|
+
summary = ""
|
67
64
|
|
68
|
-
|
65
|
+
update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
69
66
|
|
70
|
-
volumes: list[
|
71
|
-
current_volume = None
|
67
|
+
volumes: list[VolumeInfoDict] = []
|
68
|
+
current_volume: VolumeInfoDict | None = None
|
72
69
|
|
73
70
|
for elem in catalog_tree.xpath('//div[@class="box"]/*'):
|
74
71
|
class_attr = elem.get("class", "")
|
@@ -104,9 +101,17 @@ class QianbiParser(BaseParser):
|
|
104
101
|
if current_volume:
|
105
102
|
volumes.append(current_volume)
|
106
103
|
|
107
|
-
|
108
|
-
|
109
|
-
|
104
|
+
return {
|
105
|
+
"book_name": book_name,
|
106
|
+
"author": author,
|
107
|
+
"cover_url": cover_url,
|
108
|
+
"update_time": update_time,
|
109
|
+
"word_count": word_count,
|
110
|
+
"serial_status": serial_status,
|
111
|
+
"summary": summary,
|
112
|
+
"volumes": volumes,
|
113
|
+
"extra": {},
|
114
|
+
}
|
110
115
|
|
111
116
|
def parse_chapter(
|
112
117
|
self,
|
@@ -114,31 +119,24 @@ class QianbiParser(BaseParser):
|
|
114
119
|
chapter_id: str,
|
115
120
|
**kwargs: Any,
|
116
121
|
) -> ChapterDict | None:
|
117
|
-
"""
|
118
|
-
Parse a single chapter page and extract clean text or simplified HTML.
|
119
|
-
|
120
|
-
:param html_list: Raw HTML of the chapter page.
|
121
|
-
:param chapter_id: Identifier of the chapter being parsed.
|
122
|
-
:return: Cleaned chapter content as plain text or minimal HTML.
|
123
|
-
"""
|
124
122
|
if not html_list:
|
125
123
|
return None
|
126
124
|
tree = html.fromstring(html_list[0])
|
127
125
|
|
126
|
+
# Content paragraphs
|
128
127
|
paras = tree.xpath('//div[@class="article-content"]/p/text()')
|
129
|
-
content_text = "\n
|
128
|
+
content_text = "\n".join(p.strip() for p in paras if p.strip())
|
130
129
|
if not content_text:
|
131
130
|
return None
|
132
131
|
|
133
|
-
|
134
|
-
|
132
|
+
title_text = self._first_str(tree.xpath('//h1[@class="article-title"]/text()'))
|
133
|
+
volume_text = self._first_str(tree.xpath('//h3[@class="text-muted"]/text()'))
|
135
134
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
next_href = tree.xpath('//div[@class="footer"]/a[@class="f-right"]/@href')
|
135
|
+
next_href = self._first_str(
|
136
|
+
tree.xpath('//div[@class="footer"]/a[@class="f-right"]/@href')
|
137
|
+
)
|
140
138
|
next_chapter_id = (
|
141
|
-
next_href
|
139
|
+
next_href.split("/")[-1].replace(".html", "") if next_href else ""
|
142
140
|
)
|
143
141
|
|
144
142
|
return {
|
@@ -12,10 +12,11 @@ time, status, word count, summary, and volume-chapter structure.
|
|
12
12
|
import logging
|
13
13
|
import re
|
14
14
|
from datetime import datetime
|
15
|
-
from typing import Any
|
16
15
|
|
17
16
|
from lxml import html
|
18
17
|
|
18
|
+
from novel_downloader.models import BookInfoDict, ChapterInfoDict, VolumeInfoDict
|
19
|
+
|
19
20
|
logger = logging.getLogger(__name__)
|
20
21
|
|
21
22
|
|
@@ -23,7 +24,7 @@ def _chapter_url_to_id(url: str) -> str:
|
|
23
24
|
return url.rstrip("/").split("/")[-1]
|
24
25
|
|
25
26
|
|
26
|
-
def parse_book_info(html_str: str) ->
|
27
|
+
def parse_book_info(html_str: str) -> BookInfoDict | None:
|
27
28
|
"""
|
28
29
|
Extract metadata: title, author, cover_url, update_time, status,
|
29
30
|
word_count, summary, and volumes with chapters.
|
@@ -31,60 +32,58 @@ def parse_book_info(html_str: str) -> dict[str, Any]:
|
|
31
32
|
:param html_str: Raw HTML of the book info page.
|
32
33
|
:return: A dict containing book metadata.
|
33
34
|
"""
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
return info
|
35
|
+
doc = html.fromstring(html_str)
|
36
|
+
|
37
|
+
book_name = doc.xpath('string(//h1[@id="bookName"])').strip()
|
38
|
+
|
39
|
+
author = doc.xpath('string(//a[@class="writer-name"])').strip()
|
40
|
+
|
41
|
+
book_id = doc.xpath('//a[@id="bookImg"]/@data-bid')[0]
|
42
|
+
cover_url = f"https://bookcover.yuewen.com/qdbimg/349573/{book_id}/600.webp"
|
43
|
+
|
44
|
+
ut = doc.xpath('string(//span[@class="update-time"])')
|
45
|
+
ut = ut.replace("更新时间:", "").strip()
|
46
|
+
if re.match(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$", ut):
|
47
|
+
update_time = ut
|
48
|
+
else:
|
49
|
+
update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
50
|
+
|
51
|
+
serial_status = doc.xpath('string(//p[@class="book-attribute"]/span[1])').strip()
|
52
|
+
|
53
|
+
tags_elem = doc.xpath('//p[contains(@class,"all-label")]//a/text()')
|
54
|
+
tags = [t.strip() for t in tags_elem if t.strip()]
|
55
|
+
|
56
|
+
word_count = doc.xpath('string(//p[@class="count"]/em[1])').strip()
|
57
|
+
|
58
|
+
summary_brief = doc.xpath('string(//p[@class="intro"])').strip()
|
59
|
+
|
60
|
+
raw = doc.xpath('//p[@id="book-intro-detail"]//text()')
|
61
|
+
summary = "\n".join(line.strip() for line in raw if line.strip())
|
62
|
+
|
63
|
+
volumes: list[VolumeInfoDict] = []
|
64
|
+
for vol in doc.xpath('//div[@id="allCatalog"]//div[@class="catalog-volume"]'):
|
65
|
+
vol_name = vol.xpath('string(.//h3[@class="volume-name"])').strip()
|
66
|
+
vol_name = vol_name.split(chr(183))[0].strip()
|
67
|
+
chapters: list[ChapterInfoDict] = []
|
68
|
+
for li in vol.xpath('.//ul[contains(@class,"volume-chapters")]/li'):
|
69
|
+
a = li.xpath('.//a[@class="chapter-name"]')[0]
|
70
|
+
title = a.text.strip()
|
71
|
+
url = a.get("href")
|
72
|
+
chapters.append(
|
73
|
+
{"title": title, "url": url, "chapterId": _chapter_url_to_id(url)}
|
74
|
+
)
|
75
|
+
volumes.append({"volume_name": vol_name, "chapters": chapters})
|
76
|
+
|
77
|
+
return {
|
78
|
+
"book_name": book_name,
|
79
|
+
"author": author,
|
80
|
+
"cover_url": cover_url,
|
81
|
+
"update_time": update_time,
|
82
|
+
"word_count": word_count,
|
83
|
+
"serial_status": serial_status,
|
84
|
+
"tags": tags,
|
85
|
+
"summary_brief": summary_brief,
|
86
|
+
"summary": summary,
|
87
|
+
"volumes": volumes,
|
88
|
+
"extra": {},
|
89
|
+
}
|