novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,18 +3,47 @@
|
|
3
3
|
novel_downloader.core.downloaders.qianbi
|
4
4
|
----------------------------------------
|
5
5
|
|
6
|
+
Downloader implementation for Qianbi novels, with chapter ID repair logic.
|
6
7
|
"""
|
7
8
|
|
8
|
-
|
9
|
+
import asyncio
|
10
|
+
from collections.abc import Awaitable, Callable
|
11
|
+
from pathlib import Path
|
12
|
+
from typing import Any
|
13
|
+
|
14
|
+
from novel_downloader.core.downloaders.base import BaseDownloader
|
15
|
+
from novel_downloader.core.downloaders.registry import register_downloader
|
16
|
+
from novel_downloader.core.downloaders.signals import (
|
17
|
+
STOP,
|
18
|
+
Progress,
|
19
|
+
StopToken,
|
20
|
+
)
|
9
21
|
from novel_downloader.core.interfaces import (
|
10
22
|
FetcherProtocol,
|
11
23
|
ParserProtocol,
|
12
24
|
)
|
13
|
-
from novel_downloader.models import
|
25
|
+
from novel_downloader.models import (
|
26
|
+
BookConfig,
|
27
|
+
BookInfoDict,
|
28
|
+
ChapterDict,
|
29
|
+
DownloaderConfig,
|
30
|
+
)
|
31
|
+
from novel_downloader.utils import (
|
32
|
+
ChapterStorage,
|
33
|
+
async_jitter_sleep,
|
34
|
+
)
|
35
|
+
|
36
|
+
|
37
|
+
@register_downloader(site_keys=["qianbi"])
|
38
|
+
class QianbiDownloader(BaseDownloader):
|
39
|
+
"""
|
40
|
+
Downloader for Qianbi (铅笔) novels.
|
14
41
|
|
42
|
+
Repairs missing chapter IDs by following 'next' links, then downloads
|
43
|
+
each chapter as a unit (fetch -> parse -> enqueue storage).
|
44
|
+
"""
|
15
45
|
|
16
|
-
|
17
|
-
""""""
|
46
|
+
DEFAULT_SOURCE_ID = 0
|
18
47
|
|
19
48
|
def __init__(
|
20
49
|
self,
|
@@ -23,3 +52,302 @@ class QianbiDownloader(CommonDownloader):
|
|
23
52
|
config: DownloaderConfig,
|
24
53
|
):
|
25
54
|
super().__init__(fetcher, parser, config, "qianbi")
|
55
|
+
|
56
|
+
async def _download_one(
    self,
    book: BookConfig,
    *,
    progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
    cancel_event: asyncio.Event | None = None,
    **kwargs: Any,
) -> None:
    """
    Run the full download pipeline for a single book.

    Stages: load book metadata -> repair missing chapter IDs -> a producer
    enqueues chapter IDs -> chapter workers fetch and parse each chapter ->
    one storage worker writes parsed chapters in batches.

    :param book: BookConfig with at least 'book_id'. The optional keys
        'start_id', 'end_id' and 'ignore_ids' are also read.
    :param progress_hook: Optional async callback handed to ``Progress``
        (presumably called with (done, total) -- confirm against Progress).
    :param cancel_event: Optional event; once set, no new fetches are
        started and everything already parsed is flushed to storage.
    :param kwargs: Accepted but not used by this implementation.
    """
    TAG = "[Downloader]"

    book_id = book["book_id"]
    start_id = book.get("start_id")
    end_id = book.get("end_id")
    ignore_set = set(book.get("ignore_ids", []))

    raw_base = self._raw_data_dir / book_id
    raw_base.mkdir(parents=True, exist_ok=True)
    html_dir = self._debug_dir / book_id / "html"

    chapter_storage = ChapterStorage(
        raw_base=raw_base,
        priorities=self.PRIORITIES_MAP,
    )
    chapter_storage.connect()

    def cancelled() -> bool:
        """Return True once a cancel event was supplied and has been set."""
        return bool(cancel_event and cancel_event.is_set())

    try:
        # --- metadata ---
        book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
        if not book_info:
            return

        book_info = await self._repair_chapter_ids(
            book_id,
            book_info,
            chapter_storage,
            html_dir,
        )

        vols = book_info["volumes"]
        total_chapters = sum(len(v["chapters"]) for v in vols)
        if total_chapters == 0:
            self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
            return

        progress = Progress(total_chapters, progress_hook)

        # --- queues & batching ---
        cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
        save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
        batch: list[ChapterDict] = []

        async def flush_batch() -> None:
            """Write the pending batch; count it as progress only on success."""
            if not batch:
                return
            try:
                chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
            except Exception as e:
                self.logger.error(
                    "[Storage] batch upsert failed (size=%d): %s",
                    len(batch),
                    e,
                    exc_info=True,
                )
            else:
                await progress.bump(len(batch))
            finally:
                # The batch is dropped even on failure so it cannot grow
                # without bound or be retried forever.
                batch.clear()

        # --- stage: storage worker ---
        async def storage_worker() -> None:
            """
            Consume parsed chapters and write them in batches.

            Terminates after receiving STOP from each chapter worker.

            On cancel: keeps consuming (to avoid blocking producers),
            stores every chapter that still arrives, and exits once all
            STOPs are seen.
            """
            stop_count = 0
            while True:
                item = await save_q.get()
                if isinstance(item, StopToken):
                    stop_count += 1
                    if stop_count == self.workers:
                        # All chapter workers have exited.
                        await flush_batch()
                        return
                    # else keep waiting for remaining STOPs
                    continue

                # Normal chapter
                batch.append(item)
                if len(batch) >= self.storage_batch_size:
                    await flush_batch()

                if cancelled():
                    # Drain whatever is already in the queue
                    try:
                        while True:
                            nxt = save_q.get_nowait()
                            if isinstance(nxt, StopToken):
                                stop_count += 1
                            else:
                                batch.append(nxt)
                    except asyncio.QueueEmpty:
                        pass
                    await flush_batch()
                    # Wait for remaining STOPs so chapter workers can finish.
                    # A worker whose fetch was already in flight when the
                    # cancel arrived still delivers its chapter here; keep
                    # it instead of discarding already-downloaded data.
                    while stop_count < self.workers:
                        nxt = await save_q.get()
                        if isinstance(nxt, StopToken):
                            stop_count += 1
                        else:
                            batch.append(nxt)
                    # Final flush of the late arrivals (no-op when empty).
                    await flush_batch()
                    return

        # --- stage: chapter worker ---
        # NOTE: exactly self.workers chapter workers are spawned below, so
        # this semaphore is never contended; kept as a safety cap.
        sem = asyncio.Semaphore(self.workers)

        async def chapter_worker() -> None:
            """
            Fetch + parse with retry, then enqueue to save_q.

            Exits on STOP, or early if cancel is set before starting a new fetch.
            Always puts exactly one STOP on save_q before returning.
            """
            while True:
                cid = await cid_q.get()
                if isinstance(cid, StopToken):
                    # Propagate one STOP to storage and exit.
                    await save_q.put(STOP)
                    return

                if not cid or cid in ignore_set:
                    # Ignore silently and continue.
                    continue

                # If cancelled, don't start a new network call; let storage finish.
                if cancelled():
                    await save_q.put(STOP)
                    return

                async with sem:
                    chap = await self._process_chapter(book_id, cid, html_dir)
                if chap:
                    await save_q.put(chap)

                # polite pacing
                await async_jitter_sleep(
                    self.request_interval,
                    mul_spread=1.1,
                    max_sleep=self.request_interval + 2,
                )

        # --- stage: producer ---
        async def producer() -> None:
            """
            Enqueue chapter IDs (respecting start/end/skip_existing).

            Always sends STOP x workers at the end (even if cancelled early),
            so chapter workers can exit deterministically.
            """
            try:
                async for cid in self._chapter_ids(vols, start_id, end_id):
                    if cancelled():
                        break
                    if self.skip_existing and chapter_storage.exists(cid):
                        # Count as completed but don't enqueue.
                        await progress.bump(1)
                    else:
                        await cid_q.put(cid)
            finally:
                for _ in range(self.workers):
                    await cid_q.put(STOP)

        # --- run the pipeline ---
        async with asyncio.TaskGroup() as tg:
            tg.create_task(storage_worker())
            for _ in range(self.workers):
                tg.create_task(chapter_worker())
            tg.create_task(producer())

        # --- done ---
        if cancelled():
            self.logger.info(
                "%s Novel '%s' cancelled: flushed %d/%d chapters.",
                TAG,
                book_info.get("book_name", "unknown"),
                progress.done,
                progress.total,
            )
        else:
            self.logger.info(
                "%s Novel '%s' download completed.",
                TAG,
                book_info.get("book_name", "unknown"),
            )

    finally:
        # Close storage on every exit path, including early returns.
        chapter_storage.close()
|
261
|
+
|
262
|
+
async def _repair_chapter_ids(
|
263
|
+
self,
|
264
|
+
book_id: str,
|
265
|
+
book_info: BookInfoDict,
|
266
|
+
storage: ChapterStorage,
|
267
|
+
html_dir: Path,
|
268
|
+
) -> BookInfoDict:
|
269
|
+
"""
|
270
|
+
Fill in missing chapterId fields by retrieving the previous chapter
|
271
|
+
and following its 'next_chapter_id'. Uses storage to avoid refetching.
|
272
|
+
"""
|
273
|
+
prev_cid: str = ""
|
274
|
+
for vol in book_info["volumes"]:
|
275
|
+
for chap in vol["chapters"]:
|
276
|
+
cid = chap.get("chapterId")
|
277
|
+
if cid:
|
278
|
+
prev_cid = cid
|
279
|
+
continue
|
280
|
+
|
281
|
+
# no valid previous to follow
|
282
|
+
if not prev_cid:
|
283
|
+
continue
|
284
|
+
|
285
|
+
# missing id: try storage
|
286
|
+
data = storage.get_best_chapter(prev_cid)
|
287
|
+
if not data:
|
288
|
+
# fetch+parse previous to discover next
|
289
|
+
data = await self._process_chapter(book_id, prev_cid, html_dir)
|
290
|
+
if not data:
|
291
|
+
self.logger.warning(
|
292
|
+
"failed to fetch chapter %s, skipping repair",
|
293
|
+
prev_cid,
|
294
|
+
)
|
295
|
+
continue
|
296
|
+
storage.upsert_chapter(data, self.DEFAULT_SOURCE_ID)
|
297
|
+
await async_jitter_sleep(
|
298
|
+
self.request_interval,
|
299
|
+
mul_spread=1.1,
|
300
|
+
max_sleep=self.request_interval + 2,
|
301
|
+
)
|
302
|
+
|
303
|
+
next_cid = data.get("extra", {}).get("next_chapter_id")
|
304
|
+
if not next_cid:
|
305
|
+
self.logger.warning(
|
306
|
+
"No next_chapter_id in data for %s",
|
307
|
+
prev_cid,
|
308
|
+
)
|
309
|
+
continue
|
310
|
+
|
311
|
+
self.logger.info(
|
312
|
+
"repaired chapterId: set to %s (from prev %s)",
|
313
|
+
next_cid,
|
314
|
+
prev_cid,
|
315
|
+
)
|
316
|
+
chap["chapterId"] = next_cid
|
317
|
+
prev_cid = next_cid
|
318
|
+
|
319
|
+
self._save_book_info(book_id, book_info)
|
320
|
+
return book_info
|
321
|
+
|
322
|
+
async def _process_chapter(
|
323
|
+
self,
|
324
|
+
book_id: str,
|
325
|
+
cid: str,
|
326
|
+
html_dir: Path,
|
327
|
+
) -> ChapterDict | None:
|
328
|
+
"""
|
329
|
+
Fetches, saves raw HTML, parses a single chapter,
|
330
|
+
retrying up to self.retry_times.
|
331
|
+
|
332
|
+
:return: ChapterDict on success, or None on failure.
|
333
|
+
"""
|
334
|
+
for attempt in range(self.retry_times + 1):
|
335
|
+
try:
|
336
|
+
html_list = await self.fetcher.get_book_chapter(book_id, cid)
|
337
|
+
self._save_html_pages(html_dir, cid, html_list)
|
338
|
+
chap = await asyncio.to_thread(
|
339
|
+
self.parser.parse_chapter, html_list, cid
|
340
|
+
)
|
341
|
+
if not chap:
|
342
|
+
raise ValueError("Empty parse result")
|
343
|
+
return chap
|
344
|
+
except Exception as e:
|
345
|
+
if attempt < self.retry_times:
|
346
|
+
self.logger.info(f"[ChapterWorker] Retry {cid} ({attempt+1}): {e}")
|
347
|
+
backoff = self.backoff_factor * (2**attempt)
|
348
|
+
await async_jitter_sleep(
|
349
|
+
base=backoff, mul_spread=1.2, max_sleep=backoff + 3
|
350
|
+
)
|
351
|
+
else:
|
352
|
+
self.logger.warning(f"[ChapterWorker] Failed {cid}: {e}")
|
353
|
+
return None
|