novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +1 -3
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +26 -21
- novel_downloader/cli/download.py +77 -64
- novel_downloader/cli/export.py +16 -20
- novel_downloader/cli/main.py +1 -1
- novel_downloader/cli/search.py +62 -65
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +8 -5
- novel_downloader/config/adapter.py +65 -105
- novel_downloader/config/{loader.py → file_io.py} +53 -26
- novel_downloader/core/__init__.py +1 -0
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +3 -24
- novel_downloader/core/downloaders/base.py +49 -23
- novel_downloader/core/downloaders/common.py +191 -137
- novel_downloader/core/downloaders/qianbi.py +187 -146
- novel_downloader/core/downloaders/qidian.py +187 -141
- novel_downloader/core/downloaders/registry.py +4 -2
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +3 -20
- novel_downloader/core/exporters/base.py +33 -37
- novel_downloader/core/exporters/common/__init__.py +1 -2
- novel_downloader/core/exporters/common/epub.py +15 -10
- novel_downloader/core/exporters/common/main_exporter.py +19 -12
- novel_downloader/core/exporters/common/txt.py +14 -9
- novel_downloader/core/exporters/epub_util.py +59 -29
- novel_downloader/core/exporters/linovelib/__init__.py +1 -0
- novel_downloader/core/exporters/linovelib/epub.py +23 -25
- novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
- novel_downloader/core/exporters/linovelib/txt.py +17 -11
- novel_downloader/core/exporters/qidian.py +2 -8
- novel_downloader/core/exporters/registry.py +4 -2
- novel_downloader/core/exporters/txt_util.py +7 -7
- novel_downloader/core/fetchers/__init__.py +54 -48
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
- novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
- novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/lewenn.py +83 -0
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +5 -16
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/shuhaige.py +84 -0
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/wanbengo.py +83 -0
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +1 -9
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +9 -1
- novel_downloader/core/parsers/__init__.py +49 -12
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/esjzone.py +61 -66
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/linovelib.py +48 -64
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/qianbi.py +48 -50
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
- novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
- novel_downloader/core/parsers/qidian/main_parser.py +11 -38
- novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +5 -16
- novel_downloader/core/parsers/sfacg.py +38 -45
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +87 -131
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +34 -3
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
- novel_downloader/core/searchers/base.py +112 -36
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +43 -25
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +74 -40
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +24 -8
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +31 -82
- novel_downloader/locales/zh.json +32 -83
- novel_downloader/models/__init__.py +21 -22
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +4 -37
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +5 -0
- novel_downloader/resources/config/settings.toml +8 -70
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +13 -22
- novel_downloader/utils/chapter_storage.py +3 -2
- novel_downloader/utils/constants.py +4 -29
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +1 -1
- novel_downloader/utils/epub/constants.py +57 -16
- novel_downloader/utils/epub/documents.py +88 -194
- novel_downloader/utils/epub/models.py +0 -14
- novel_downloader/utils/epub/utils.py +63 -96
- novel_downloader/utils/file_utils/__init__.py +2 -23
- novel_downloader/utils/file_utils/io.py +3 -113
- novel_downloader/utils/file_utils/sanitize.py +0 -4
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/logger.py +8 -16
- novel_downloader/utils/network.py +2 -2
- novel_downloader/utils/state.py +4 -90
- novel_downloader/utils/text_utils/__init__.py +1 -7
- novel_downloader/utils/text_utils/diff_display.py +5 -7
- novel_downloader/utils/time_utils/__init__.py +5 -11
- novel_downloader/utils/time_utils/datetime_utils.py +20 -29
- novel_downloader/utils/time_utils/sleep_utils.py +4 -8
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/core/downloaders/biquge.py +0 -29
- novel_downloader/core/downloaders/esjzone.py +0 -29
- novel_downloader/core/downloaders/linovelib.py +0 -29
- novel_downloader/core/downloaders/sfacg.py +0 -29
- novel_downloader/core/downloaders/yamibo.py +0 -29
- novel_downloader/core/exporters/biquge.py +0 -22
- novel_downloader/core/exporters/esjzone.py +0 -22
- novel_downloader/core/exporters/qianbi.py +0 -22
- novel_downloader/core/exporters/sfacg.py +0 -22
- novel_downloader/core/exporters/yamibo.py +0 -22
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -422
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -209
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -198
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -326
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -194
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -234
- novel_downloader/core/parsers/biquge.py +0 -139
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/types.py +0 -13
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/hash_store.py +0 -280
- novel_downloader/utils/fontocr/hash_utils.py +0 -103
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -315
- novel_downloader/utils/fontocr/ocr_v2.py +0 -764
- novel_downloader/utils/fontocr/ocr_v3.py +0 -744
- novel_downloader-1.5.0.dist-info/METADATA +0 -196
- novel_downloader-1.5.0.dist-info/RECORD +0 -164
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,28 +3,34 @@
|
|
3
3
|
novel_downloader.core.downloaders.qianbi
|
4
4
|
----------------------------------------
|
5
5
|
|
6
|
+
Downloader implementation for Qianbi novels, with chapter ID repair logic.
|
6
7
|
"""
|
7
8
|
|
8
9
|
import asyncio
|
9
|
-
from collections.abc import
|
10
|
-
from contextlib import asynccontextmanager
|
10
|
+
from collections.abc import Awaitable, Callable
|
11
11
|
from pathlib import Path
|
12
12
|
from typing import Any
|
13
13
|
|
14
14
|
from novel_downloader.core.downloaders.base import BaseDownloader
|
15
15
|
from novel_downloader.core.downloaders.registry import register_downloader
|
16
|
+
from novel_downloader.core.downloaders.signals import (
|
17
|
+
STOP,
|
18
|
+
Progress,
|
19
|
+
StopToken,
|
20
|
+
)
|
16
21
|
from novel_downloader.core.interfaces import (
|
17
22
|
FetcherProtocol,
|
18
23
|
ParserProtocol,
|
19
24
|
)
|
20
25
|
from novel_downloader.models import (
|
21
26
|
BookConfig,
|
27
|
+
BookInfoDict,
|
22
28
|
ChapterDict,
|
23
29
|
DownloaderConfig,
|
24
30
|
)
|
25
31
|
from novel_downloader.utils import (
|
26
32
|
ChapterStorage,
|
27
|
-
|
33
|
+
async_jitter_sleep,
|
28
34
|
)
|
29
35
|
|
30
36
|
|
@@ -52,6 +58,7 @@ class QianbiDownloader(BaseDownloader):
|
|
52
58
|
book: BookConfig,
|
53
59
|
*,
|
54
60
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
61
|
+
cancel_event: asyncio.Event | None = None,
|
55
62
|
**kwargs: Any,
|
56
63
|
) -> None:
|
57
64
|
"""
|
@@ -60,143 +67,212 @@ class QianbiDownloader(BaseDownloader):
|
|
60
67
|
:param book: BookConfig with at least 'book_id'.
|
61
68
|
"""
|
62
69
|
TAG = "[Downloader]"
|
70
|
+
|
63
71
|
book_id = book["book_id"]
|
64
72
|
start_id = book.get("start_id")
|
65
73
|
end_id = book.get("end_id")
|
66
74
|
ignore_set = set(book.get("ignore_ids", []))
|
67
75
|
|
68
|
-
# prepare storage & dirs
|
69
76
|
raw_base = self._raw_data_dir / book_id
|
70
77
|
raw_base.mkdir(parents=True, exist_ok=True)
|
71
78
|
html_dir = self._debug_dir / book_id / "html"
|
79
|
+
|
72
80
|
chapter_storage = ChapterStorage(
|
73
81
|
raw_base=raw_base,
|
74
|
-
priorities=self.
|
82
|
+
priorities=self.PRIORITIES_MAP,
|
75
83
|
)
|
76
84
|
chapter_storage.connect()
|
77
85
|
|
78
|
-
|
79
|
-
|
80
|
-
book_info = await self._repair_chapter_ids(
|
81
|
-
book_id,
|
82
|
-
book_info,
|
83
|
-
chapter_storage,
|
84
|
-
html_dir,
|
85
|
-
)
|
86
|
+
def cancelled() -> bool:
|
87
|
+
return bool(cancel_event and cancel_event.is_set())
|
86
88
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
return
|
92
|
-
|
93
|
-
# concurrency primitives
|
94
|
-
sem = asyncio.Semaphore(self.workers)
|
95
|
-
cid_q: asyncio.Queue[str | None] = asyncio.Queue()
|
96
|
-
save_q: asyncio.Queue[ChapterDict | None] = asyncio.Queue()
|
97
|
-
batch: list[ChapterDict] = []
|
98
|
-
completed = 0
|
99
|
-
|
100
|
-
async def _flush_batch() -> None:
|
101
|
-
nonlocal batch, completed
|
102
|
-
if not batch:
|
89
|
+
try:
|
90
|
+
# --- metadata ---
|
91
|
+
book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
|
92
|
+
if not book_info:
|
103
93
|
return
|
104
94
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
if self.skip_existing and chapter_storage.exists(cid):
|
138
|
-
completed += 1
|
139
|
-
if progress_hook:
|
140
|
-
await progress_hook(completed, total_chapters)
|
95
|
+
book_info = await self._repair_chapter_ids(
|
96
|
+
book_id,
|
97
|
+
book_info,
|
98
|
+
chapter_storage,
|
99
|
+
html_dir,
|
100
|
+
)
|
101
|
+
|
102
|
+
vols = book_info["volumes"]
|
103
|
+
total_chapters = sum(len(v["chapters"]) for v in vols)
|
104
|
+
if total_chapters == 0:
|
105
|
+
self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
|
106
|
+
return
|
107
|
+
|
108
|
+
progress = Progress(total_chapters, progress_hook)
|
109
|
+
|
110
|
+
# --- queues & batching ---
|
111
|
+
cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
|
112
|
+
save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
|
113
|
+
batch: list[ChapterDict] = []
|
114
|
+
|
115
|
+
async def flush_batch() -> None:
|
116
|
+
if not batch:
|
117
|
+
return
|
118
|
+
try:
|
119
|
+
chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
|
120
|
+
except Exception as e:
|
121
|
+
self.logger.error(
|
122
|
+
"[Storage] batch upsert failed (size=%d): %s",
|
123
|
+
len(batch),
|
124
|
+
e,
|
125
|
+
exc_info=True,
|
126
|
+
)
|
141
127
|
else:
|
142
|
-
await
|
128
|
+
await progress.bump(len(batch))
|
129
|
+
finally:
|
130
|
+
batch.clear()
|
131
|
+
|
132
|
+
# --- stage: storage worker ---
|
133
|
+
async def storage_worker() -> None:
|
134
|
+
"""
|
135
|
+
Consumes parsed chapters, writes in batches.
|
136
|
+
|
137
|
+
Terminates after receiving STOP from each chapter worker.
|
138
|
+
|
139
|
+
On cancel: keeps consuming (to avoid blocking producers),
|
140
|
+
flushes, and exits once all STOPs are seen.
|
141
|
+
"""
|
142
|
+
stop_count = 0
|
143
|
+
while True:
|
144
|
+
item = await save_q.get()
|
145
|
+
if isinstance(item, StopToken):
|
146
|
+
stop_count += 1
|
147
|
+
if stop_count == self.workers:
|
148
|
+
# All chapter workers have exited.
|
149
|
+
await flush_batch()
|
150
|
+
return
|
151
|
+
# else keep waiting for remaining STOPs
|
152
|
+
continue
|
153
|
+
|
154
|
+
# Normal chapter
|
155
|
+
batch.append(item)
|
156
|
+
if len(batch) >= self.storage_batch_size:
|
157
|
+
await flush_batch()
|
158
|
+
|
159
|
+
if cancelled():
|
160
|
+
# Drain whatever is already in the queue
|
161
|
+
try:
|
162
|
+
while True:
|
163
|
+
nxt = save_q.get_nowait()
|
164
|
+
if isinstance(nxt, StopToken):
|
165
|
+
stop_count += 1
|
166
|
+
else:
|
167
|
+
batch.append(nxt)
|
168
|
+
except asyncio.QueueEmpty:
|
169
|
+
pass
|
170
|
+
# Final flush of everything
|
171
|
+
await flush_batch()
|
172
|
+
# Wait for remaining STOPs so chapter workers can finish.
|
173
|
+
while stop_count < self.workers:
|
174
|
+
nxt = await save_q.get()
|
175
|
+
if isinstance(nxt, StopToken):
|
176
|
+
stop_count += 1
|
177
|
+
return
|
178
|
+
|
179
|
+
# --- stage: chapter worker ---
|
180
|
+
sem = asyncio.Semaphore(self.workers)
|
181
|
+
|
182
|
+
async def chapter_worker() -> None:
|
183
|
+
"""
|
184
|
+
Fetch + parse with retry, then enqueue to save_q.
|
185
|
+
|
186
|
+
Exits on STOP, or early if cancel is set before starting a new fetch.
|
187
|
+
"""
|
188
|
+
while True:
|
189
|
+
cid = await cid_q.get()
|
190
|
+
if isinstance(cid, StopToken):
|
191
|
+
# Propagate one STOP to storage and exit.
|
192
|
+
await save_q.put(STOP)
|
193
|
+
return
|
194
|
+
|
195
|
+
if not cid or cid in ignore_set:
|
196
|
+
# Ignore silently and continue.
|
197
|
+
continue
|
143
198
|
|
144
|
-
|
145
|
-
|
199
|
+
# If cancelled, don't start a new network call; let storage finish.
|
200
|
+
if cancelled():
|
201
|
+
await save_q.put(STOP)
|
202
|
+
return
|
203
|
+
|
204
|
+
async with sem:
|
205
|
+
chap = await self._process_chapter(book_id, cid, html_dir)
|
206
|
+
if chap:
|
207
|
+
await save_q.put(chap)
|
208
|
+
|
209
|
+
# polite pacing
|
210
|
+
await async_jitter_sleep(
|
211
|
+
self.request_interval,
|
212
|
+
mul_spread=1.1,
|
213
|
+
max_sleep=self.request_interval + 2,
|
214
|
+
)
|
215
|
+
|
216
|
+
# --- stage: producer ---
|
217
|
+
async def producer() -> None:
|
218
|
+
"""
|
219
|
+
Enqueue chapter IDs (respecting start/end/skip_existing).
|
220
|
+
Always sends STOP x workers at the end (even if cancelled early),
|
221
|
+
so chapter workers can exit deterministically.
|
222
|
+
"""
|
223
|
+
try:
|
224
|
+
async for cid in self._chapter_ids(vols, start_id, end_id):
|
225
|
+
if cancelled():
|
226
|
+
break
|
227
|
+
if self.skip_existing and chapter_storage.exists(cid):
|
228
|
+
# Count as completed but don't enqueue.
|
229
|
+
await progress.bump(1)
|
230
|
+
else:
|
231
|
+
await cid_q.put(cid)
|
232
|
+
finally:
|
233
|
+
for _ in range(self.workers):
|
234
|
+
await cid_q.put(STOP)
|
235
|
+
|
236
|
+
# --- run the pipeline ---
|
146
237
|
async with asyncio.TaskGroup() as tg:
|
147
|
-
|
238
|
+
tg.create_task(storage_worker())
|
148
239
|
for _ in range(self.workers):
|
149
|
-
tg.create_task(
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
)
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
await cid_q.join()
|
171
|
-
|
172
|
-
# signal storage worker to exit
|
173
|
-
await save_q.put(None)
|
174
|
-
await save_q.join()
|
175
|
-
|
176
|
-
# final flush to catch any remaining items
|
177
|
-
await _flush_batch()
|
178
|
-
|
179
|
-
chapter_storage.close()
|
180
|
-
self.logger.info(
|
181
|
-
"%s Novel '%s' download completed.",
|
182
|
-
TAG,
|
183
|
-
book_info.get("book_name", "unknown"),
|
184
|
-
)
|
240
|
+
tg.create_task(chapter_worker())
|
241
|
+
tg.create_task(producer())
|
242
|
+
|
243
|
+
# --- done ---
|
244
|
+
if cancelled():
|
245
|
+
self.logger.info(
|
246
|
+
"%s Novel '%s' cancelled: flushed %d/%d chapters.",
|
247
|
+
TAG,
|
248
|
+
book_info.get("book_name", "unknown"),
|
249
|
+
progress.done,
|
250
|
+
progress.total,
|
251
|
+
)
|
252
|
+
else:
|
253
|
+
self.logger.info(
|
254
|
+
"%s Novel '%s' download completed.",
|
255
|
+
TAG,
|
256
|
+
book_info.get("book_name", "unknown"),
|
257
|
+
)
|
258
|
+
|
259
|
+
finally:
|
260
|
+
chapter_storage.close()
|
185
261
|
|
186
262
|
async def _repair_chapter_ids(
|
187
263
|
self,
|
188
264
|
book_id: str,
|
189
|
-
book_info:
|
265
|
+
book_info: BookInfoDict,
|
190
266
|
storage: ChapterStorage,
|
191
267
|
html_dir: Path,
|
192
|
-
) ->
|
268
|
+
) -> BookInfoDict:
|
193
269
|
"""
|
194
270
|
Fill in missing chapterId fields by retrieving the previous chapter
|
195
271
|
and following its 'next_chapter_id'. Uses storage to avoid refetching.
|
196
272
|
"""
|
197
273
|
prev_cid: str = ""
|
198
|
-
for vol in book_info
|
199
|
-
for chap in vol
|
274
|
+
for vol in book_info["volumes"]:
|
275
|
+
for chap in vol["chapters"]:
|
200
276
|
cid = chap.get("chapterId")
|
201
277
|
if cid:
|
202
278
|
prev_cid = cid
|
@@ -218,7 +294,7 @@ class QianbiDownloader(BaseDownloader):
|
|
218
294
|
)
|
219
295
|
continue
|
220
296
|
storage.upsert_chapter(data, self.DEFAULT_SOURCE_ID)
|
221
|
-
await
|
297
|
+
await async_jitter_sleep(
|
222
298
|
self.request_interval,
|
223
299
|
mul_spread=1.1,
|
224
300
|
max_sleep=self.request_interval + 2,
|
@@ -243,41 +319,6 @@ class QianbiDownloader(BaseDownloader):
|
|
243
319
|
self._save_book_info(book_id, book_info)
|
244
320
|
return book_info
|
245
321
|
|
246
|
-
async def _chapter_worker(
|
247
|
-
self,
|
248
|
-
book_id: str,
|
249
|
-
ignore_set: set[str],
|
250
|
-
cid_q: asyncio.Queue[str | None],
|
251
|
-
save_q: asyncio.Queue[ChapterDict | None],
|
252
|
-
sem: asyncio.Semaphore,
|
253
|
-
) -> None:
|
254
|
-
"""
|
255
|
-
Worker that processes one chapter at a time:
|
256
|
-
fetch + parse with retry, then enqueue to save_q.
|
257
|
-
"""
|
258
|
-
html_dir = self._debug_dir / book_id / "html"
|
259
|
-
while True:
|
260
|
-
cid = await cid_q.get()
|
261
|
-
if cid is None:
|
262
|
-
cid_q.task_done()
|
263
|
-
break
|
264
|
-
if not cid or cid in ignore_set:
|
265
|
-
cid_q.task_done()
|
266
|
-
continue
|
267
|
-
|
268
|
-
async with sem:
|
269
|
-
chap = await self._process_chapter(book_id, cid, html_dir)
|
270
|
-
|
271
|
-
if chap:
|
272
|
-
await save_q.put(chap)
|
273
|
-
|
274
|
-
cid_q.task_done()
|
275
|
-
await async_sleep_with_random_delay(
|
276
|
-
self.request_interval,
|
277
|
-
mul_spread=1.1,
|
278
|
-
max_sleep=self.request_interval + 2,
|
279
|
-
)
|
280
|
-
|
281
322
|
async def _process_chapter(
|
282
323
|
self,
|
283
324
|
book_id: str,
|
@@ -304,7 +345,7 @@ class QianbiDownloader(BaseDownloader):
|
|
304
345
|
if attempt < self.retry_times:
|
305
346
|
self.logger.info(f"[ChapterWorker] Retry {cid} ({attempt+1}): {e}")
|
306
347
|
backoff = self.backoff_factor * (2**attempt)
|
307
|
-
await
|
348
|
+
await async_jitter_sleep(
|
308
349
|
base=backoff, mul_spread=1.2, max_sleep=backoff + 3
|
309
350
|
)
|
310
351
|
else:
|