novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +1 -3
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +26 -21
- novel_downloader/cli/download.py +77 -64
- novel_downloader/cli/export.py +16 -20
- novel_downloader/cli/main.py +1 -1
- novel_downloader/cli/search.py +62 -65
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +8 -5
- novel_downloader/config/adapter.py +65 -105
- novel_downloader/config/{loader.py → file_io.py} +53 -26
- novel_downloader/core/__init__.py +1 -0
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +3 -24
- novel_downloader/core/downloaders/base.py +49 -23
- novel_downloader/core/downloaders/common.py +191 -137
- novel_downloader/core/downloaders/qianbi.py +187 -146
- novel_downloader/core/downloaders/qidian.py +187 -141
- novel_downloader/core/downloaders/registry.py +4 -2
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +3 -20
- novel_downloader/core/exporters/base.py +33 -37
- novel_downloader/core/exporters/common/__init__.py +1 -2
- novel_downloader/core/exporters/common/epub.py +15 -10
- novel_downloader/core/exporters/common/main_exporter.py +19 -12
- novel_downloader/core/exporters/common/txt.py +14 -9
- novel_downloader/core/exporters/epub_util.py +59 -29
- novel_downloader/core/exporters/linovelib/__init__.py +1 -0
- novel_downloader/core/exporters/linovelib/epub.py +23 -25
- novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
- novel_downloader/core/exporters/linovelib/txt.py +17 -11
- novel_downloader/core/exporters/qidian.py +2 -8
- novel_downloader/core/exporters/registry.py +4 -2
- novel_downloader/core/exporters/txt_util.py +7 -7
- novel_downloader/core/fetchers/__init__.py +54 -48
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
- novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
- novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/lewenn.py +83 -0
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +5 -16
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/shuhaige.py +84 -0
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/wanbengo.py +83 -0
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +1 -9
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +9 -1
- novel_downloader/core/parsers/__init__.py +49 -12
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/esjzone.py +61 -66
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/linovelib.py +48 -64
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/qianbi.py +48 -50
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
- novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
- novel_downloader/core/parsers/qidian/main_parser.py +11 -38
- novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +5 -16
- novel_downloader/core/parsers/sfacg.py +38 -45
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +87 -131
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +34 -3
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
- novel_downloader/core/searchers/base.py +112 -36
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +43 -25
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +74 -40
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +24 -8
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +31 -82
- novel_downloader/locales/zh.json +32 -83
- novel_downloader/models/__init__.py +21 -22
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +4 -37
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +5 -0
- novel_downloader/resources/config/settings.toml +8 -70
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +13 -22
- novel_downloader/utils/chapter_storage.py +3 -2
- novel_downloader/utils/constants.py +4 -29
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +1 -1
- novel_downloader/utils/epub/constants.py +57 -16
- novel_downloader/utils/epub/documents.py +88 -194
- novel_downloader/utils/epub/models.py +0 -14
- novel_downloader/utils/epub/utils.py +63 -96
- novel_downloader/utils/file_utils/__init__.py +2 -23
- novel_downloader/utils/file_utils/io.py +3 -113
- novel_downloader/utils/file_utils/sanitize.py +0 -4
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/logger.py +8 -16
- novel_downloader/utils/network.py +2 -2
- novel_downloader/utils/state.py +4 -90
- novel_downloader/utils/text_utils/__init__.py +1 -7
- novel_downloader/utils/text_utils/diff_display.py +5 -7
- novel_downloader/utils/time_utils/__init__.py +5 -11
- novel_downloader/utils/time_utils/datetime_utils.py +20 -29
- novel_downloader/utils/time_utils/sleep_utils.py +4 -8
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/core/downloaders/biquge.py +0 -29
- novel_downloader/core/downloaders/esjzone.py +0 -29
- novel_downloader/core/downloaders/linovelib.py +0 -29
- novel_downloader/core/downloaders/sfacg.py +0 -29
- novel_downloader/core/downloaders/yamibo.py +0 -29
- novel_downloader/core/exporters/biquge.py +0 -22
- novel_downloader/core/exporters/esjzone.py +0 -22
- novel_downloader/core/exporters/qianbi.py +0 -22
- novel_downloader/core/exporters/sfacg.py +0 -22
- novel_downloader/core/exporters/yamibo.py +0 -22
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -422
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -209
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -198
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -326
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -194
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -234
- novel_downloader/core/parsers/biquge.py +0 -139
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/types.py +0 -13
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/hash_store.py +0 -280
- novel_downloader/utils/fontocr/hash_utils.py +0 -103
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -315
- novel_downloader/utils/fontocr/ocr_v2.py +0 -764
- novel_downloader/utils/fontocr/ocr_v3.py +0 -744
- novel_downloader-1.5.0.dist-info/METADATA +0 -196
- novel_downloader-1.5.0.dist-info/RECORD +0 -164
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,22 +3,27 @@
|
|
3
3
|
novel_downloader.core.downloaders.common
|
4
4
|
----------------------------------------
|
5
5
|
|
6
|
+
Concrete downloader implementation with a generic async pipeline for common novel sites
|
6
7
|
"""
|
7
8
|
|
8
9
|
import asyncio
|
9
|
-
from collections.abc import
|
10
|
-
from contextlib import asynccontextmanager
|
10
|
+
from collections.abc import Awaitable, Callable
|
11
11
|
from pathlib import Path
|
12
12
|
from typing import Any
|
13
13
|
|
14
14
|
from novel_downloader.core.downloaders.base import BaseDownloader
|
15
|
+
from novel_downloader.core.downloaders.signals import (
|
16
|
+
STOP,
|
17
|
+
Progress,
|
18
|
+
StopToken,
|
19
|
+
)
|
15
20
|
from novel_downloader.models import (
|
16
21
|
BookConfig,
|
17
22
|
ChapterDict,
|
18
23
|
)
|
19
24
|
from novel_downloader.utils import (
|
20
25
|
ChapterStorage,
|
21
|
-
|
26
|
+
async_jitter_sleep,
|
22
27
|
)
|
23
28
|
|
24
29
|
|
@@ -32,164 +37,203 @@ class CommonDownloader(BaseDownloader):
|
|
32
37
|
book: BookConfig,
|
33
38
|
*,
|
34
39
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
40
|
+
cancel_event: asyncio.Event | None = None,
|
35
41
|
**kwargs: Any,
|
36
42
|
) -> None:
|
37
43
|
"""
|
38
|
-
|
44
|
+
Sentinel-based pipeline with graceful cancellation:
|
45
|
+
|
46
|
+
Producer -> ChapterWorkers -> StorageWorker.
|
39
47
|
|
40
|
-
|
48
|
+
On cancel: stop producing, workers finish at most one chapter,
|
49
|
+
storage drains, flushes, and exits.
|
41
50
|
"""
|
42
51
|
TAG = "[Downloader]"
|
43
|
-
|
52
|
+
|
53
|
+
book_id = self._normalize_book_id(book["book_id"])
|
44
54
|
start_id = book.get("start_id")
|
45
55
|
end_id = book.get("end_id")
|
46
56
|
ignore_set = set(book.get("ignore_ids", []))
|
47
57
|
|
48
|
-
# prepare storage & dirs
|
49
58
|
raw_base = self._raw_data_dir / book_id
|
50
59
|
raw_base.mkdir(parents=True, exist_ok=True)
|
51
60
|
html_dir = self._debug_dir / book_id / "html"
|
61
|
+
|
52
62
|
chapter_storage = ChapterStorage(
|
53
63
|
raw_base=raw_base,
|
54
|
-
priorities=self.
|
64
|
+
priorities=self.PRIORITIES_MAP,
|
55
65
|
)
|
56
66
|
chapter_storage.connect()
|
57
67
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
self.
|
64
|
-
|
65
|
-
|
66
|
-
# concurrency primitives
|
67
|
-
sem = asyncio.Semaphore(self.workers)
|
68
|
-
cid_q: asyncio.Queue[str | None] = asyncio.Queue()
|
69
|
-
save_q: asyncio.Queue[ChapterDict | None] = asyncio.Queue()
|
70
|
-
batch: list[ChapterDict] = []
|
71
|
-
completed = 0
|
72
|
-
|
73
|
-
async def _flush_batch() -> None:
|
74
|
-
nonlocal batch, completed
|
75
|
-
if not batch:
|
68
|
+
def cancelled() -> bool:
|
69
|
+
return bool(cancel_event and cancel_event.is_set())
|
70
|
+
|
71
|
+
try:
|
72
|
+
# --- metadata ---
|
73
|
+
book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
|
74
|
+
if not book_info:
|
76
75
|
return
|
77
76
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
self.logger.
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
batch
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
batch.append(item)
|
104
|
-
if len(batch) >= self.storage_batch_size:
|
105
|
-
await _flush_batch()
|
106
|
-
|
107
|
-
async def producer() -> None:
|
108
|
-
nonlocal completed
|
109
|
-
async for cid in self._chapter_ids(vols, start_id, end_id):
|
110
|
-
if self.skip_existing and chapter_storage.exists(cid):
|
111
|
-
completed += 1
|
112
|
-
if progress_hook:
|
113
|
-
await progress_hook(completed, total_chapters)
|
77
|
+
vols = book_info["volumes"]
|
78
|
+
total_chapters = sum(len(v["chapters"]) for v in vols)
|
79
|
+
if total_chapters == 0:
|
80
|
+
self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
|
81
|
+
return
|
82
|
+
|
83
|
+
progress = Progress(total_chapters, progress_hook)
|
84
|
+
|
85
|
+
# --- queues & batching ---
|
86
|
+
cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
|
87
|
+
save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
|
88
|
+
batch: list[ChapterDict] = []
|
89
|
+
|
90
|
+
async def flush_batch() -> None:
|
91
|
+
if not batch:
|
92
|
+
return
|
93
|
+
try:
|
94
|
+
chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
|
95
|
+
except Exception as e:
|
96
|
+
self.logger.error(
|
97
|
+
"[Storage] batch upsert failed (size=%d): %s",
|
98
|
+
len(batch),
|
99
|
+
e,
|
100
|
+
exc_info=True,
|
101
|
+
)
|
114
102
|
else:
|
115
|
-
await
|
103
|
+
await progress.bump(len(batch))
|
104
|
+
finally:
|
105
|
+
batch.clear()
|
106
|
+
|
107
|
+
# --- stage: storage worker ---
|
108
|
+
async def storage_worker() -> None:
|
109
|
+
"""
|
110
|
+
Consumes parsed chapters, writes in batches.
|
111
|
+
|
112
|
+
Terminates after receiving STOP from each chapter worker.
|
113
|
+
|
114
|
+
On cancel: keeps consuming (to avoid blocking producers),
|
115
|
+
flushes, and exits once all STOPs are seen.
|
116
|
+
"""
|
117
|
+
stop_count = 0
|
118
|
+
while True:
|
119
|
+
item = await save_q.get()
|
120
|
+
if isinstance(item, StopToken):
|
121
|
+
stop_count += 1
|
122
|
+
if stop_count == self.workers:
|
123
|
+
# All chapter workers have exited.
|
124
|
+
await flush_batch()
|
125
|
+
return
|
126
|
+
# else keep waiting for remaining STOPs
|
127
|
+
continue
|
128
|
+
|
129
|
+
# Normal chapter
|
130
|
+
batch.append(item)
|
131
|
+
if len(batch) >= self.storage_batch_size:
|
132
|
+
await flush_batch()
|
133
|
+
|
134
|
+
if cancelled():
|
135
|
+
# Drain whatever is already in the queue
|
136
|
+
try:
|
137
|
+
while True:
|
138
|
+
nxt = save_q.get_nowait()
|
139
|
+
if isinstance(nxt, StopToken):
|
140
|
+
stop_count += 1
|
141
|
+
else:
|
142
|
+
batch.append(nxt)
|
143
|
+
except asyncio.QueueEmpty:
|
144
|
+
pass
|
145
|
+
# Final flush of everything
|
146
|
+
await flush_batch()
|
147
|
+
# Wait for remaining STOPs so chapter workers can finish.
|
148
|
+
while stop_count < self.workers:
|
149
|
+
nxt = await save_q.get()
|
150
|
+
if isinstance(nxt, StopToken):
|
151
|
+
stop_count += 1
|
152
|
+
return
|
153
|
+
|
154
|
+
# --- stage: chapter worker ---
|
155
|
+
sem = asyncio.Semaphore(self.workers)
|
156
|
+
|
157
|
+
async def chapter_worker() -> None:
|
158
|
+
"""
|
159
|
+
Fetch + parse with retry, then enqueue to save_q.
|
160
|
+
|
161
|
+
Exits on STOP, or early if cancel is set before starting a new fetch.
|
162
|
+
"""
|
163
|
+
while True:
|
164
|
+
cid = await cid_q.get()
|
165
|
+
if isinstance(cid, StopToken):
|
166
|
+
# Propagate one STOP to storage and exit.
|
167
|
+
await save_q.put(STOP)
|
168
|
+
return
|
169
|
+
|
170
|
+
if not cid or cid in ignore_set:
|
171
|
+
# Ignore silently and continue.
|
172
|
+
continue
|
116
173
|
|
117
|
-
|
118
|
-
|
174
|
+
# If cancelled, don't start a new network call; let storage finish.
|
175
|
+
if cancelled():
|
176
|
+
await save_q.put(STOP)
|
177
|
+
return
|
178
|
+
|
179
|
+
async with sem:
|
180
|
+
chap = await self._process_chapter(book_id, cid, html_dir)
|
181
|
+
if chap:
|
182
|
+
await save_q.put(chap)
|
183
|
+
|
184
|
+
# polite pacing
|
185
|
+
await async_jitter_sleep(
|
186
|
+
self.request_interval,
|
187
|
+
mul_spread=1.1,
|
188
|
+
max_sleep=self.request_interval + 2,
|
189
|
+
)
|
190
|
+
|
191
|
+
# --- stage: producer ---
|
192
|
+
async def producer() -> None:
|
193
|
+
"""
|
194
|
+
Enqueue chapter IDs (respecting start/end/skip_existing).
|
195
|
+
|
196
|
+
Always sends STOP x workers at the end (even if cancelled early),
|
197
|
+
so chapter workers can exit deterministically.
|
198
|
+
"""
|
199
|
+
try:
|
200
|
+
async for cid in self._chapter_ids(vols, start_id, end_id):
|
201
|
+
if cancelled():
|
202
|
+
break
|
203
|
+
if self.skip_existing and chapter_storage.exists(cid):
|
204
|
+
# Count as completed but don't enqueue.
|
205
|
+
await progress.bump(1)
|
206
|
+
else:
|
207
|
+
await cid_q.put(cid)
|
208
|
+
finally:
|
209
|
+
for _ in range(self.workers):
|
210
|
+
await cid_q.put(STOP)
|
211
|
+
|
212
|
+
# --- run the pipeline ---
|
119
213
|
async with asyncio.TaskGroup() as tg:
|
120
|
-
|
214
|
+
tg.create_task(storage_worker())
|
121
215
|
for _ in range(self.workers):
|
122
|
-
tg.create_task(
|
123
|
-
|
124
|
-
book_id,
|
125
|
-
ignore_set,
|
126
|
-
cid_q,
|
127
|
-
save_q,
|
128
|
-
sem,
|
129
|
-
)
|
130
|
-
)
|
131
|
-
# start storage worker
|
132
|
-
tg.create_task(storage_worker(save_q))
|
133
|
-
yield tg
|
134
|
-
|
135
|
-
# run producer + workers
|
136
|
-
async with task_group_ctx():
|
137
|
-
# produce all CidTask
|
138
|
-
await producer()
|
139
|
-
|
140
|
-
# signal chapter workers to exit
|
141
|
-
for _ in range(self.workers):
|
142
|
-
await cid_q.put(None)
|
143
|
-
await cid_q.join()
|
144
|
-
|
145
|
-
# signal storage worker to exit
|
146
|
-
await save_q.put(None)
|
147
|
-
await save_q.join()
|
148
|
-
|
149
|
-
# final flush to catch any remaining items
|
150
|
-
await _flush_batch()
|
151
|
-
|
152
|
-
chapter_storage.close()
|
153
|
-
self.logger.info(
|
154
|
-
"%s Novel '%s' download completed.",
|
155
|
-
TAG,
|
156
|
-
book_info.get("book_name", "unknown"),
|
157
|
-
)
|
216
|
+
tg.create_task(chapter_worker())
|
217
|
+
tg.create_task(producer())
|
158
218
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
if not cid or cid in ignore_set:
|
178
|
-
cid_q.task_done()
|
179
|
-
continue
|
180
|
-
|
181
|
-
async with sem:
|
182
|
-
chap = await self._process_chapter(book_id, cid, html_dir)
|
183
|
-
|
184
|
-
if chap:
|
185
|
-
await save_q.put(chap)
|
186
|
-
|
187
|
-
cid_q.task_done()
|
188
|
-
await async_sleep_with_random_delay(
|
189
|
-
self.request_interval,
|
190
|
-
mul_spread=1.1,
|
191
|
-
max_sleep=self.request_interval + 2,
|
192
|
-
)
|
219
|
+
# --- done ---
|
220
|
+
if cancelled():
|
221
|
+
self.logger.info(
|
222
|
+
"%s Novel '%s' cancelled: flushed %d/%d chapters.",
|
223
|
+
TAG,
|
224
|
+
book_info.get("book_name", "unknown"),
|
225
|
+
progress.done,
|
226
|
+
progress.total,
|
227
|
+
)
|
228
|
+
else:
|
229
|
+
self.logger.info(
|
230
|
+
"%s Novel '%s' download completed.",
|
231
|
+
TAG,
|
232
|
+
book_info.get("book_name", "unknown"),
|
233
|
+
)
|
234
|
+
|
235
|
+
finally:
|
236
|
+
chapter_storage.close()
|
193
237
|
|
194
238
|
async def _process_chapter(
|
195
239
|
self,
|
@@ -219,9 +263,19 @@ class CommonDownloader(BaseDownloader):
|
|
219
263
|
"[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
|
220
264
|
)
|
221
265
|
backoff = self.backoff_factor * (2**attempt)
|
222
|
-
await
|
266
|
+
await async_jitter_sleep(
|
223
267
|
base=backoff, mul_spread=1.2, max_sleep=backoff + 3
|
224
268
|
)
|
225
269
|
else:
|
226
270
|
self.logger.warning("[ChapterWorker] Failed %s: %s", cid, e)
|
227
271
|
return None
|
272
|
+
|
273
|
+
@staticmethod
|
274
|
+
def _normalize_book_id(book_id: str) -> str:
|
275
|
+
"""
|
276
|
+
Normalize a book identifier.
|
277
|
+
|
278
|
+
Subclasses may override this method to transform the book ID
|
279
|
+
into their preferred format.
|
280
|
+
"""
|
281
|
+
return book_id.replace("/", "-")
|