novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +14 -11
- novel_downloader/cli/export.py +19 -19
- novel_downloader/cli/ui.py +35 -8
- novel_downloader/config/adapter.py +216 -153
- novel_downloader/core/__init__.py +5 -6
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +27 -68
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +23 -49
- novel_downloader/core/interfaces/parser.py +2 -2
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +5 -39
- novel_downloader/core/parsers/esjzone.py +3 -3
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
- novel_downloader/core/parsers/qidian.py +717 -0
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +8 -15
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/locales/en.json +3 -3
- novel_downloader/locales/zh.json +3 -3
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/__init__.py +0 -2
- novel_downloader/utils/chapter_storage.py +2 -3
- novel_downloader/utils/constants.py +7 -3
- novel_downloader/utils/cookies.py +32 -17
- novel_downloader/utils/crypto_utils/__init__.py +0 -6
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/crypto_utils/rc4.py +40 -50
- novel_downloader/utils/epub/__init__.py +2 -3
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/epub/documents.py +7 -7
- novel_downloader/utils/epub/models.py +8 -8
- novel_downloader/utils/epub/utils.py +10 -10
- novel_downloader/utils/file_utils/io.py +48 -73
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -11
- novel_downloader/utils/fontocr/__init__.py +13 -0
- novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
- novel_downloader/utils/fontocr/loader.py +52 -0
- novel_downloader/utils/logger.py +80 -56
- novel_downloader/utils/network.py +16 -40
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/sleep_utils.py +53 -43
- novel_downloader/web/main.py +1 -1
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +4 -4
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
- novel_downloader-2.0.2.dist-info/RECORD +203 -0
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/main_parser.py +0 -101
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- novel_downloader-2.0.0.dist-info/RECORD +0 -210
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -13,25 +13,13 @@ from typing import Any
|
|
13
13
|
|
14
14
|
from novel_downloader.core.downloaders.base import BaseDownloader
|
15
15
|
from novel_downloader.core.downloaders.registry import register_downloader
|
16
|
-
from novel_downloader.core.downloaders.signals import
|
17
|
-
STOP,
|
18
|
-
Progress,
|
19
|
-
StopToken,
|
20
|
-
)
|
21
|
-
from novel_downloader.core.interfaces import (
|
22
|
-
FetcherProtocol,
|
23
|
-
ParserProtocol,
|
24
|
-
)
|
16
|
+
from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
|
25
17
|
from novel_downloader.models import (
|
26
18
|
BookConfig,
|
27
19
|
BookInfoDict,
|
28
20
|
ChapterDict,
|
29
|
-
DownloaderConfig,
|
30
|
-
)
|
31
|
-
from novel_downloader.utils import (
|
32
|
-
ChapterStorage,
|
33
|
-
async_jitter_sleep,
|
34
21
|
)
|
22
|
+
from novel_downloader.utils import ChapterStorage, async_jitter_sleep
|
35
23
|
|
36
24
|
|
37
25
|
@register_downloader(site_keys=["qianbi"])
|
@@ -43,16 +31,6 @@ class QianbiDownloader(BaseDownloader):
|
|
43
31
|
each chapter as a unit (fetch -> parse -> enqueue storage).
|
44
32
|
"""
|
45
33
|
|
46
|
-
DEFAULT_SOURCE_ID = 0
|
47
|
-
|
48
|
-
def __init__(
|
49
|
-
self,
|
50
|
-
fetcher: FetcherProtocol,
|
51
|
-
parser: ParserProtocol,
|
52
|
-
config: DownloaderConfig,
|
53
|
-
):
|
54
|
-
super().__init__(fetcher, parser, config, "qianbi")
|
55
|
-
|
56
34
|
async def _download_one(
|
57
35
|
self,
|
58
36
|
book: BookConfig,
|
@@ -77,16 +55,10 @@ class QianbiDownloader(BaseDownloader):
|
|
77
55
|
raw_base.mkdir(parents=True, exist_ok=True)
|
78
56
|
html_dir = self._debug_dir / book_id / "html"
|
79
57
|
|
80
|
-
chapter_storage = ChapterStorage(
|
81
|
-
raw_base=raw_base,
|
82
|
-
priorities=self.PRIORITIES_MAP,
|
83
|
-
)
|
84
|
-
chapter_storage.connect()
|
85
|
-
|
86
58
|
def cancelled() -> bool:
|
87
59
|
return bool(cancel_event and cancel_event.is_set())
|
88
60
|
|
89
|
-
|
61
|
+
with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
|
90
62
|
# --- metadata ---
|
91
63
|
book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
|
92
64
|
if not book_info:
|
@@ -95,28 +67,32 @@ class QianbiDownloader(BaseDownloader):
|
|
95
67
|
book_info = await self._repair_chapter_ids(
|
96
68
|
book_id,
|
97
69
|
book_info,
|
98
|
-
|
70
|
+
storage,
|
99
71
|
html_dir,
|
100
72
|
)
|
101
73
|
|
102
74
|
vols = book_info["volumes"]
|
103
|
-
|
104
|
-
if
|
105
|
-
self.logger.
|
75
|
+
plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
|
76
|
+
if not plan:
|
77
|
+
self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
|
106
78
|
return
|
107
79
|
|
108
|
-
progress = Progress(
|
80
|
+
progress = Progress(total=len(plan), hook=progress_hook)
|
109
81
|
|
110
82
|
# --- queues & batching ---
|
111
|
-
cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(
|
112
|
-
|
83
|
+
cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(
|
84
|
+
maxsize=self._workers * 2
|
85
|
+
)
|
86
|
+
save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
|
87
|
+
maxsize=self._workers * 2
|
88
|
+
)
|
113
89
|
batch: list[ChapterDict] = []
|
114
90
|
|
115
91
|
async def flush_batch() -> None:
|
116
92
|
if not batch:
|
117
93
|
return
|
118
94
|
try:
|
119
|
-
|
95
|
+
storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
|
120
96
|
except Exception as e:
|
121
97
|
self.logger.error(
|
122
98
|
"[Storage] batch upsert failed (size=%d): %s",
|
@@ -144,7 +120,7 @@ class QianbiDownloader(BaseDownloader):
|
|
144
120
|
item = await save_q.get()
|
145
121
|
if isinstance(item, StopToken):
|
146
122
|
stop_count += 1
|
147
|
-
if stop_count == self.
|
123
|
+
if stop_count == self._workers:
|
148
124
|
# All chapter workers have exited.
|
149
125
|
await flush_batch()
|
150
126
|
return
|
@@ -153,7 +129,7 @@ class QianbiDownloader(BaseDownloader):
|
|
153
129
|
|
154
130
|
# Normal chapter
|
155
131
|
batch.append(item)
|
156
|
-
if len(batch) >= self.
|
132
|
+
if len(batch) >= self._storage_batch_size:
|
157
133
|
await flush_batch()
|
158
134
|
|
159
135
|
if cancelled():
|
@@ -170,15 +146,13 @@ class QianbiDownloader(BaseDownloader):
|
|
170
146
|
# Final flush of everything
|
171
147
|
await flush_batch()
|
172
148
|
# Wait for remaining STOPs so chapter workers can finish.
|
173
|
-
while stop_count < self.
|
149
|
+
while stop_count < self._workers:
|
174
150
|
nxt = await save_q.get()
|
175
151
|
if isinstance(nxt, StopToken):
|
176
152
|
stop_count += 1
|
177
153
|
return
|
178
154
|
|
179
155
|
# --- stage: chapter worker ---
|
180
|
-
sem = asyncio.Semaphore(self.workers)
|
181
|
-
|
182
156
|
async def chapter_worker() -> None:
|
183
157
|
"""
|
184
158
|
Fetch + parse with retry, then enqueue to save_q.
|
@@ -192,25 +166,20 @@ class QianbiDownloader(BaseDownloader):
|
|
192
166
|
await save_q.put(STOP)
|
193
167
|
return
|
194
168
|
|
195
|
-
if not cid or cid in ignore_set:
|
196
|
-
# Ignore silently and continue.
|
197
|
-
continue
|
198
|
-
|
199
169
|
# If cancelled, don't start a new network call; let storage finish.
|
200
170
|
if cancelled():
|
201
171
|
await save_q.put(STOP)
|
202
172
|
return
|
203
173
|
|
204
|
-
|
205
|
-
chap = await self._process_chapter(book_id, cid, html_dir)
|
174
|
+
chap = await self._process_chapter(book_id, cid, html_dir)
|
206
175
|
if chap:
|
207
176
|
await save_q.put(chap)
|
208
177
|
|
209
178
|
# polite pacing
|
210
179
|
await async_jitter_sleep(
|
211
|
-
self.
|
180
|
+
self._request_interval,
|
212
181
|
mul_spread=1.1,
|
213
|
-
max_sleep=self.
|
182
|
+
max_sleep=self._request_interval + 2,
|
214
183
|
)
|
215
184
|
|
216
185
|
# --- stage: producer ---
|
@@ -221,22 +190,22 @@ class QianbiDownloader(BaseDownloader):
|
|
221
190
|
so chapter workers can exit deterministically.
|
222
191
|
"""
|
223
192
|
try:
|
224
|
-
|
193
|
+
for cid in plan:
|
225
194
|
if cancelled():
|
226
195
|
break
|
227
|
-
if self.
|
196
|
+
if self._skip_existing and storage.exists(cid):
|
228
197
|
# Count as completed but don't enqueue.
|
229
198
|
await progress.bump(1)
|
230
199
|
else:
|
231
200
|
await cid_q.put(cid)
|
232
201
|
finally:
|
233
|
-
for _ in range(self.
|
202
|
+
for _ in range(self._workers):
|
234
203
|
await cid_q.put(STOP)
|
235
204
|
|
236
205
|
# --- run the pipeline ---
|
237
206
|
async with asyncio.TaskGroup() as tg:
|
238
207
|
tg.create_task(storage_worker())
|
239
|
-
for _ in range(self.
|
208
|
+
for _ in range(self._workers):
|
240
209
|
tg.create_task(chapter_worker())
|
241
210
|
tg.create_task(producer())
|
242
211
|
|
@@ -256,9 +225,6 @@ class QianbiDownloader(BaseDownloader):
|
|
256
225
|
book_info.get("book_name", "unknown"),
|
257
226
|
)
|
258
227
|
|
259
|
-
finally:
|
260
|
-
chapter_storage.close()
|
261
|
-
|
262
228
|
async def _repair_chapter_ids(
|
263
229
|
self,
|
264
230
|
book_id: str,
|
@@ -295,9 +261,9 @@ class QianbiDownloader(BaseDownloader):
|
|
295
261
|
continue
|
296
262
|
storage.upsert_chapter(data, self.DEFAULT_SOURCE_ID)
|
297
263
|
await async_jitter_sleep(
|
298
|
-
self.
|
264
|
+
self._request_interval,
|
299
265
|
mul_spread=1.1,
|
300
|
-
max_sleep=self.
|
266
|
+
max_sleep=self._request_interval + 2,
|
301
267
|
)
|
302
268
|
|
303
269
|
next_cid = data.get("extra", {}).get("next_chapter_id")
|
@@ -331,7 +297,7 @@ class QianbiDownloader(BaseDownloader):
|
|
331
297
|
|
332
298
|
:return: ChapterDict on success, or None on failure.
|
333
299
|
"""
|
334
|
-
for attempt in range(self.
|
300
|
+
for attempt in range(self._retry_times + 1):
|
335
301
|
try:
|
336
302
|
html_list = await self.fetcher.get_book_chapter(book_id, cid)
|
337
303
|
self._save_html_pages(html_dir, cid, html_list)
|
@@ -342,9 +308,9 @@ class QianbiDownloader(BaseDownloader):
|
|
342
308
|
raise ValueError("Empty parse result")
|
343
309
|
return chap
|
344
310
|
except Exception as e:
|
345
|
-
if attempt < self.
|
311
|
+
if attempt < self._retry_times:
|
346
312
|
self.logger.info(f"[ChapterWorker] Retry {cid} ({attempt+1}): {e}")
|
347
|
-
backoff = self.
|
313
|
+
backoff = self._backoff_factor * (2**attempt)
|
348
314
|
await async_jitter_sleep(
|
349
315
|
base=backoff, mul_spread=1.2, max_sleep=backoff + 3
|
350
316
|
)
|
@@ -10,28 +10,14 @@ with handling for restricted and encrypted chapters
|
|
10
10
|
import asyncio
|
11
11
|
from collections.abc import Awaitable, Callable
|
12
12
|
from pathlib import Path
|
13
|
-
from typing import Any
|
13
|
+
from typing import Any, ClassVar
|
14
14
|
|
15
15
|
from novel_downloader.core.downloaders.base import BaseDownloader
|
16
16
|
from novel_downloader.core.downloaders.registry import register_downloader
|
17
|
-
from novel_downloader.core.downloaders.signals import
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
)
|
22
|
-
from novel_downloader.core.interfaces import (
|
23
|
-
FetcherProtocol,
|
24
|
-
ParserProtocol,
|
25
|
-
)
|
26
|
-
from novel_downloader.models import (
|
27
|
-
BookConfig,
|
28
|
-
ChapterDict,
|
29
|
-
DownloaderConfig,
|
30
|
-
)
|
31
|
-
from novel_downloader.utils import (
|
32
|
-
ChapterStorage,
|
33
|
-
async_jitter_sleep,
|
34
|
-
)
|
17
|
+
from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
|
18
|
+
from novel_downloader.core.interfaces import FetcherProtocol, ParserProtocol
|
19
|
+
from novel_downloader.models import BookConfig, ChapterDict, DownloaderConfig
|
20
|
+
from novel_downloader.utils import ChapterStorage, async_jitter_sleep
|
35
21
|
|
36
22
|
|
37
23
|
@register_downloader(site_keys=["qidian", "qd"])
|
@@ -43,9 +29,9 @@ class QidianDownloader(BaseDownloader):
|
|
43
29
|
handles fetch -> parse -> enqueue storage.
|
44
30
|
"""
|
45
31
|
|
46
|
-
DEFAULT_SOURCE_ID = 0
|
47
|
-
ENCRYPTED_SOURCE_ID = 1
|
48
|
-
PRIORITIES_MAP = {
|
32
|
+
DEFAULT_SOURCE_ID: ClassVar[int] = 0
|
33
|
+
ENCRYPTED_SOURCE_ID: ClassVar[int] = 1
|
34
|
+
PRIORITIES_MAP: ClassVar[dict[int, int]] = {
|
49
35
|
DEFAULT_SOURCE_ID: 0,
|
50
36
|
ENCRYPTED_SOURCE_ID: 1,
|
51
37
|
}
|
@@ -55,9 +41,10 @@ class QidianDownloader(BaseDownloader):
|
|
55
41
|
fetcher: FetcherProtocol,
|
56
42
|
parser: ParserProtocol,
|
57
43
|
config: DownloaderConfig,
|
44
|
+
site: str,
|
58
45
|
):
|
59
|
-
|
60
|
-
|
46
|
+
super().__init__(fetcher, parser, config, site)
|
47
|
+
self._request_interval = max(1.0, config.request_interval)
|
61
48
|
|
62
49
|
async def _download_one(
|
63
50
|
self,
|
@@ -84,186 +71,172 @@ class QidianDownloader(BaseDownloader):
|
|
84
71
|
raw_base.mkdir(parents=True, exist_ok=True)
|
85
72
|
html_dir = self._debug_dir / book_id / "html"
|
86
73
|
|
87
|
-
chapter_storage = ChapterStorage(
|
88
|
-
raw_base=raw_base,
|
89
|
-
priorities=self.PRIORITIES_MAP,
|
90
|
-
)
|
91
|
-
chapter_storage.connect()
|
92
|
-
|
93
74
|
def cancelled() -> bool:
|
94
75
|
return bool(cancel_event and cancel_event.is_set())
|
95
76
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
return
|
101
|
-
|
102
|
-
vols = book_info["volumes"]
|
103
|
-
total_chapters = sum(len(v["chapters"]) for v in vols)
|
104
|
-
if total_chapters == 0:
|
105
|
-
self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
|
106
|
-
return
|
77
|
+
# ---- metadata ---
|
78
|
+
book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
|
79
|
+
if not book_info:
|
80
|
+
return
|
107
81
|
|
108
|
-
|
82
|
+
vols = book_info["volumes"]
|
83
|
+
plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
|
84
|
+
if not plan:
|
85
|
+
self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
|
86
|
+
return
|
109
87
|
|
110
|
-
|
111
|
-
cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
|
112
|
-
save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
|
113
|
-
default_batch: list[ChapterDict] = []
|
114
|
-
encrypted_batch: list[ChapterDict] = []
|
88
|
+
progress = Progress(total=len(plan), hook=progress_hook)
|
115
89
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
90
|
+
# ---- queues & batching ---
|
91
|
+
cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(maxsize=self._workers * 2)
|
92
|
+
save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
|
93
|
+
maxsize=self._workers * 2
|
94
|
+
)
|
95
|
+
default_batch: list[ChapterDict] = []
|
96
|
+
encrypted_batch: list[ChapterDict] = []
|
121
97
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
except Exception as e:
|
128
|
-
self.logger.error(
|
129
|
-
"[Storage] batch upsert failed (size=%d, src=%d): %s",
|
130
|
-
len(batch),
|
131
|
-
src,
|
132
|
-
e,
|
133
|
-
exc_info=True,
|
134
|
-
)
|
135
|
-
else:
|
136
|
-
await progress.bump(len(batch))
|
137
|
-
finally:
|
138
|
-
batch.clear()
|
139
|
-
|
140
|
-
async def flush_all() -> None:
|
141
|
-
await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
|
142
|
-
await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
|
143
|
-
|
144
|
-
# ---- workers ---
|
145
|
-
sem = asyncio.Semaphore(self.workers)
|
146
|
-
|
147
|
-
async def storage_worker() -> None:
|
148
|
-
"""
|
149
|
-
Consumes parsed chapters, batches by source, flushes on threshold.
|
150
|
-
|
151
|
-
Terminates after receiving STOP from each chapter worker.
|
152
|
-
|
153
|
-
On cancel: drains queue, flushes once, then waits for remaining STOPs.
|
154
|
-
"""
|
155
|
-
stop_count = 0
|
156
|
-
while True:
|
157
|
-
chap = await save_q.get()
|
158
|
-
if isinstance(chap, StopToken):
|
159
|
-
stop_count += 1
|
160
|
-
if stop_count == NUM_WORKERS:
|
161
|
-
await flush_all()
|
162
|
-
return
|
163
|
-
continue
|
164
|
-
|
165
|
-
batch, src = select_batch(chap)
|
166
|
-
batch.append(chap)
|
167
|
-
if len(batch) >= self.storage_batch_size:
|
168
|
-
await flush_batch(batch, src)
|
98
|
+
def select_batch(chap: ChapterDict) -> tuple[list[ChapterDict], int]:
|
99
|
+
# set extra.encrypted (by parser); default to plain if absent.
|
100
|
+
if chap.get("extra", {}).get("encrypted", False):
|
101
|
+
return encrypted_batch, self.ENCRYPTED_SOURCE_ID
|
102
|
+
return default_batch, self.DEFAULT_SOURCE_ID
|
169
103
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
104
|
+
async def flush_batch(batch: list[ChapterDict], src: int) -> None:
|
105
|
+
if not batch:
|
106
|
+
return
|
107
|
+
try:
|
108
|
+
storage.upsert_chapters(batch, src)
|
109
|
+
except Exception as e:
|
110
|
+
self.logger.error(
|
111
|
+
"[Storage] batch upsert failed (size=%d, src=%d): %s",
|
112
|
+
len(batch),
|
113
|
+
src,
|
114
|
+
e,
|
115
|
+
exc_info=True,
|
116
|
+
)
|
117
|
+
else:
|
118
|
+
await progress.bump(len(batch))
|
119
|
+
finally:
|
120
|
+
batch.clear()
|
121
|
+
|
122
|
+
async def flush_all() -> None:
|
123
|
+
await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
|
124
|
+
await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
|
125
|
+
|
126
|
+
# ---- workers ---
|
127
|
+
async def storage_worker() -> None:
|
128
|
+
"""
|
129
|
+
Consumes parsed chapters, batches by source, flushes on threshold.
|
130
|
+
|
131
|
+
Terminates after receiving STOP from each chapter worker.
|
132
|
+
|
133
|
+
On cancel: drains queue, flushes once, then waits for remaining STOPs.
|
134
|
+
"""
|
135
|
+
stop_count = 0
|
136
|
+
while True:
|
137
|
+
chap = await save_q.get()
|
138
|
+
if isinstance(chap, StopToken):
|
139
|
+
stop_count += 1
|
140
|
+
if stop_count == NUM_WORKERS:
|
182
141
|
await flush_all()
|
183
|
-
# Wait for remaining STOPs to arrive
|
184
|
-
while stop_count < NUM_WORKERS:
|
185
|
-
nxt = await save_q.get()
|
186
|
-
if nxt is STOP:
|
187
|
-
stop_count += 1
|
188
142
|
return
|
143
|
+
continue
|
144
|
+
|
145
|
+
batch, src = select_batch(chap)
|
146
|
+
batch.append(chap)
|
147
|
+
if len(batch) >= self._storage_batch_size:
|
148
|
+
await flush_batch(batch, src)
|
149
|
+
|
150
|
+
if cancelled():
|
151
|
+
# Drain whatever is already parsed
|
152
|
+
try:
|
153
|
+
while True:
|
154
|
+
nxt = save_q.get_nowait()
|
155
|
+
if isinstance(nxt, StopToken):
|
156
|
+
stop_count += 1
|
157
|
+
else:
|
158
|
+
nbatch, nsrc = select_batch(nxt)
|
159
|
+
nbatch.append(nxt)
|
160
|
+
except asyncio.QueueEmpty:
|
161
|
+
pass
|
162
|
+
await flush_all()
|
163
|
+
# Wait for remaining STOPs to arrive
|
164
|
+
while stop_count < NUM_WORKERS:
|
165
|
+
nxt = await save_q.get()
|
166
|
+
if nxt is STOP:
|
167
|
+
stop_count += 1
|
168
|
+
return
|
189
169
|
|
190
|
-
|
191
|
-
|
192
|
-
|
170
|
+
async def chapter_worker() -> None:
|
171
|
+
"""
|
172
|
+
Single worker: fetch + parse with retry, then enqueue ChapterDict.
|
193
173
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
174
|
+
Exits on STOP. If cancelled, does not start a new fetch; signals STOP.
|
175
|
+
"""
|
176
|
+
while True:
|
177
|
+
cid = await cid_q.get()
|
178
|
+
if isinstance(cid, StopToken):
|
179
|
+
await save_q.put(STOP)
|
180
|
+
return
|
201
181
|
|
202
|
-
|
203
|
-
|
182
|
+
if cancelled():
|
183
|
+
await save_q.put(STOP)
|
184
|
+
return
|
204
185
|
|
205
|
-
|
206
|
-
|
207
|
-
|
186
|
+
chap = await self._process_chapter(book_id, cid, html_dir)
|
187
|
+
if chap and not cancelled():
|
188
|
+
await save_q.put(chap)
|
208
189
|
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
190
|
+
await async_jitter_sleep(
|
191
|
+
self._request_interval,
|
192
|
+
mul_spread=1.1,
|
193
|
+
max_sleep=self._request_interval + 2,
|
194
|
+
)
|
213
195
|
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
max_sleep=self.request_interval + 2,
|
218
|
-
)
|
196
|
+
async def producer() -> None:
|
197
|
+
"""
|
198
|
+
Enqueue chapter IDs respecting start/end/skip_existing.
|
219
199
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
for _ in range(NUM_WORKERS):
|
240
|
-
await cid_q.put(STOP)
|
241
|
-
|
242
|
-
# ---- run tasks ---
|
200
|
+
Always emits STOP x NUM_WORKERS at the end (even if cancelled early).
|
201
|
+
"""
|
202
|
+
try:
|
203
|
+
for cid in plan:
|
204
|
+
if cancelled():
|
205
|
+
break
|
206
|
+
if self._skip_existing and storage.exists(
|
207
|
+
cid, self.DEFAULT_SOURCE_ID
|
208
|
+
):
|
209
|
+
# Already have not-encrypted; count as done.
|
210
|
+
await progress.bump(1)
|
211
|
+
else:
|
212
|
+
await cid_q.put(cid)
|
213
|
+
finally:
|
214
|
+
for _ in range(NUM_WORKERS):
|
215
|
+
await cid_q.put(STOP)
|
216
|
+
|
217
|
+
# ---- run tasks ---
|
218
|
+
with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
|
243
219
|
async with asyncio.TaskGroup() as tg:
|
244
220
|
tg.create_task(storage_worker())
|
245
221
|
for _ in range(NUM_WORKERS):
|
246
222
|
tg.create_task(chapter_worker())
|
247
223
|
tg.create_task(producer())
|
248
224
|
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
finally:
|
266
|
-
chapter_storage.close()
|
225
|
+
# ---- done ---
|
226
|
+
if cancelled():
|
227
|
+
self.logger.info(
|
228
|
+
"%s Novel '%s' cancelled: flushed %d/%d chapters.",
|
229
|
+
TAG,
|
230
|
+
book_info.get("book_name", "unknown"),
|
231
|
+
progress.done,
|
232
|
+
progress.total,
|
233
|
+
)
|
234
|
+
else:
|
235
|
+
self.logger.info(
|
236
|
+
"%s Novel '%s' download completed.",
|
237
|
+
TAG,
|
238
|
+
book_info.get("book_name", "unknown"),
|
239
|
+
)
|
267
240
|
|
268
241
|
@staticmethod
|
269
242
|
def _check_restricted(html_list: list[str]) -> bool:
|
@@ -295,7 +268,7 @@ class QidianDownloader(BaseDownloader):
|
|
295
268
|
|
296
269
|
:return: ChapterDict on success, or None on failure.
|
297
270
|
"""
|
298
|
-
for attempt in range(self.
|
271
|
+
for attempt in range(self._retry_times + 1):
|
299
272
|
try:
|
300
273
|
html_list = await self.fetcher.get_book_chapter(book_id, cid)
|
301
274
|
if self._check_restricted(html_list):
|
@@ -321,11 +294,11 @@ class QidianDownloader(BaseDownloader):
|
|
321
294
|
return chap
|
322
295
|
|
323
296
|
except Exception as e:
|
324
|
-
if attempt < self.
|
297
|
+
if attempt < self._retry_times:
|
325
298
|
self.logger.info(
|
326
299
|
"[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
|
327
300
|
)
|
328
|
-
backoff = self.
|
301
|
+
backoff = self._backoff_factor * (2**attempt)
|
329
302
|
await async_jitter_sleep(
|
330
303
|
base=backoff,
|
331
304
|
mul_spread=1.2,
|