novel-downloader 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +11 -8
- novel_downloader/cli/export.py +17 -17
- novel_downloader/cli/ui.py +28 -1
- novel_downloader/config/adapter.py +27 -1
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +7 -33
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +21 -47
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +4 -39
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/core/parsers/{qidian/main_parser.py → qidian.py} +147 -266
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +3 -4
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/constants.py +6 -0
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/fontocr/core.py +2 -0
- novel_downloader/utils/fontocr/loader.py +10 -8
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +1 -1
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +4 -1
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/RECORD +91 -94
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -11
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
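
The most substantial changes are in the downloader pipeline. Selected hunks from novel_downloader/core/downloaders/common.py and novel_downloader/core/downloaders/qianbi.py follow; removed lines whose content was not captured by the diff viewer appear as bare "-" markers.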
--- a/novel_downloader/core/downloaders/common.py
+++ b/novel_downloader/core/downloaders/common.py
@@ -12,19 +12,9 @@ from pathlib import Path
 from typing import Any
 
 from novel_downloader.core.downloaders.base import BaseDownloader
-from novel_downloader.core.downloaders.signals import (
-    STOP,
-    Progress,
-    StopToken,
-)
-from novel_downloader.models import (
-    BookConfig,
-    ChapterDict,
-)
-from novel_downloader.utils import (
-    ChapterStorage,
-    async_jitter_sleep,
-)
+from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
+from novel_downloader.models import BookConfig, ChapterDict
+from novel_downloader.utils import ChapterStorage, async_jitter_sleep
 
 
 class CommonDownloader(BaseDownloader):
@@ -41,7 +31,7 @@ class CommonDownloader(BaseDownloader):
         **kwargs: Any,
     ) -> None:
         """
-        Sentinel-based pipeline with
+        Sentinel-based pipeline with cancellation:
 
         Producer -> ChapterWorkers -> StorageWorker.
 
@@ -59,181 +49,167 @@ class CommonDownloader(BaseDownloader):
         raw_base.mkdir(parents=True, exist_ok=True)
         html_dir = self._debug_dir / book_id / "html"
 
-        chapter_storage = ChapterStorage(
-            raw_base=raw_base,
-            priorities=self.PRIORITIES_MAP,
-        )
-        chapter_storage.connect()
-
         def cancelled() -> bool:
             return bool(cancel_event and cancel_event.is_set())
 
-
-
-
-
-            return
-
-        vols = book_info["volumes"]
-        total_chapters = sum(len(v["chapters"]) for v in vols)
-        if total_chapters == 0:
-            self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
-            return
+        # --- metadata ---
+        book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
+        if not book_info:
+            return
 
-
+        vols = book_info["volumes"]
+        plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
+        if not plan:
+            self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
+            return
 
-
-        cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
-        save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
-        batch: list[ChapterDict] = []
+        progress = Progress(total=len(plan), hook=progress_hook)
 
-        async def flush_batch() -> None:
-            if not batch:
-                return
-            try:
-                chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
-            except Exception as e:
-                self.logger.error(
-                    "[Storage] batch upsert failed (size=%d): %s",
-                    len(batch),
-                    e,
-                    exc_info=True,
-                )
-            else:
-                await progress.bump(len(batch))
-            finally:
-                batch.clear()
-
-        # --- stage: storage worker ---
-        async def storage_worker() -> None:
-            """
-            Consumes parsed chapters, writes in batches.
-
-            Terminates after receiving STOP from each chapter worker.
-
-            On cancel: keeps consuming (to avoid blocking producers),
-            flushes, and exits once all STOPs are seen.
-            """
-            stop_count = 0
-            while True:
-                item = await save_q.get()
-                if isinstance(item, StopToken):
-                    stop_count += 1
-                    if stop_count == self.workers:
-                        # All chapter workers have exited.
-                        await flush_batch()
-                        return
-                    # else keep waiting for remaining STOPs
-                    continue
-
-                # Normal chapter
-                batch.append(item)
-                if len(batch) >= self.storage_batch_size:
-                    await flush_batch()
+        # --- queues & batching ---
+        cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(maxsize=self._workers * 2)
+        save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
+            maxsize=self._workers * 2
+        )
+        batch: list[ChapterDict] = []
 
-
-
-
-
-
-
-
-
-
-
-
-
+        async def flush_batch() -> None:
+            if not batch:
+                return
+            try:
+                storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
+            except Exception as e:
+                self.logger.error(
+                    "[Storage] batch upsert failed (size=%d): %s",
+                    len(batch),
+                    e,
+                    exc_info=True,
+                )
+            else:
+                await progress.bump(len(batch))
+            finally:
+                batch.clear()
+
+        # --- stage: storage worker ---
+        async def storage_worker() -> None:
+            """
+            Consumes parsed chapters, writes in batches.
+
+            Terminates after receiving STOP from each chapter worker.
+
+            On cancel: keeps consuming (to avoid blocking producers),
+            flushes, and exits once all STOPs are seen.
+            """
+            stop_count = 0
+            while True:
+                item = await save_q.get()
+                if isinstance(item, StopToken):
+                    stop_count += 1
+                    if stop_count == self._workers:
+                        # All chapter workers have exited.
                         await flush_batch()
-
-
-
+                        return
+                    # else keep waiting for remaining STOPs
+                    continue
+
+                # Normal chapter
+                batch.append(item)
+                if len(batch) >= self._storage_batch_size:
+                    await flush_batch()
+
+                if cancelled():
+                    # Drain whatever is already in the queue
+                    try:
+                        while True:
+                            nxt = save_q.get_nowait()
                             if isinstance(nxt, StopToken):
                                 stop_count += 1
-
-
-
-
+                            else:
+                                batch.append(nxt)
+                    except asyncio.QueueEmpty:
+                        pass
+                    # Final flush of everything
+                    await flush_batch()
+                    # Wait for remaining STOPs so chapter workers can finish.
+                    while stop_count < self._workers:
+                        nxt = await save_q.get()
+                        if isinstance(nxt, StopToken):
+                            stop_count += 1
+                    return
 
-
-
-
+        # --- stage: chapter worker ---
+        async def chapter_worker() -> None:
+            """
+            Fetch + parse with retry, then enqueue to save_q.
+
+            Exits on STOP, or early if cancel is set before starting a new fetch.
+            """
+            while True:
+                cid = await cid_q.get()
+                if isinstance(cid, StopToken):
+                    # Propagate one STOP to storage and exit.
+                    await save_q.put(STOP)
+                    return
 
-
-
-
-
-                if isinstance(cid, StopToken):
-                    # Propagate one STOP to storage and exit.
-                    await save_q.put(STOP)
-                    return
+                # If cancelled, don't start a new network call; let storage finish.
+                if cancelled():
+                    await save_q.put(STOP)
+                    return
 
-
-
-
+                chap = await self._process_chapter(book_id, cid, html_dir)
+                if chap:
+                    await save_q.put(chap)
 
-
-
-
-
+                # polite pacing
+                await async_jitter_sleep(
+                    self._request_interval,
+                    mul_spread=1.1,
+                    max_sleep=self._request_interval + 2,
+                )
 
-
-
-
-
+        # --- stage: producer ---
+        async def producer() -> None:
+            """
+            Enqueue chapter IDs (respecting start/end/skip_existing).
 
-
-
-
-
-
-        )
-
-
-
-
-
-
-
-
-
-
-
-
-                break
-                if self.skip_existing and chapter_storage.exists(cid):
-                    # Count as completed but don't enqueue.
-                    await progress.bump(1)
-                else:
-                    await cid_q.put(cid)
-            finally:
-                for _ in range(self.workers):
-                    await cid_q.put(STOP)
-
-        # --- run the pipeline ---
+            Always sends STOP x workers at the end (even if cancelled early),
+            so chapter workers can exit deterministically.
+            """
+            try:
+                for cid in plan:
+                    if cancelled():
+                        break
+                    if self._skip_existing and storage.exists(cid):
+                        # Count as completed but don't enqueue.
+                        await progress.bump(1)
+                    else:
+                        await cid_q.put(cid)
+            finally:
+                for _ in range(self._workers):
+                    await cid_q.put(STOP)
+
+        # --- run the pipeline ---
+        with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
             async with asyncio.TaskGroup() as tg:
                 tg.create_task(storage_worker())
-                for _ in range(self.workers):
+                for _ in range(self._workers):
                     tg.create_task(chapter_worker())
                 tg.create_task(producer())
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        finally:
-            chapter_storage.close()
+        # --- done ---
+        if cancelled():
+            self.logger.info(
+                "%s Novel '%s' cancelled: flushed %d/%d chapters.",
+                TAG,
+                book_info.get("book_name", "unknown"),
+                progress.done,
+                progress.total,
+            )
+        else:
+            self.logger.info(
+                "%s Novel '%s' download completed.",
+                TAG,
+                book_info.get("book_name", "unknown"),
+            )
 
     async def _process_chapter(
         self,
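
The refactored _download_one above is a sentinel-terminated fan-out/fan-in pipeline: one producer feeds N chapter workers over a bounded queue, and each worker forwards exactly one STOP token so the storage stage knows when every upstream task has exited. A minimal, self-contained sketch of that pattern (WORKERS, run, and the queue sizes are illustrative, not the package's API):

import asyncio

STOP = object()  # sentinel; the consumer counts one per worker to exit deterministically
WORKERS = 3


async def run(chapter_ids: list[str]) -> None:
    cid_q: asyncio.Queue = asyncio.Queue(maxsize=WORKERS * 2)  # bounded: backpressure
    save_q: asyncio.Queue = asyncio.Queue(maxsize=WORKERS * 2)

    async def producer() -> None:
        try:
            for cid in chapter_ids:
                await cid_q.put(cid)
        finally:
            for _ in range(WORKERS):  # always STOP x workers, even on early exit
                await cid_q.put(STOP)

    async def worker() -> None:
        while True:
            cid = await cid_q.get()
            if cid is STOP:
                await save_q.put(STOP)  # propagate exactly one STOP downstream
                return
            await save_q.put(f"parsed:{cid}")  # stand-in for fetch + parse

    async def storage() -> None:
        stops = 0
        while stops < WORKERS:  # exit only after every worker has signalled
            item = await save_q.get()
            if item is STOP:
                stops += 1
            else:
                print("saved", item)

    async with asyncio.TaskGroup() as tg:  # Python 3.11+
        tg.create_task(storage())
        for _ in range(WORKERS):
            tg.create_task(worker())
        tg.create_task(producer())


asyncio.run(run([f"c{i}" for i in range(8)]))

Because the producer emits STOP once per worker even when it breaks out early on cancellation, every stage drains and exits instead of hanging on a get().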
@@ -247,7 +223,7 @@ class CommonDownloader(BaseDownloader):
 
         :return: ChapterDict on success, or None on failure.
         """
-        for attempt in range(self.retry_times + 1):
+        for attempt in range(self._retry_times + 1):
             try:
                 html_list = await self.fetcher.get_book_chapter(book_id, cid)
                 self._save_html_pages(html_dir, cid, html_list)
@@ -258,11 +234,11 @@ class CommonDownloader(BaseDownloader):
                     raise ValueError("Empty parse result")
                 return chap
             except Exception as e:
-                if attempt < self.retry_times:
+                if attempt < self._retry_times:
                     self.logger.info(
                         "[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
                     )
-                    backoff = self.backoff_factor * (2**attempt)
+                    backoff = self._backoff_factor * (2**attempt)
                     await async_jitter_sleep(
                         base=backoff, mul_spread=1.2, max_sleep=backoff + 3
                     )
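
Both _process_chapter variants keep the same retry shape: _retry_times + 1 total attempts, exponential backoff of _backoff_factor * 2**attempt seconds, smeared by jitter so parallel workers don't retry in lockstep. A rough sketch of that loop (the fetch coroutine and this async_jitter_sleep body are stand-ins, not the package's implementations):

import asyncio
import random
from collections.abc import Awaitable, Callable


async def async_jitter_sleep(base: float, mul_spread: float, max_sleep: float) -> None:
    # Sleep somewhere in [base, base * mul_spread], capped at max_sleep.
    await asyncio.sleep(min(random.uniform(base, base * mul_spread), max_sleep))


async def fetch_with_retry(
    fetch: Callable[[], Awaitable[str]],
    retry_times: int = 3,
    backoff_factor: float = 2.0,
) -> str | None:
    for attempt in range(retry_times + 1):  # first try + retry_times retries
        try:
            return await fetch()
        except Exception:
            if attempt < retry_times:
                backoff = backoff_factor * (2**attempt)  # 2s, 4s, 8s, ...
                await async_jitter_sleep(
                    base=backoff, mul_spread=1.2, max_sleep=backoff + 3
                )
    return None  # every attempt failed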
--- a/novel_downloader/core/downloaders/qianbi.py
+++ b/novel_downloader/core/downloaders/qianbi.py
@@ -13,25 +13,13 @@ from typing import Any
 
 from novel_downloader.core.downloaders.base import BaseDownloader
 from novel_downloader.core.downloaders.registry import register_downloader
-from novel_downloader.core.downloaders.signals import (
-    STOP,
-    Progress,
-    StopToken,
-)
-from novel_downloader.core.interfaces import (
-    FetcherProtocol,
-    ParserProtocol,
-)
+from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
 from novel_downloader.models import (
     BookConfig,
     BookInfoDict,
     ChapterDict,
-    DownloaderConfig,
-)
-from novel_downloader.utils import (
-    ChapterStorage,
-    async_jitter_sleep,
 )
+from novel_downloader.utils import ChapterStorage, async_jitter_sleep
 
 
 @register_downloader(site_keys=["qianbi"])
@@ -43,16 +31,6 @@ class QianbiDownloader(BaseDownloader):
     each chapter as a unit (fetch -> parse -> enqueue storage).
     """
 
-    DEFAULT_SOURCE_ID = 0
-
-    def __init__(
-        self,
-        fetcher: FetcherProtocol,
-        parser: ParserProtocol,
-        config: DownloaderConfig,
-    ):
-        super().__init__(fetcher, parser, config, "qianbi")
-
     async def _download_one(
         self,
         book: BookConfig,
@@ -77,16 +55,10 @@ class QianbiDownloader(BaseDownloader):
         raw_base.mkdir(parents=True, exist_ok=True)
         html_dir = self._debug_dir / book_id / "html"
 
-        chapter_storage = ChapterStorage(
-            raw_base=raw_base,
-            priorities=self.PRIORITIES_MAP,
-        )
-        chapter_storage.connect()
-
         def cancelled() -> bool:
             return bool(cancel_event and cancel_event.is_set())
 
-        try:
+        with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
             # --- metadata ---
             book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
             if not book_info:
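
This hunk shows the release's recurring storage change: ChapterStorage is now entered as a context manager, which replaces the manual connect() call here and the try ... finally: close() pair removed further down. A sketch of how a storage class can support both styles (illustrative only; the real class persists chapters with per-source priorities):

import sqlite3
from pathlib import Path
from types import TracebackType


class ChapterStorageSketch:
    """Supports both the old connect()/close() style and `with ...`."""

    def __init__(self, raw_base: Path, priorities: dict[int, int]) -> None:
        self.raw_base = raw_base
        self.priorities = priorities
        self._conn: sqlite3.Connection | None = None

    def connect(self) -> None:
        self._conn = sqlite3.connect(self.raw_base / "chapters.sqlite")

    def close(self) -> None:
        if self._conn is not None:
            self._conn.close()
            self._conn = None

    def __enter__(self) -> "ChapterStorageSketch":
        self.connect()  # entering the `with` block replaces manual connect()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        self.close()  # runs even if the download pipeline raises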
@@ -95,28 +67,32 @@ class QianbiDownloader(BaseDownloader):
             book_info = await self._repair_chapter_ids(
                 book_id,
                 book_info,
-                chapter_storage,
+                storage,
                 html_dir,
             )
 
             vols = book_info["volumes"]
-
-            if
-                self.logger.
+            plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
+            if not plan:
+                self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
                 return
 
-            progress = Progress(
+            progress = Progress(total=len(plan), hook=progress_hook)
 
             # --- queues & batching ---
-            cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(
-
+            cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(
+                maxsize=self._workers * 2
+            )
+            save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
+                maxsize=self._workers * 2
+            )
             batch: list[ChapterDict] = []
 
             async def flush_batch() -> None:
                 if not batch:
                     return
                 try:
-                    chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
+                    storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
                 except Exception as e:
                     self.logger.error(
                         "[Storage] batch upsert failed (size=%d): %s",
@@ -144,7 +120,7 @@ class QianbiDownloader(BaseDownloader):
                 item = await save_q.get()
                 if isinstance(item, StopToken):
                     stop_count += 1
-                    if stop_count == self.workers:
+                    if stop_count == self._workers:
                         # All chapter workers have exited.
                         await flush_batch()
                         return
@@ -153,7 +129,7 @@ class QianbiDownloader(BaseDownloader):
 
                 # Normal chapter
                 batch.append(item)
-                if len(batch) >= self.storage_batch_size:
+                if len(batch) >= self._storage_batch_size:
                     await flush_batch()
 
                 if cancelled():
@@ -170,15 +146,13 @@ class QianbiDownloader(BaseDownloader):
                     # Final flush of everything
                     await flush_batch()
                     # Wait for remaining STOPs so chapter workers can finish.
-                    while stop_count < self.workers:
+                    while stop_count < self._workers:
                         nxt = await save_q.get()
                         if isinstance(nxt, StopToken):
                             stop_count += 1
                     return
 
             # --- stage: chapter worker ---
-            sem = asyncio.Semaphore(self.workers)
-
             async def chapter_worker() -> None:
                 """
                 Fetch + parse with retry, then enqueue to save_q.
@@ -192,25 +166,20 @@ class QianbiDownloader(BaseDownloader):
                         await save_q.put(STOP)
                         return
 
-                    if not cid or cid in ignore_set:
-                        # Ignore silently and continue.
-                        continue
-
                     # If cancelled, don't start a new network call; let storage finish.
                     if cancelled():
                         await save_q.put(STOP)
                         return
 
-                    async with sem:
-                        chap = await self._process_chapter(book_id, cid, html_dir)
+                    chap = await self._process_chapter(book_id, cid, html_dir)
                     if chap:
                         await save_q.put(chap)
 
                     # polite pacing
                     await async_jitter_sleep(
-                        self.request_interval,
+                        self._request_interval,
                         mul_spread=1.1,
-                        max_sleep=self.request_interval + 2,
+                        max_sleep=self._request_interval + 2,
                     )
 
             # --- stage: producer ---
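
Two things disappear from chapter_worker in this hunk: the per-chapter ignore_set check (filtering now happens up front, in the _planned_chapter_ids call seen earlier) and the semaphore throttle (sem = asyncio.Semaphore(self.workers), removed in the previous hunk), since concurrency is already bounded by running exactly self._workers worker tasks.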
@@ -221,22 +190,22 @@ class QianbiDownloader(BaseDownloader):
                 so chapter workers can exit deterministically.
                 """
                 try:
-
+                    for cid in plan:
                         if cancelled():
                             break
-                    if self.skip_existing and chapter_storage.exists(cid):
+                        if self._skip_existing and storage.exists(cid):
                             # Count as completed but don't enqueue.
                             await progress.bump(1)
                         else:
                             await cid_q.put(cid)
                 finally:
-                    for _ in range(self.workers):
+                    for _ in range(self._workers):
                         await cid_q.put(STOP)
 
             # --- run the pipeline ---
             async with asyncio.TaskGroup() as tg:
                 tg.create_task(storage_worker())
-                for _ in range(self.workers):
+                for _ in range(self._workers):
                     tg.create_task(chapter_worker())
                 tg.create_task(producer())
 
@@ -256,9 +225,6 @@ class QianbiDownloader(BaseDownloader):
                     book_info.get("book_name", "unknown"),
                 )
 
-        finally:
-            chapter_storage.close()
-
     async def _repair_chapter_ids(
         self,
         book_id: str,
@@ -295,9 +261,9 @@ class QianbiDownloader(BaseDownloader):
                 continue
             storage.upsert_chapter(data, self.DEFAULT_SOURCE_ID)
             await async_jitter_sleep(
-                self.request_interval,
+                self._request_interval,
                 mul_spread=1.1,
-                max_sleep=self.request_interval + 2,
+                max_sleep=self._request_interval + 2,
             )
 
             next_cid = data.get("extra", {}).get("next_chapter_id")
@@ -331,7 +297,7 @@ class QianbiDownloader(BaseDownloader):
 
         :return: ChapterDict on success, or None on failure.
         """
-        for attempt in range(self.retry_times + 1):
+        for attempt in range(self._retry_times + 1):
             try:
                 html_list = await self.fetcher.get_book_chapter(book_id, cid)
                 self._save_html_pages(html_dir, cid, html_list)
@@ -342,9 +308,9 @@ class QianbiDownloader(BaseDownloader):
                     raise ValueError("Empty parse result")
                 return chap
             except Exception as e:
-                if attempt < self.retry_times:
+                if attempt < self._retry_times:
                     self.logger.info(f"[ChapterWorker] Retry {cid} ({attempt+1}): {e}")
-                    backoff = self.backoff_factor * (2**attempt)
+                    backoff = self._backoff_factor * (2**attempt)
                     await async_jitter_sleep(
                         base=backoff, mul_spread=1.2, max_sleep=backoff + 3
                     )