novel-downloader 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +11 -8
- novel_downloader/cli/export.py +17 -17
- novel_downloader/cli/ui.py +28 -1
- novel_downloader/config/adapter.py +27 -1
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +7 -33
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +21 -47
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +4 -39
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/core/parsers/{qidian/main_parser.py → qidian.py} +147 -266
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +3 -4
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/constants.py +6 -0
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/fontocr/core.py +2 -0
- novel_downloader/utils/fontocr/loader.py +10 -8
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +1 -1
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +4 -1
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/RECORD +91 -94
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -11
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
novel_downloader/core/downloaders/qidian.py
@@ -10,28 +10,14 @@ with handling for restricted and encrypted chapters
 import asyncio
 from collections.abc import Awaitable, Callable
 from pathlib import Path
-from typing import Any
+from typing import Any, ClassVar
 
 from novel_downloader.core.downloaders.base import BaseDownloader
 from novel_downloader.core.downloaders.registry import register_downloader
-from novel_downloader.core.downloaders.signals import (
-    STOP,
-    Progress,
-    StopToken,
-)
-from novel_downloader.core.interfaces import (
-    FetcherProtocol,
-    ParserProtocol,
-)
-from novel_downloader.models import (
-    BookConfig,
-    ChapterDict,
-    DownloaderConfig,
-)
-from novel_downloader.utils import (
-    ChapterStorage,
-    async_jitter_sleep,
-)
+from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
+from novel_downloader.core.interfaces import FetcherProtocol, ParserProtocol
+from novel_downloader.models import BookConfig, ChapterDict, DownloaderConfig
+from novel_downloader.utils import ChapterStorage, async_jitter_sleep
 
 
 @register_downloader(site_keys=["qidian", "qd"])
@@ -43,9 +29,9 @@ class QidianDownloader(BaseDownloader):
     handles fetch -> parse -> enqueue storage.
     """
 
-    DEFAULT_SOURCE_ID = 0
-    ENCRYPTED_SOURCE_ID = 1
-    PRIORITIES_MAP = {
+    DEFAULT_SOURCE_ID: ClassVar[int] = 0
+    ENCRYPTED_SOURCE_ID: ClassVar[int] = 1
+    PRIORITIES_MAP: ClassVar[dict[int, int]] = {
         DEFAULT_SOURCE_ID: 0,
         ENCRYPTED_SOURCE_ID: 1,
     }
@@ -55,9 +41,10 @@ class QidianDownloader(BaseDownloader):
         fetcher: FetcherProtocol,
         parser: ParserProtocol,
         config: DownloaderConfig,
+        site: str,
     ):
-
-
+        super().__init__(fetcher, parser, config, site)
+        self._request_interval = max(1.0, config.request_interval)
 
     async def _download_one(
         self,
@@ -84,186 +71,172 @@ class QidianDownloader(BaseDownloader):
         raw_base.mkdir(parents=True, exist_ok=True)
         html_dir = self._debug_dir / book_id / "html"
 
-        chapter_storage = ChapterStorage(
-            raw_base=raw_base,
-            priorities=self.PRIORITIES_MAP,
-        )
-        chapter_storage.connect()
-
         def cancelled() -> bool:
             return bool(cancel_event and cancel_event.is_set())
 
-
-
-
-
-            return
-
-        vols = book_info["volumes"]
-        total_chapters = sum(len(v["chapters"]) for v in vols)
-        if total_chapters == 0:
-            self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
-            return
+        # ---- metadata ---
+        book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
+        if not book_info:
+            return
 
-
+        vols = book_info["volumes"]
+        plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
+        if not plan:
+            self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
+            return
 
-
-        cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
-        save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
-        default_batch: list[ChapterDict] = []
-        encrypted_batch: list[ChapterDict] = []
+        progress = Progress(total=len(plan), hook=progress_hook)
 
-
-
-
-
-
+        # ---- queues & batching ---
+        cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(maxsize=self._workers * 2)
+        save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
+            maxsize=self._workers * 2
+        )
+        default_batch: list[ChapterDict] = []
+        encrypted_batch: list[ChapterDict] = []
 
-
-
-
-
-
-            except Exception as e:
-                self.logger.error(
-                    "[Storage] batch upsert failed (size=%d, src=%d): %s",
-                    len(batch),
-                    src,
-                    e,
-                    exc_info=True,
-                )
-            else:
-                await progress.bump(len(batch))
-            finally:
-                batch.clear()
-
-        async def flush_all() -> None:
-            await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
-            await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
-
-        # ---- workers ---
-        sem = asyncio.Semaphore(self.workers)
-
-        async def storage_worker() -> None:
-            """
-            Consumes parsed chapters, batches by source, flushes on threshold.
-
-            Terminates after receiving STOP from each chapter worker.
-
-            On cancel: drains queue, flushes once, then waits for remaining STOPs.
-            """
-            stop_count = 0
-            while True:
-                chap = await save_q.get()
-                if isinstance(chap, StopToken):
-                    stop_count += 1
-                    if stop_count == NUM_WORKERS:
-                        await flush_all()
-                        return
-                    continue
-
-                batch, src = select_batch(chap)
-                batch.append(chap)
-                if len(batch) >= self.storage_batch_size:
-                    await flush_batch(batch, src)
+        def select_batch(chap: ChapterDict) -> tuple[list[ChapterDict], int]:
+            # set extra.encrypted (by parser); default to plain if absent.
+            if chap.get("extra", {}).get("encrypted", False):
+                return encrypted_batch, self.ENCRYPTED_SOURCE_ID
+            return default_batch, self.DEFAULT_SOURCE_ID
 
-
-
-
-
-
-
-
-
-
-
-
-
+        async def flush_batch(batch: list[ChapterDict], src: int) -> None:
+            if not batch:
+                return
+            try:
+                storage.upsert_chapters(batch, src)
+            except Exception as e:
+                self.logger.error(
+                    "[Storage] batch upsert failed (size=%d, src=%d): %s",
+                    len(batch),
+                    src,
+                    e,
+                    exc_info=True,
+                )
+            else:
+                await progress.bump(len(batch))
+            finally:
+                batch.clear()
+
+        async def flush_all() -> None:
+            await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
+            await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
+
+        # ---- workers ---
+        async def storage_worker() -> None:
+            """
+            Consumes parsed chapters, batches by source, flushes on threshold.
+
+            Terminates after receiving STOP from each chapter worker.
+
+            On cancel: drains queue, flushes once, then waits for remaining STOPs.
+            """
+            stop_count = 0
+            while True:
+                chap = await save_q.get()
+                if isinstance(chap, StopToken):
+                    stop_count += 1
+                    if stop_count == NUM_WORKERS:
                         await flush_all()
-                        # Wait for remaining STOPs to arrive
-                        while stop_count < NUM_WORKERS:
-                            nxt = await save_q.get()
-                            if nxt is STOP:
-                                stop_count += 1
                         return
+                    continue
+
+                batch, src = select_batch(chap)
+                batch.append(chap)
+                if len(batch) >= self._storage_batch_size:
+                    await flush_batch(batch, src)
+
+                if cancelled():
+                    # Drain whatever is already parsed
+                    try:
+                        while True:
+                            nxt = save_q.get_nowait()
+                            if isinstance(nxt, StopToken):
+                                stop_count += 1
+                            else:
+                                nbatch, nsrc = select_batch(nxt)
+                                nbatch.append(nxt)
+                    except asyncio.QueueEmpty:
+                        pass
+                    await flush_all()
+                    # Wait for remaining STOPs to arrive
+                    while stop_count < NUM_WORKERS:
+                        nxt = await save_q.get()
+                        if nxt is STOP:
+                            stop_count += 1
+                    return
 
-
-
-
+        async def chapter_worker() -> None:
+            """
+            Single worker: fetch + parse with retry, then enqueue ChapterDict.
 
-
-
-
-
-
-
-
+            Exits on STOP. If cancelled, does not start a new fetch; signals STOP.
+            """
+            while True:
+                cid = await cid_q.get()
+                if isinstance(cid, StopToken):
+                    await save_q.put(STOP)
+                    return
 
-
-
+                if cancelled():
+                    await save_q.put(STOP)
+                    return
 
-
-
-
+                chap = await self._process_chapter(book_id, cid, html_dir)
+                if chap and not cancelled():
+                    await save_q.put(chap)
 
-
-
-
-
+                await async_jitter_sleep(
+                    self._request_interval,
+                    mul_spread=1.1,
+                    max_sleep=self._request_interval + 2,
+                )
 
-
-
-
-                    max_sleep=self.request_interval + 2,
-                )
+        async def producer() -> None:
+            """
+            Enqueue chapter IDs respecting start/end/skip_existing.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            for _ in range(NUM_WORKERS):
-                await cid_q.put(STOP)
-
-        # ---- run tasks ---
+            Always emits STOP x NUM_WORKERS at the end (even if cancelled early).
+            """
+            try:
+                for cid in plan:
+                    if cancelled():
+                        break
+                    if self._skip_existing and storage.exists(
+                        cid, self.DEFAULT_SOURCE_ID
+                    ):
+                        # Already have not-encrypted; count as done.
+                        await progress.bump(1)
+                    else:
+                        await cid_q.put(cid)
+            finally:
+                for _ in range(NUM_WORKERS):
+                    await cid_q.put(STOP)
+
+        # ---- run tasks ---
+        with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
             async with asyncio.TaskGroup() as tg:
                 tg.create_task(storage_worker())
                 for _ in range(NUM_WORKERS):
                     tg.create_task(chapter_worker())
                 tg.create_task(producer())
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        finally:
-            chapter_storage.close()
+        # ---- done ---
+        if cancelled():
+            self.logger.info(
+                "%s Novel '%s' cancelled: flushed %d/%d chapters.",
+                TAG,
+                book_info.get("book_name", "unknown"),
+                progress.done,
+                progress.total,
+            )
+        else:
+            self.logger.info(
+                "%s Novel '%s' download completed.",
+                TAG,
+                book_info.get("book_name", "unknown"),
+            )
 
     @staticmethod
     def _check_restricted(html_list: list[str]) -> bool:
@@ -295,7 +268,7 @@ class QidianDownloader(BaseDownloader):
 
         :return: ChapterDict on success, or None on failure.
         """
-        for attempt in range(self.
+        for attempt in range(self._retry_times + 1):
             try:
                 html_list = await self.fetcher.get_book_chapter(book_id, cid)
                 if self._check_restricted(html_list):
@@ -321,11 +294,11 @@ class QidianDownloader(BaseDownloader):
                     return chap
 
             except Exception as e:
-                if attempt < self.
+                if attempt < self._retry_times:
                     self.logger.info(
                         "[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
                     )
-                    backoff = self.
+                    backoff = self._backoff_factor * (2**attempt)
                     await async_jitter_sleep(
                         base=backoff,
                         mul_spread=1.2,