novel-downloader 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +11 -8
  3. novel_downloader/cli/export.py +17 -17
  4. novel_downloader/cli/ui.py +28 -1
  5. novel_downloader/config/adapter.py +27 -1
  6. novel_downloader/core/archived/deqixs/fetcher.py +1 -28
  7. novel_downloader/core/downloaders/__init__.py +2 -0
  8. novel_downloader/core/downloaders/base.py +34 -85
  9. novel_downloader/core/downloaders/common.py +147 -171
  10. novel_downloader/core/downloaders/qianbi.py +30 -64
  11. novel_downloader/core/downloaders/qidian.py +157 -184
  12. novel_downloader/core/downloaders/qqbook.py +292 -0
  13. novel_downloader/core/downloaders/registry.py +2 -2
  14. novel_downloader/core/exporters/__init__.py +2 -0
  15. novel_downloader/core/exporters/base.py +37 -59
  16. novel_downloader/core/exporters/common.py +620 -0
  17. novel_downloader/core/exporters/linovelib.py +47 -0
  18. novel_downloader/core/exporters/qidian.py +41 -12
  19. novel_downloader/core/exporters/qqbook.py +28 -0
  20. novel_downloader/core/exporters/registry.py +2 -2
  21. novel_downloader/core/fetchers/__init__.py +4 -2
  22. novel_downloader/core/fetchers/aaatxt.py +2 -22
  23. novel_downloader/core/fetchers/b520.py +3 -23
  24. novel_downloader/core/fetchers/base.py +80 -105
  25. novel_downloader/core/fetchers/biquyuedu.py +2 -22
  26. novel_downloader/core/fetchers/dxmwx.py +10 -22
  27. novel_downloader/core/fetchers/esjzone.py +6 -29
  28. novel_downloader/core/fetchers/guidaye.py +2 -22
  29. novel_downloader/core/fetchers/hetushu.py +9 -29
  30. novel_downloader/core/fetchers/i25zw.py +2 -16
  31. novel_downloader/core/fetchers/ixdzs8.py +2 -16
  32. novel_downloader/core/fetchers/jpxs123.py +2 -16
  33. novel_downloader/core/fetchers/lewenn.py +2 -22
  34. novel_downloader/core/fetchers/linovelib.py +4 -20
  35. novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
  36. novel_downloader/core/fetchers/piaotia.py +2 -16
  37. novel_downloader/core/fetchers/qbtr.py +2 -16
  38. novel_downloader/core/fetchers/qianbi.py +1 -20
  39. novel_downloader/core/fetchers/qidian.py +7 -33
  40. novel_downloader/core/fetchers/qqbook.py +177 -0
  41. novel_downloader/core/fetchers/quanben5.py +9 -29
  42. novel_downloader/core/fetchers/rate_limiter.py +22 -53
  43. novel_downloader/core/fetchers/sfacg.py +3 -16
  44. novel_downloader/core/fetchers/shencou.py +2 -16
  45. novel_downloader/core/fetchers/shuhaige.py +2 -22
  46. novel_downloader/core/fetchers/tongrenquan.py +2 -22
  47. novel_downloader/core/fetchers/ttkan.py +3 -14
  48. novel_downloader/core/fetchers/wanbengo.py +2 -22
  49. novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
  50. novel_downloader/core/fetchers/xiguashuwu.py +4 -20
  51. novel_downloader/core/fetchers/xs63b.py +3 -15
  52. novel_downloader/core/fetchers/xshbook.py +2 -22
  53. novel_downloader/core/fetchers/yamibo.py +4 -28
  54. novel_downloader/core/fetchers/yibige.py +13 -26
  55. novel_downloader/core/interfaces/exporter.py +19 -7
  56. novel_downloader/core/interfaces/fetcher.py +21 -47
  57. novel_downloader/core/parsers/__init__.py +4 -2
  58. novel_downloader/core/parsers/b520.py +2 -2
  59. novel_downloader/core/parsers/base.py +4 -39
  60. novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +5 -5
  61. novel_downloader/core/parsers/{qidian/main_parser.py → qidian.py} +147 -266
  62. novel_downloader/core/parsers/qqbook.py +709 -0
  63. novel_downloader/core/parsers/xiguashuwu.py +3 -4
  64. novel_downloader/core/searchers/__init__.py +2 -2
  65. novel_downloader/core/searchers/b520.py +1 -1
  66. novel_downloader/core/searchers/base.py +2 -2
  67. novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
  68. novel_downloader/models/__init__.py +2 -0
  69. novel_downloader/models/book.py +1 -0
  70. novel_downloader/models/config.py +12 -0
  71. novel_downloader/resources/config/settings.toml +23 -5
  72. novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
  73. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
  74. novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
  75. novel_downloader/utils/constants.py +6 -0
  76. novel_downloader/utils/crypto_utils/aes_util.py +1 -1
  77. novel_downloader/utils/epub/constants.py +1 -6
  78. novel_downloader/utils/fontocr/core.py +2 -0
  79. novel_downloader/utils/fontocr/loader.py +10 -8
  80. novel_downloader/utils/node_decryptor/__init__.py +13 -0
  81. novel_downloader/utils/node_decryptor/decryptor.py +342 -0
  82. novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
  83. novel_downloader/web/pages/download.py +1 -1
  84. novel_downloader/web/pages/search.py +1 -1
  85. novel_downloader/web/services/task_manager.py +2 -0
  86. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +4 -1
  87. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/RECORD +91 -94
  88. novel_downloader/core/exporters/common/__init__.py +0 -11
  89. novel_downloader/core/exporters/common/epub.py +0 -198
  90. novel_downloader/core/exporters/common/main_exporter.py +0 -64
  91. novel_downloader/core/exporters/common/txt.py +0 -146
  92. novel_downloader/core/exporters/epub_util.py +0 -215
  93. novel_downloader/core/exporters/linovelib/__init__.py +0 -11
  94. novel_downloader/core/exporters/linovelib/epub.py +0 -349
  95. novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
  96. novel_downloader/core/exporters/linovelib/txt.py +0 -139
  97. novel_downloader/core/exporters/txt_util.py +0 -67
  98. novel_downloader/core/parsers/qidian/__init__.py +0 -10
  99. novel_downloader/core/parsers/qidian/utils/__init__.py +0 -11
  100. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
  101. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
  102. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
  103. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
  104. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
novel_downloader/core/downloaders/qidian.py
@@ -10,28 +10,14 @@ with handling for restricted and encrypted chapters
  import asyncio
  from collections.abc import Awaitable, Callable
  from pathlib import Path
- from typing import Any
+ from typing import Any, ClassVar

  from novel_downloader.core.downloaders.base import BaseDownloader
  from novel_downloader.core.downloaders.registry import register_downloader
- from novel_downloader.core.downloaders.signals import (
-     STOP,
-     Progress,
-     StopToken,
- )
- from novel_downloader.core.interfaces import (
-     FetcherProtocol,
-     ParserProtocol,
- )
- from novel_downloader.models import (
-     BookConfig,
-     ChapterDict,
-     DownloaderConfig,
- )
- from novel_downloader.utils import (
-     ChapterStorage,
-     async_jitter_sleep,
- )
+ from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
+ from novel_downloader.core.interfaces import FetcherProtocol, ParserProtocol
+ from novel_downloader.models import BookConfig, ChapterDict, DownloaderConfig
+ from novel_downloader.utils import ChapterStorage, async_jitter_sleep


  @register_downloader(site_keys=["qidian", "qd"])
@@ -43,9 +29,9 @@ class QidianDownloader(BaseDownloader):
      handles fetch -> parse -> enqueue storage.
      """

-     DEFAULT_SOURCE_ID = 0
-     ENCRYPTED_SOURCE_ID = 1
-     PRIORITIES_MAP = {
+     DEFAULT_SOURCE_ID: ClassVar[int] = 0
+     ENCRYPTED_SOURCE_ID: ClassVar[int] = 1
+     PRIORITIES_MAP: ClassVar[dict[int, int]] = {
          DEFAULT_SOURCE_ID: 0,
          ENCRYPTED_SOURCE_ID: 1,
      }
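
The `ClassVar` annotations above are a typing-only change: they tell type checkers that these are class-level constants rather than per-instance attributes. A minimal illustration of the pattern (the class name below is made up; the attribute names mirror the hunk):

```python
from typing import ClassVar


class ExampleDownloader:
    # Class-level constants shared by all instances. With ClassVar, mypy
    # rejects assignment through an instance, and dataclass-style tooling
    # will not treat these as per-instance fields.
    DEFAULT_SOURCE_ID: ClassVar[int] = 0
    ENCRYPTED_SOURCE_ID: ClassVar[int] = 1
    PRIORITIES_MAP: ClassVar[dict[int, int]] = {
        DEFAULT_SOURCE_ID: 0,
        ENCRYPTED_SOURCE_ID: 1,
    }
```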
@@ -55,9 +41,10 @@ class QidianDownloader(BaseDownloader):
          fetcher: FetcherProtocol,
          parser: ParserProtocol,
          config: DownloaderConfig,
+         site: str,
      ):
-         config.request_interval = max(1.0, config.request_interval)
-         super().__init__(fetcher, parser, config, "qidian")
+         super().__init__(fetcher, parser, config, site)
+         self._request_interval = max(1.0, config.request_interval)

      async def _download_one(
          self,
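
The constructor now receives the site key from the caller and keeps the clamped request interval on the instance instead of mutating the shared `DownloaderConfig`. A rough sketch of that shape (class and attribute names here are illustrative, not from the package):

```python
class SketchDownloader:
    def __init__(self, fetcher, parser, config, site: str) -> None:
        self._site = site
        # Clamp per instance; the caller's config object is left untouched,
        # so other downloaders built from the same config are unaffected.
        self._request_interval = max(1.0, config.request_interval)
```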
@@ -84,186 +71,172 @@ class QidianDownloader(BaseDownloader):
          raw_base.mkdir(parents=True, exist_ok=True)
          html_dir = self._debug_dir / book_id / "html"

-         chapter_storage = ChapterStorage(
-             raw_base=raw_base,
-             priorities=self.PRIORITIES_MAP,
-         )
-         chapter_storage.connect()
-
          def cancelled() -> bool:
              return bool(cancel_event and cancel_event.is_set())

-         try:
-             # ---- metadata ---
-             book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
-             if not book_info:
-                 return
-
-             vols = book_info["volumes"]
-             total_chapters = sum(len(v["chapters"]) for v in vols)
-             if total_chapters == 0:
-                 self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
-                 return
+         # ---- metadata ---
+         book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
+         if not book_info:
+             return

-             progress = Progress(total_chapters, progress_hook)
+         vols = book_info["volumes"]
+         plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
+         if not plan:
+             self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
+             return

-             # ---- queues & batching ---
-             cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
-             save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
-             default_batch: list[ChapterDict] = []
-             encrypted_batch: list[ChapterDict] = []
+         progress = Progress(total=len(plan), hook=progress_hook)

-             def select_batch(chap: ChapterDict) -> tuple[list[ChapterDict], int]:
-                 # set extra.encrypted (by parser); default to plain if absent.
-                 if chap.get("extra", {}).get("encrypted", False):
-                     return encrypted_batch, self.ENCRYPTED_SOURCE_ID
-                 return default_batch, self.DEFAULT_SOURCE_ID
+         # ---- queues & batching ---
+         cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(maxsize=self._workers * 2)
+         save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
+             maxsize=self._workers * 2
+         )
+         default_batch: list[ChapterDict] = []
+         encrypted_batch: list[ChapterDict] = []

-             async def flush_batch(batch: list[ChapterDict], src: int) -> None:
-                 if not batch:
-                     return
-                 try:
-                     chapter_storage.upsert_chapters(batch, src)
-                 except Exception as e:
-                     self.logger.error(
-                         "[Storage] batch upsert failed (size=%d, src=%d): %s",
-                         len(batch),
-                         src,
-                         e,
-                         exc_info=True,
-                     )
-                 else:
-                     await progress.bump(len(batch))
-                 finally:
-                     batch.clear()
-
-             async def flush_all() -> None:
-                 await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
-                 await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
-
-             # ---- workers ---
-             sem = asyncio.Semaphore(self.workers)
-
-             async def storage_worker() -> None:
-                 """
-                 Consumes parsed chapters, batches by source, flushes on threshold.
-
-                 Terminates after receiving STOP from each chapter worker.
-
-                 On cancel: drains queue, flushes once, then waits for remaining STOPs.
-                 """
-                 stop_count = 0
-                 while True:
-                     chap = await save_q.get()
-                     if isinstance(chap, StopToken):
-                         stop_count += 1
-                         if stop_count == NUM_WORKERS:
-                             await flush_all()
-                             return
-                         continue
-
-                     batch, src = select_batch(chap)
-                     batch.append(chap)
-                     if len(batch) >= self.storage_batch_size:
-                         await flush_batch(batch, src)
+         def select_batch(chap: ChapterDict) -> tuple[list[ChapterDict], int]:
+             # set extra.encrypted (by parser); default to plain if absent.
+             if chap.get("extra", {}).get("encrypted", False):
+                 return encrypted_batch, self.ENCRYPTED_SOURCE_ID
+             return default_batch, self.DEFAULT_SOURCE_ID

-                     if cancelled():
-                         # Drain whatever is already parsed
-                         try:
-                             while True:
-                                 nxt = save_q.get_nowait()
-                                 if isinstance(nxt, StopToken):
-                                     stop_count += 1
-                                 else:
-                                     nbatch, nsrc = select_batch(nxt)
-                                     nbatch.append(nxt)
-                         except asyncio.QueueEmpty:
-                             pass
+         async def flush_batch(batch: list[ChapterDict], src: int) -> None:
+             if not batch:
+                 return
+             try:
+                 storage.upsert_chapters(batch, src)
+             except Exception as e:
+                 self.logger.error(
+                     "[Storage] batch upsert failed (size=%d, src=%d): %s",
+                     len(batch),
+                     src,
+                     e,
+                     exc_info=True,
+                 )
+             else:
+                 await progress.bump(len(batch))
+             finally:
+                 batch.clear()
+
+         async def flush_all() -> None:
+             await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
+             await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
+
+         # ---- workers ---
+         async def storage_worker() -> None:
+             """
+             Consumes parsed chapters, batches by source, flushes on threshold.
+
+             Terminates after receiving STOP from each chapter worker.
+
+             On cancel: drains queue, flushes once, then waits for remaining STOPs.
+             """
+             stop_count = 0
+             while True:
+                 chap = await save_q.get()
+                 if isinstance(chap, StopToken):
+                     stop_count += 1
+                     if stop_count == NUM_WORKERS:
                          await flush_all()
-                         # Wait for remaining STOPs to arrive
-                         while stop_count < NUM_WORKERS:
-                             nxt = await save_q.get()
-                             if nxt is STOP:
-                                 stop_count += 1
                          return
+                     continue
+
+                 batch, src = select_batch(chap)
+                 batch.append(chap)
+                 if len(batch) >= self._storage_batch_size:
+                     await flush_batch(batch, src)
+
+                 if cancelled():
+                     # Drain whatever is already parsed
+                     try:
+                         while True:
+                             nxt = save_q.get_nowait()
+                             if isinstance(nxt, StopToken):
+                                 stop_count += 1
+                             else:
+                                 nbatch, nsrc = select_batch(nxt)
+                                 nbatch.append(nxt)
+                     except asyncio.QueueEmpty:
+                         pass
+                     await flush_all()
+                     # Wait for remaining STOPs to arrive
+                     while stop_count < NUM_WORKERS:
+                         nxt = await save_q.get()
+                         if nxt is STOP:
+                             stop_count += 1
+                     return

-             async def chapter_worker() -> None:
-                 """
-                 Single worker: fetch + parse with retry, then enqueue ChapterDict.
+         async def chapter_worker() -> None:
+             """
+             Single worker: fetch + parse with retry, then enqueue ChapterDict.

-                 Exits on STOP. If cancelled, does not start a new fetch; signals STOP.
-                 """
-                 while True:
-                     cid = await cid_q.get()
-                     if isinstance(cid, StopToken):
-                         await save_q.put(STOP)
-                         return
+             Exits on STOP. If cancelled, does not start a new fetch; signals STOP.
+             """
+             while True:
+                 cid = await cid_q.get()
+                 if isinstance(cid, StopToken):
+                     await save_q.put(STOP)
+                     return

-                     if not cid or cid in ignore_set:
-                         continue
+                 if cancelled():
+                     await save_q.put(STOP)
+                     return

-                     if cancelled():
-                         await save_q.put(STOP)
-                         return
+                 chap = await self._process_chapter(book_id, cid, html_dir)
+                 if chap and not cancelled():
+                     await save_q.put(chap)

-                     async with sem:
-                         chap = await self._process_chapter(book_id, cid, html_dir)
-                         if chap and not cancelled():
-                             await save_q.put(chap)
+                 await async_jitter_sleep(
+                     self._request_interval,
+                     mul_spread=1.1,
+                     max_sleep=self._request_interval + 2,
+                 )

-                     await async_jitter_sleep(
-                         self.request_interval,
-                         mul_spread=1.1,
-                         max_sleep=self.request_interval + 2,
-                     )
+         async def producer() -> None:
+             """
+             Enqueue chapter IDs respecting start/end/skip_existing.

-             async def producer() -> None:
-                 """
-                 Enqueue chapter IDs respecting start/end/skip_existing.
-
-                 Always emits STOP x NUM_WORKERS at the end (even if cancelled early).
-                 """
-                 try:
-                     async for cid in self._chapter_ids(vols, start_id, end_id):
-                         if cancelled():
-                             break
-                         if self.skip_existing and (
-                             chapter_storage.exists(cid, self.DEFAULT_SOURCE_ID)
-                             or chapter_storage.exists(cid, self.ENCRYPTED_SOURCE_ID)
-                         ):
-                             # Already have either variant; count as done.
-                             await progress.bump(1)
-                         else:
-                             await cid_q.put(cid)
-                 finally:
-                     for _ in range(NUM_WORKERS):
-                         await cid_q.put(STOP)
-
-             # ---- run tasks ---
+             Always emits STOP x NUM_WORKERS at the end (even if cancelled early).
+             """
+             try:
+                 for cid in plan:
+                     if cancelled():
+                         break
+                     if self._skip_existing and storage.exists(
+                         cid, self.DEFAULT_SOURCE_ID
+                     ):
+                         # Already have not-encrypted; count as done.
+                         await progress.bump(1)
+                     else:
+                         await cid_q.put(cid)
+             finally:
+                 for _ in range(NUM_WORKERS):
+                     await cid_q.put(STOP)
+
+         # ---- run tasks ---
+         with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
              async with asyncio.TaskGroup() as tg:
                  tg.create_task(storage_worker())
                  for _ in range(NUM_WORKERS):
                      tg.create_task(chapter_worker())
                  tg.create_task(producer())

-             # ---- done ---
-             if cancelled():
-                 self.logger.info(
-                     "%s Novel '%s' cancelled: flushed %d/%d chapters.",
-                     TAG,
-                     book_info.get("book_name", "unknown"),
-                     progress.done,
-                     progress.total,
-                 )
-             else:
-                 self.logger.info(
-                     "%s Novel '%s' download completed.",
-                     TAG,
-                     book_info.get("book_name", "unknown"),
-                 )
-
-         finally:
-             chapter_storage.close()
+             # ---- done ---
+             if cancelled():
+                 self.logger.info(
+                     "%s Novel '%s' cancelled: flushed %d/%d chapters.",
+                     TAG,
+                     book_info.get("book_name", "unknown"),
+                     progress.done,
+                     progress.total,
+                 )
+             else:
+                 self.logger.info(
+                     "%s Novel '%s' download completed.",
+                     TAG,
+                     book_info.get("book_name", "unknown"),
+                 )

      @staticmethod
      def _check_restricted(html_list: list[str]) -> bool:
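
The rewritten `_download_one` above is a producer / worker / storage pipeline: one producer enqueues chapter IDs, `NUM_WORKERS` chapter workers fetch and parse, and a single storage worker batches results, with one STOP sentinel per worker to shut the pipeline down and bounded queues to apply backpressure. A self-contained sketch of that shape, assuming Python 3.11+ for `asyncio.TaskGroup` (the names `pipeline`, `_Stop`, `NUM_WORKERS`, and `BATCH_SIZE` are illustrative, not from the package):

```python
import asyncio


class _Stop:
    """Sentinel type pushed through the queues to signal shutdown."""


STOP = _Stop()
NUM_WORKERS = 2
BATCH_SIZE = 4


async def pipeline(chapter_ids: list[str]) -> list[list[str]]:
    """Producer -> N workers -> single consumer, one STOP per worker."""
    cid_q: asyncio.Queue[str | _Stop] = asyncio.Queue(maxsize=NUM_WORKERS * 2)
    save_q: asyncio.Queue[str | _Stop] = asyncio.Queue(maxsize=NUM_WORKERS * 2)
    flushed: list[list[str]] = []

    async def producer() -> None:
        try:
            for cid in chapter_ids:
                await cid_q.put(cid)
        finally:
            # Exactly one STOP per worker, even on early exit.
            for _ in range(NUM_WORKERS):
                await cid_q.put(STOP)

    async def worker() -> None:
        while True:
            cid = await cid_q.get()
            if isinstance(cid, _Stop):
                # Forward the sentinel so the consumer can count workers out.
                await save_q.put(STOP)
                return
            await asyncio.sleep(0)  # stands in for fetch + parse
            await save_q.put(cid)

    async def storage() -> None:
        batch: list[str] = []
        stops = 0
        while True:
            item = await save_q.get()
            if isinstance(item, _Stop):
                stops += 1
                if stops == NUM_WORKERS:
                    if batch:
                        flushed.append(batch.copy())
                    return
                continue
            batch.append(item)
            if len(batch) >= BATCH_SIZE:
                flushed.append(batch.copy())
                batch.clear()

    async with asyncio.TaskGroup() as tg:
        tg.create_task(storage())
        for _ in range(NUM_WORKERS):
            tg.create_task(worker())
        tg.create_task(producer())
    return flushed


if __name__ == "__main__":
    print(asyncio.run(pipeline([f"c{i}" for i in range(10)])))
```

With bounded queues, a slow storage stage eventually blocks the workers and, through them, the producer, so memory stays proportional to `NUM_WORKERS` and `BATCH_SIZE` rather than to the size of the book.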
@@ -295,7 +268,7 @@ class QidianDownloader(BaseDownloader):

          :return: ChapterDict on success, or None on failure.
          """
-         for attempt in range(self.retry_times + 1):
+         for attempt in range(self._retry_times + 1):
              try:
                  html_list = await self.fetcher.get_book_chapter(book_id, cid)
                  if self._check_restricted(html_list):
@@ -321,11 +294,11 @@ class QidianDownloader(BaseDownloader):
                  return chap

          except Exception as e:
-             if attempt < self.retry_times:
+             if attempt < self._retry_times:
                  self.logger.info(
                      "[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
                  )
-                 backoff = self.backoff_factor * (2**attempt)
+                 backoff = self._backoff_factor * (2**attempt)
                  await async_jitter_sleep(
                      base=backoff,
                      mul_spread=1.2,
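
The retry path in these last two hunks follows the usual exponential-backoff-with-jitter pattern: `backoff = backoff_factor * 2**attempt`, then a randomized sleep. A minimal sketch of that logic, with a hand-rolled helper standing in for `async_jitter_sleep` (whose internals are not shown in this diff):

```python
import asyncio
import random
from collections.abc import Awaitable, Callable
from typing import TypeVar

T = TypeVar("T")


async def jitter_sleep(
    base: float, mul_spread: float = 1.2, max_sleep: float | None = None
) -> None:
    # Sleep somewhere between base and base * mul_spread, optionally capped.
    delay = base * random.uniform(1.0, mul_spread)
    if max_sleep is not None:
        delay = min(delay, max_sleep)
    await asyncio.sleep(delay)


async def fetch_with_retry(
    fetch: Callable[[], Awaitable[T]],
    retry_times: int = 3,
    backoff_factor: float = 0.5,
) -> T | None:
    """Call fetch() up to retry_times + 1 times, backing off exponentially."""
    for attempt in range(retry_times + 1):
        try:
            return await fetch()
        except Exception:
            if attempt < retry_times:
                await jitter_sleep(base=backoff_factor * (2**attempt))
    return None
```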