novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +14 -11
  3. novel_downloader/cli/export.py +19 -19
  4. novel_downloader/cli/ui.py +35 -8
  5. novel_downloader/config/adapter.py +216 -153
  6. novel_downloader/core/__init__.py +5 -6
  7. novel_downloader/core/archived/deqixs/fetcher.py +1 -28
  8. novel_downloader/core/downloaders/__init__.py +2 -0
  9. novel_downloader/core/downloaders/base.py +34 -85
  10. novel_downloader/core/downloaders/common.py +147 -171
  11. novel_downloader/core/downloaders/qianbi.py +30 -64
  12. novel_downloader/core/downloaders/qidian.py +157 -184
  13. novel_downloader/core/downloaders/qqbook.py +292 -0
  14. novel_downloader/core/downloaders/registry.py +2 -2
  15. novel_downloader/core/exporters/__init__.py +2 -0
  16. novel_downloader/core/exporters/base.py +37 -59
  17. novel_downloader/core/exporters/common.py +620 -0
  18. novel_downloader/core/exporters/linovelib.py +47 -0
  19. novel_downloader/core/exporters/qidian.py +41 -12
  20. novel_downloader/core/exporters/qqbook.py +28 -0
  21. novel_downloader/core/exporters/registry.py +2 -2
  22. novel_downloader/core/fetchers/__init__.py +4 -2
  23. novel_downloader/core/fetchers/aaatxt.py +2 -22
  24. novel_downloader/core/fetchers/b520.py +3 -23
  25. novel_downloader/core/fetchers/base.py +80 -105
  26. novel_downloader/core/fetchers/biquyuedu.py +2 -22
  27. novel_downloader/core/fetchers/dxmwx.py +10 -22
  28. novel_downloader/core/fetchers/esjzone.py +6 -29
  29. novel_downloader/core/fetchers/guidaye.py +2 -22
  30. novel_downloader/core/fetchers/hetushu.py +9 -29
  31. novel_downloader/core/fetchers/i25zw.py +2 -16
  32. novel_downloader/core/fetchers/ixdzs8.py +2 -16
  33. novel_downloader/core/fetchers/jpxs123.py +2 -16
  34. novel_downloader/core/fetchers/lewenn.py +2 -22
  35. novel_downloader/core/fetchers/linovelib.py +4 -20
  36. novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
  37. novel_downloader/core/fetchers/piaotia.py +2 -16
  38. novel_downloader/core/fetchers/qbtr.py +2 -16
  39. novel_downloader/core/fetchers/qianbi.py +1 -20
  40. novel_downloader/core/fetchers/qidian.py +27 -68
  41. novel_downloader/core/fetchers/qqbook.py +177 -0
  42. novel_downloader/core/fetchers/quanben5.py +9 -29
  43. novel_downloader/core/fetchers/rate_limiter.py +22 -53
  44. novel_downloader/core/fetchers/sfacg.py +3 -16
  45. novel_downloader/core/fetchers/shencou.py +2 -16
  46. novel_downloader/core/fetchers/shuhaige.py +2 -22
  47. novel_downloader/core/fetchers/tongrenquan.py +2 -22
  48. novel_downloader/core/fetchers/ttkan.py +3 -14
  49. novel_downloader/core/fetchers/wanbengo.py +2 -22
  50. novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
  51. novel_downloader/core/fetchers/xiguashuwu.py +4 -20
  52. novel_downloader/core/fetchers/xs63b.py +3 -15
  53. novel_downloader/core/fetchers/xshbook.py +2 -22
  54. novel_downloader/core/fetchers/yamibo.py +4 -28
  55. novel_downloader/core/fetchers/yibige.py +13 -26
  56. novel_downloader/core/interfaces/exporter.py +19 -7
  57. novel_downloader/core/interfaces/fetcher.py +23 -49
  58. novel_downloader/core/interfaces/parser.py +2 -2
  59. novel_downloader/core/parsers/__init__.py +4 -2
  60. novel_downloader/core/parsers/b520.py +2 -2
  61. novel_downloader/core/parsers/base.py +5 -39
  62. novel_downloader/core/parsers/esjzone.py +3 -3
  63. novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
  64. novel_downloader/core/parsers/qidian.py +717 -0
  65. novel_downloader/core/parsers/qqbook.py +709 -0
  66. novel_downloader/core/parsers/xiguashuwu.py +8 -15
  67. novel_downloader/core/searchers/__init__.py +2 -2
  68. novel_downloader/core/searchers/b520.py +1 -1
  69. novel_downloader/core/searchers/base.py +2 -2
  70. novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
  71. novel_downloader/locales/en.json +3 -3
  72. novel_downloader/locales/zh.json +3 -3
  73. novel_downloader/models/__init__.py +2 -0
  74. novel_downloader/models/book.py +1 -0
  75. novel_downloader/models/config.py +12 -0
  76. novel_downloader/resources/config/settings.toml +23 -5
  77. novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
  78. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
  79. novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
  80. novel_downloader/utils/__init__.py +0 -2
  81. novel_downloader/utils/chapter_storage.py +2 -3
  82. novel_downloader/utils/constants.py +7 -3
  83. novel_downloader/utils/cookies.py +32 -17
  84. novel_downloader/utils/crypto_utils/__init__.py +0 -6
  85. novel_downloader/utils/crypto_utils/aes_util.py +1 -1
  86. novel_downloader/utils/crypto_utils/rc4.py +40 -50
  87. novel_downloader/utils/epub/__init__.py +2 -3
  88. novel_downloader/utils/epub/builder.py +6 -6
  89. novel_downloader/utils/epub/constants.py +1 -6
  90. novel_downloader/utils/epub/documents.py +7 -7
  91. novel_downloader/utils/epub/models.py +8 -8
  92. novel_downloader/utils/epub/utils.py +10 -10
  93. novel_downloader/utils/file_utils/io.py +48 -73
  94. novel_downloader/utils/file_utils/normalize.py +1 -7
  95. novel_downloader/utils/file_utils/sanitize.py +4 -11
  96. novel_downloader/utils/fontocr/__init__.py +13 -0
  97. novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
  98. novel_downloader/utils/fontocr/loader.py +52 -0
  99. novel_downloader/utils/logger.py +80 -56
  100. novel_downloader/utils/network.py +16 -40
  101. novel_downloader/utils/node_decryptor/__init__.py +13 -0
  102. novel_downloader/utils/node_decryptor/decryptor.py +342 -0
  103. novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
  104. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  105. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  106. novel_downloader/utils/time_utils/sleep_utils.py +53 -43
  107. novel_downloader/web/main.py +1 -1
  108. novel_downloader/web/pages/download.py +1 -1
  109. novel_downloader/web/pages/search.py +4 -4
  110. novel_downloader/web/services/task_manager.py +2 -0
  111. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
  112. novel_downloader-2.0.2.dist-info/RECORD +203 -0
  113. novel_downloader/core/exporters/common/__init__.py +0 -11
  114. novel_downloader/core/exporters/common/epub.py +0 -198
  115. novel_downloader/core/exporters/common/main_exporter.py +0 -64
  116. novel_downloader/core/exporters/common/txt.py +0 -146
  117. novel_downloader/core/exporters/epub_util.py +0 -215
  118. novel_downloader/core/exporters/linovelib/__init__.py +0 -11
  119. novel_downloader/core/exporters/linovelib/epub.py +0 -349
  120. novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
  121. novel_downloader/core/exporters/linovelib/txt.py +0 -139
  122. novel_downloader/core/exporters/txt_util.py +0 -67
  123. novel_downloader/core/parsers/qidian/__init__.py +0 -10
  124. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
  125. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
  126. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
  127. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  128. novel_downloader/core/parsers/qidian/main_parser.py +0 -101
  129. novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
  130. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
  131. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
  132. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
  133. novel_downloader-2.0.0.dist-info/RECORD +0 -210
  134. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
  135. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
  136. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
  137. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -13,25 +13,13 @@ from typing import Any
13
13
 
14
14
  from novel_downloader.core.downloaders.base import BaseDownloader
15
15
  from novel_downloader.core.downloaders.registry import register_downloader
16
- from novel_downloader.core.downloaders.signals import (
17
- STOP,
18
- Progress,
19
- StopToken,
20
- )
21
- from novel_downloader.core.interfaces import (
22
- FetcherProtocol,
23
- ParserProtocol,
24
- )
16
+ from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
25
17
  from novel_downloader.models import (
26
18
  BookConfig,
27
19
  BookInfoDict,
28
20
  ChapterDict,
29
- DownloaderConfig,
30
- )
31
- from novel_downloader.utils import (
32
- ChapterStorage,
33
- async_jitter_sleep,
34
21
  )
22
+ from novel_downloader.utils import ChapterStorage, async_jitter_sleep
35
23
 
36
24
 
37
25
  @register_downloader(site_keys=["qianbi"])
@@ -43,16 +31,6 @@ class QianbiDownloader(BaseDownloader):
43
31
  each chapter as a unit (fetch -> parse -> enqueue storage).
44
32
  """
45
33
 
46
- DEFAULT_SOURCE_ID = 0
47
-
48
- def __init__(
49
- self,
50
- fetcher: FetcherProtocol,
51
- parser: ParserProtocol,
52
- config: DownloaderConfig,
53
- ):
54
- super().__init__(fetcher, parser, config, "qianbi")
55
-
56
34
  async def _download_one(
57
35
  self,
58
36
  book: BookConfig,
@@ -77,16 +55,10 @@ class QianbiDownloader(BaseDownloader):
77
55
  raw_base.mkdir(parents=True, exist_ok=True)
78
56
  html_dir = self._debug_dir / book_id / "html"
79
57
 
80
- chapter_storage = ChapterStorage(
81
- raw_base=raw_base,
82
- priorities=self.PRIORITIES_MAP,
83
- )
84
- chapter_storage.connect()
85
-
86
58
  def cancelled() -> bool:
87
59
  return bool(cancel_event and cancel_event.is_set())
88
60
 
89
- try:
61
+ with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
90
62
  # --- metadata ---
91
63
  book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
92
64
  if not book_info:
@@ -95,28 +67,32 @@ class QianbiDownloader(BaseDownloader):
95
67
  book_info = await self._repair_chapter_ids(
96
68
  book_id,
97
69
  book_info,
98
- chapter_storage,
70
+ storage,
99
71
  html_dir,
100
72
  )
101
73
 
102
74
  vols = book_info["volumes"]
103
- total_chapters = sum(len(v["chapters"]) for v in vols)
104
- if total_chapters == 0:
105
- self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
75
+ plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
76
+ if not plan:
77
+ self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
106
78
  return
107
79
 
108
- progress = Progress(total_chapters, progress_hook)
80
+ progress = Progress(total=len(plan), hook=progress_hook)
109
81
 
110
82
  # --- queues & batching ---
111
- cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
112
- save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
83
+ cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(
84
+ maxsize=self._workers * 2
85
+ )
86
+ save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
87
+ maxsize=self._workers * 2
88
+ )
113
89
  batch: list[ChapterDict] = []
114
90
 
115
91
  async def flush_batch() -> None:
116
92
  if not batch:
117
93
  return
118
94
  try:
119
- chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
95
+ storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
120
96
  except Exception as e:
121
97
  self.logger.error(
122
98
  "[Storage] batch upsert failed (size=%d): %s",
@@ -144,7 +120,7 @@ class QianbiDownloader(BaseDownloader):
144
120
  item = await save_q.get()
145
121
  if isinstance(item, StopToken):
146
122
  stop_count += 1
147
- if stop_count == self.workers:
123
+ if stop_count == self._workers:
148
124
  # All chapter workers have exited.
149
125
  await flush_batch()
150
126
  return
@@ -153,7 +129,7 @@ class QianbiDownloader(BaseDownloader):
153
129
 
154
130
  # Normal chapter
155
131
  batch.append(item)
156
- if len(batch) >= self.storage_batch_size:
132
+ if len(batch) >= self._storage_batch_size:
157
133
  await flush_batch()
158
134
 
159
135
  if cancelled():
@@ -170,15 +146,13 @@ class QianbiDownloader(BaseDownloader):
170
146
  # Final flush of everything
171
147
  await flush_batch()
172
148
  # Wait for remaining STOPs so chapter workers can finish.
173
- while stop_count < self.workers:
149
+ while stop_count < self._workers:
174
150
  nxt = await save_q.get()
175
151
  if isinstance(nxt, StopToken):
176
152
  stop_count += 1
177
153
  return
178
154
 
179
155
  # --- stage: chapter worker ---
180
- sem = asyncio.Semaphore(self.workers)
181
-
182
156
  async def chapter_worker() -> None:
183
157
  """
184
158
  Fetch + parse with retry, then enqueue to save_q.
@@ -192,25 +166,20 @@ class QianbiDownloader(BaseDownloader):
192
166
  await save_q.put(STOP)
193
167
  return
194
168
 
195
- if not cid or cid in ignore_set:
196
- # Ignore silently and continue.
197
- continue
198
-
199
169
  # If cancelled, don't start a new network call; let storage finish.
200
170
  if cancelled():
201
171
  await save_q.put(STOP)
202
172
  return
203
173
 
204
- async with sem:
205
- chap = await self._process_chapter(book_id, cid, html_dir)
174
+ chap = await self._process_chapter(book_id, cid, html_dir)
206
175
  if chap:
207
176
  await save_q.put(chap)
208
177
 
209
178
  # polite pacing
210
179
  await async_jitter_sleep(
211
- self.request_interval,
180
+ self._request_interval,
212
181
  mul_spread=1.1,
213
- max_sleep=self.request_interval + 2,
182
+ max_sleep=self._request_interval + 2,
214
183
  )
215
184
 
216
185
  # --- stage: producer ---
@@ -221,22 +190,22 @@ class QianbiDownloader(BaseDownloader):
221
190
  so chapter workers can exit deterministically.
222
191
  """
223
192
  try:
224
- async for cid in self._chapter_ids(vols, start_id, end_id):
193
+ for cid in plan:
225
194
  if cancelled():
226
195
  break
227
- if self.skip_existing and chapter_storage.exists(cid):
196
+ if self._skip_existing and storage.exists(cid):
228
197
  # Count as completed but don't enqueue.
229
198
  await progress.bump(1)
230
199
  else:
231
200
  await cid_q.put(cid)
232
201
  finally:
233
- for _ in range(self.workers):
202
+ for _ in range(self._workers):
234
203
  await cid_q.put(STOP)
235
204
 
236
205
  # --- run the pipeline ---
237
206
  async with asyncio.TaskGroup() as tg:
238
207
  tg.create_task(storage_worker())
239
- for _ in range(self.workers):
208
+ for _ in range(self._workers):
240
209
  tg.create_task(chapter_worker())
241
210
  tg.create_task(producer())
242
211
 
@@ -256,9 +225,6 @@ class QianbiDownloader(BaseDownloader):
256
225
  book_info.get("book_name", "unknown"),
257
226
  )
258
227
 
259
- finally:
260
- chapter_storage.close()
261
-
262
228
  async def _repair_chapter_ids(
263
229
  self,
264
230
  book_id: str,
@@ -295,9 +261,9 @@ class QianbiDownloader(BaseDownloader):
295
261
  continue
296
262
  storage.upsert_chapter(data, self.DEFAULT_SOURCE_ID)
297
263
  await async_jitter_sleep(
298
- self.request_interval,
264
+ self._request_interval,
299
265
  mul_spread=1.1,
300
- max_sleep=self.request_interval + 2,
266
+ max_sleep=self._request_interval + 2,
301
267
  )
302
268
 
303
269
  next_cid = data.get("extra", {}).get("next_chapter_id")
@@ -331,7 +297,7 @@ class QianbiDownloader(BaseDownloader):
331
297
 
332
298
  :return: ChapterDict on success, or None on failure.
333
299
  """
334
- for attempt in range(self.retry_times + 1):
300
+ for attempt in range(self._retry_times + 1):
335
301
  try:
336
302
  html_list = await self.fetcher.get_book_chapter(book_id, cid)
337
303
  self._save_html_pages(html_dir, cid, html_list)
@@ -342,9 +308,9 @@ class QianbiDownloader(BaseDownloader):
342
308
  raise ValueError("Empty parse result")
343
309
  return chap
344
310
  except Exception as e:
345
- if attempt < self.retry_times:
311
+ if attempt < self._retry_times:
346
312
  self.logger.info(f"[ChapterWorker] Retry {cid} ({attempt+1}): {e}")
347
- backoff = self.backoff_factor * (2**attempt)
313
+ backoff = self._backoff_factor * (2**attempt)
348
314
  await async_jitter_sleep(
349
315
  base=backoff, mul_spread=1.2, max_sleep=backoff + 3
350
316
  )
@@ -10,28 +10,14 @@ with handling for restricted and encrypted chapters
10
10
  import asyncio
11
11
  from collections.abc import Awaitable, Callable
12
12
  from pathlib import Path
13
- from typing import Any
13
+ from typing import Any, ClassVar
14
14
 
15
15
  from novel_downloader.core.downloaders.base import BaseDownloader
16
16
  from novel_downloader.core.downloaders.registry import register_downloader
17
- from novel_downloader.core.downloaders.signals import (
18
- STOP,
19
- Progress,
20
- StopToken,
21
- )
22
- from novel_downloader.core.interfaces import (
23
- FetcherProtocol,
24
- ParserProtocol,
25
- )
26
- from novel_downloader.models import (
27
- BookConfig,
28
- ChapterDict,
29
- DownloaderConfig,
30
- )
31
- from novel_downloader.utils import (
32
- ChapterStorage,
33
- async_jitter_sleep,
34
- )
17
+ from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
18
+ from novel_downloader.core.interfaces import FetcherProtocol, ParserProtocol
19
+ from novel_downloader.models import BookConfig, ChapterDict, DownloaderConfig
20
+ from novel_downloader.utils import ChapterStorage, async_jitter_sleep
35
21
 
36
22
 
37
23
  @register_downloader(site_keys=["qidian", "qd"])
@@ -43,9 +29,9 @@ class QidianDownloader(BaseDownloader):
43
29
  handles fetch -> parse -> enqueue storage.
44
30
  """
45
31
 
46
- DEFAULT_SOURCE_ID = 0
47
- ENCRYPTED_SOURCE_ID = 1
48
- PRIORITIES_MAP = {
32
+ DEFAULT_SOURCE_ID: ClassVar[int] = 0
33
+ ENCRYPTED_SOURCE_ID: ClassVar[int] = 1
34
+ PRIORITIES_MAP: ClassVar[dict[int, int]] = {
49
35
  DEFAULT_SOURCE_ID: 0,
50
36
  ENCRYPTED_SOURCE_ID: 1,
51
37
  }
@@ -55,9 +41,10 @@ class QidianDownloader(BaseDownloader):
55
41
  fetcher: FetcherProtocol,
56
42
  parser: ParserProtocol,
57
43
  config: DownloaderConfig,
44
+ site: str,
58
45
  ):
59
- config.request_interval = max(1.0, config.request_interval)
60
- super().__init__(fetcher, parser, config, "qidian")
46
+ super().__init__(fetcher, parser, config, site)
47
+ self._request_interval = max(1.0, config.request_interval)
61
48
 
62
49
  async def _download_one(
63
50
  self,
@@ -84,186 +71,172 @@ class QidianDownloader(BaseDownloader):
84
71
  raw_base.mkdir(parents=True, exist_ok=True)
85
72
  html_dir = self._debug_dir / book_id / "html"
86
73
 
87
- chapter_storage = ChapterStorage(
88
- raw_base=raw_base,
89
- priorities=self.PRIORITIES_MAP,
90
- )
91
- chapter_storage.connect()
92
-
93
74
  def cancelled() -> bool:
94
75
  return bool(cancel_event and cancel_event.is_set())
95
76
 
96
- try:
97
- # ---- metadata ---
98
- book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
99
- if not book_info:
100
- return
101
-
102
- vols = book_info["volumes"]
103
- total_chapters = sum(len(v["chapters"]) for v in vols)
104
- if total_chapters == 0:
105
- self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
106
- return
77
+ # ---- metadata ---
78
+ book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
79
+ if not book_info:
80
+ return
107
81
 
108
- progress = Progress(total_chapters, progress_hook)
82
+ vols = book_info["volumes"]
83
+ plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
84
+ if not plan:
85
+ self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
86
+ return
109
87
 
110
- # ---- queues & batching ---
111
- cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
112
- save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
113
- default_batch: list[ChapterDict] = []
114
- encrypted_batch: list[ChapterDict] = []
88
+ progress = Progress(total=len(plan), hook=progress_hook)
115
89
 
116
- def select_batch(chap: ChapterDict) -> tuple[list[ChapterDict], int]:
117
- # set extra.encrypted (by parser); default to plain if absent.
118
- if chap.get("extra", {}).get("encrypted", False):
119
- return encrypted_batch, self.ENCRYPTED_SOURCE_ID
120
- return default_batch, self.DEFAULT_SOURCE_ID
90
+ # ---- queues & batching ---
91
+ cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(maxsize=self._workers * 2)
92
+ save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
93
+ maxsize=self._workers * 2
94
+ )
95
+ default_batch: list[ChapterDict] = []
96
+ encrypted_batch: list[ChapterDict] = []
121
97
 
122
- async def flush_batch(batch: list[ChapterDict], src: int) -> None:
123
- if not batch:
124
- return
125
- try:
126
- chapter_storage.upsert_chapters(batch, src)
127
- except Exception as e:
128
- self.logger.error(
129
- "[Storage] batch upsert failed (size=%d, src=%d): %s",
130
- len(batch),
131
- src,
132
- e,
133
- exc_info=True,
134
- )
135
- else:
136
- await progress.bump(len(batch))
137
- finally:
138
- batch.clear()
139
-
140
- async def flush_all() -> None:
141
- await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
142
- await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
143
-
144
- # ---- workers ---
145
- sem = asyncio.Semaphore(self.workers)
146
-
147
- async def storage_worker() -> None:
148
- """
149
- Consumes parsed chapters, batches by source, flushes on threshold.
150
-
151
- Terminates after receiving STOP from each chapter worker.
152
-
153
- On cancel: drains queue, flushes once, then waits for remaining STOPs.
154
- """
155
- stop_count = 0
156
- while True:
157
- chap = await save_q.get()
158
- if isinstance(chap, StopToken):
159
- stop_count += 1
160
- if stop_count == NUM_WORKERS:
161
- await flush_all()
162
- return
163
- continue
164
-
165
- batch, src = select_batch(chap)
166
- batch.append(chap)
167
- if len(batch) >= self.storage_batch_size:
168
- await flush_batch(batch, src)
98
+ def select_batch(chap: ChapterDict) -> tuple[list[ChapterDict], int]:
99
+ # set extra.encrypted (by parser); default to plain if absent.
100
+ if chap.get("extra", {}).get("encrypted", False):
101
+ return encrypted_batch, self.ENCRYPTED_SOURCE_ID
102
+ return default_batch, self.DEFAULT_SOURCE_ID
169
103
 
170
- if cancelled():
171
- # Drain whatever is already parsed
172
- try:
173
- while True:
174
- nxt = save_q.get_nowait()
175
- if isinstance(nxt, StopToken):
176
- stop_count += 1
177
- else:
178
- nbatch, nsrc = select_batch(nxt)
179
- nbatch.append(nxt)
180
- except asyncio.QueueEmpty:
181
- pass
104
+ async def flush_batch(batch: list[ChapterDict], src: int) -> None:
105
+ if not batch:
106
+ return
107
+ try:
108
+ storage.upsert_chapters(batch, src)
109
+ except Exception as e:
110
+ self.logger.error(
111
+ "[Storage] batch upsert failed (size=%d, src=%d): %s",
112
+ len(batch),
113
+ src,
114
+ e,
115
+ exc_info=True,
116
+ )
117
+ else:
118
+ await progress.bump(len(batch))
119
+ finally:
120
+ batch.clear()
121
+
122
+ async def flush_all() -> None:
123
+ await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
124
+ await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
125
+
126
+ # ---- workers ---
127
+ async def storage_worker() -> None:
128
+ """
129
+ Consumes parsed chapters, batches by source, flushes on threshold.
130
+
131
+ Terminates after receiving STOP from each chapter worker.
132
+
133
+ On cancel: drains queue, flushes once, then waits for remaining STOPs.
134
+ """
135
+ stop_count = 0
136
+ while True:
137
+ chap = await save_q.get()
138
+ if isinstance(chap, StopToken):
139
+ stop_count += 1
140
+ if stop_count == NUM_WORKERS:
182
141
  await flush_all()
183
- # Wait for remaining STOPs to arrive
184
- while stop_count < NUM_WORKERS:
185
- nxt = await save_q.get()
186
- if nxt is STOP:
187
- stop_count += 1
188
142
  return
143
+ continue
144
+
145
+ batch, src = select_batch(chap)
146
+ batch.append(chap)
147
+ if len(batch) >= self._storage_batch_size:
148
+ await flush_batch(batch, src)
149
+
150
+ if cancelled():
151
+ # Drain whatever is already parsed
152
+ try:
153
+ while True:
154
+ nxt = save_q.get_nowait()
155
+ if isinstance(nxt, StopToken):
156
+ stop_count += 1
157
+ else:
158
+ nbatch, nsrc = select_batch(nxt)
159
+ nbatch.append(nxt)
160
+ except asyncio.QueueEmpty:
161
+ pass
162
+ await flush_all()
163
+ # Wait for remaining STOPs to arrive
164
+ while stop_count < NUM_WORKERS:
165
+ nxt = await save_q.get()
166
+ if nxt is STOP:
167
+ stop_count += 1
168
+ return
189
169
 
190
- async def chapter_worker() -> None:
191
- """
192
- Single worker: fetch + parse with retry, then enqueue ChapterDict.
170
+ async def chapter_worker() -> None:
171
+ """
172
+ Single worker: fetch + parse with retry, then enqueue ChapterDict.
193
173
 
194
- Exits on STOP. If cancelled, does not start a new fetch; signals STOP.
195
- """
196
- while True:
197
- cid = await cid_q.get()
198
- if isinstance(cid, StopToken):
199
- await save_q.put(STOP)
200
- return
174
+ Exits on STOP. If cancelled, does not start a new fetch; signals STOP.
175
+ """
176
+ while True:
177
+ cid = await cid_q.get()
178
+ if isinstance(cid, StopToken):
179
+ await save_q.put(STOP)
180
+ return
201
181
 
202
- if not cid or cid in ignore_set:
203
- continue
182
+ if cancelled():
183
+ await save_q.put(STOP)
184
+ return
204
185
 
205
- if cancelled():
206
- await save_q.put(STOP)
207
- return
186
+ chap = await self._process_chapter(book_id, cid, html_dir)
187
+ if chap and not cancelled():
188
+ await save_q.put(chap)
208
189
 
209
- async with sem:
210
- chap = await self._process_chapter(book_id, cid, html_dir)
211
- if chap and not cancelled():
212
- await save_q.put(chap)
190
+ await async_jitter_sleep(
191
+ self._request_interval,
192
+ mul_spread=1.1,
193
+ max_sleep=self._request_interval + 2,
194
+ )
213
195
 
214
- await async_jitter_sleep(
215
- self.request_interval,
216
- mul_spread=1.1,
217
- max_sleep=self.request_interval + 2,
218
- )
196
+ async def producer() -> None:
197
+ """
198
+ Enqueue chapter IDs respecting start/end/skip_existing.
219
199
 
220
- async def producer() -> None:
221
- """
222
- Enqueue chapter IDs respecting start/end/skip_existing.
223
-
224
- Always emits STOP x NUM_WORKERS at the end (even if cancelled early).
225
- """
226
- try:
227
- async for cid in self._chapter_ids(vols, start_id, end_id):
228
- if cancelled():
229
- break
230
- if self.skip_existing and (
231
- chapter_storage.exists(cid, self.DEFAULT_SOURCE_ID)
232
- or chapter_storage.exists(cid, self.ENCRYPTED_SOURCE_ID)
233
- ):
234
- # Already have either variant; count as done.
235
- await progress.bump(1)
236
- else:
237
- await cid_q.put(cid)
238
- finally:
239
- for _ in range(NUM_WORKERS):
240
- await cid_q.put(STOP)
241
-
242
- # ---- run tasks ---
200
+ Always emits STOP x NUM_WORKERS at the end (even if cancelled early).
201
+ """
202
+ try:
203
+ for cid in plan:
204
+ if cancelled():
205
+ break
206
+ if self._skip_existing and storage.exists(
207
+ cid, self.DEFAULT_SOURCE_ID
208
+ ):
209
+ # Already have not-encrypted; count as done.
210
+ await progress.bump(1)
211
+ else:
212
+ await cid_q.put(cid)
213
+ finally:
214
+ for _ in range(NUM_WORKERS):
215
+ await cid_q.put(STOP)
216
+
217
+ # ---- run tasks ---
218
+ with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
243
219
  async with asyncio.TaskGroup() as tg:
244
220
  tg.create_task(storage_worker())
245
221
  for _ in range(NUM_WORKERS):
246
222
  tg.create_task(chapter_worker())
247
223
  tg.create_task(producer())
248
224
 
249
- # ---- done ---
250
- if cancelled():
251
- self.logger.info(
252
- "%s Novel '%s' cancelled: flushed %d/%d chapters.",
253
- TAG,
254
- book_info.get("book_name", "unknown"),
255
- progress.done,
256
- progress.total,
257
- )
258
- else:
259
- self.logger.info(
260
- "%s Novel '%s' download completed.",
261
- TAG,
262
- book_info.get("book_name", "unknown"),
263
- )
264
-
265
- finally:
266
- chapter_storage.close()
225
+ # ---- done ---
226
+ if cancelled():
227
+ self.logger.info(
228
+ "%s Novel '%s' cancelled: flushed %d/%d chapters.",
229
+ TAG,
230
+ book_info.get("book_name", "unknown"),
231
+ progress.done,
232
+ progress.total,
233
+ )
234
+ else:
235
+ self.logger.info(
236
+ "%s Novel '%s' download completed.",
237
+ TAG,
238
+ book_info.get("book_name", "unknown"),
239
+ )
267
240
 
268
241
  @staticmethod
269
242
  def _check_restricted(html_list: list[str]) -> bool:
@@ -295,7 +268,7 @@ class QidianDownloader(BaseDownloader):
295
268
 
296
269
  :return: ChapterDict on success, or None on failure.
297
270
  """
298
- for attempt in range(self.retry_times + 1):
271
+ for attempt in range(self._retry_times + 1):
299
272
  try:
300
273
  html_list = await self.fetcher.get_book_chapter(book_id, cid)
301
274
  if self._check_restricted(html_list):
@@ -321,11 +294,11 @@ class QidianDownloader(BaseDownloader):
321
294
  return chap
322
295
 
323
296
  except Exception as e:
324
- if attempt < self.retry_times:
297
+ if attempt < self._retry_times:
325
298
  self.logger.info(
326
299
  "[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
327
300
  )
328
- backoff = self.backoff_factor * (2**attempt)
301
+ backoff = self._backoff_factor * (2**attempt)
329
302
  await async_jitter_sleep(
330
303
  base=backoff,
331
304
  mul_spread=1.2,