novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +14 -11
  3. novel_downloader/cli/export.py +19 -19
  4. novel_downloader/cli/ui.py +35 -8
  5. novel_downloader/config/adapter.py +216 -153
  6. novel_downloader/core/__init__.py +5 -6
  7. novel_downloader/core/archived/deqixs/fetcher.py +1 -28
  8. novel_downloader/core/downloaders/__init__.py +2 -0
  9. novel_downloader/core/downloaders/base.py +34 -85
  10. novel_downloader/core/downloaders/common.py +147 -171
  11. novel_downloader/core/downloaders/qianbi.py +30 -64
  12. novel_downloader/core/downloaders/qidian.py +157 -184
  13. novel_downloader/core/downloaders/qqbook.py +292 -0
  14. novel_downloader/core/downloaders/registry.py +2 -2
  15. novel_downloader/core/exporters/__init__.py +2 -0
  16. novel_downloader/core/exporters/base.py +37 -59
  17. novel_downloader/core/exporters/common.py +620 -0
  18. novel_downloader/core/exporters/linovelib.py +47 -0
  19. novel_downloader/core/exporters/qidian.py +41 -12
  20. novel_downloader/core/exporters/qqbook.py +28 -0
  21. novel_downloader/core/exporters/registry.py +2 -2
  22. novel_downloader/core/fetchers/__init__.py +4 -2
  23. novel_downloader/core/fetchers/aaatxt.py +2 -22
  24. novel_downloader/core/fetchers/b520.py +3 -23
  25. novel_downloader/core/fetchers/base.py +80 -105
  26. novel_downloader/core/fetchers/biquyuedu.py +2 -22
  27. novel_downloader/core/fetchers/dxmwx.py +10 -22
  28. novel_downloader/core/fetchers/esjzone.py +6 -29
  29. novel_downloader/core/fetchers/guidaye.py +2 -22
  30. novel_downloader/core/fetchers/hetushu.py +9 -29
  31. novel_downloader/core/fetchers/i25zw.py +2 -16
  32. novel_downloader/core/fetchers/ixdzs8.py +2 -16
  33. novel_downloader/core/fetchers/jpxs123.py +2 -16
  34. novel_downloader/core/fetchers/lewenn.py +2 -22
  35. novel_downloader/core/fetchers/linovelib.py +4 -20
  36. novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
  37. novel_downloader/core/fetchers/piaotia.py +2 -16
  38. novel_downloader/core/fetchers/qbtr.py +2 -16
  39. novel_downloader/core/fetchers/qianbi.py +1 -20
  40. novel_downloader/core/fetchers/qidian.py +27 -68
  41. novel_downloader/core/fetchers/qqbook.py +177 -0
  42. novel_downloader/core/fetchers/quanben5.py +9 -29
  43. novel_downloader/core/fetchers/rate_limiter.py +22 -53
  44. novel_downloader/core/fetchers/sfacg.py +3 -16
  45. novel_downloader/core/fetchers/shencou.py +2 -16
  46. novel_downloader/core/fetchers/shuhaige.py +2 -22
  47. novel_downloader/core/fetchers/tongrenquan.py +2 -22
  48. novel_downloader/core/fetchers/ttkan.py +3 -14
  49. novel_downloader/core/fetchers/wanbengo.py +2 -22
  50. novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
  51. novel_downloader/core/fetchers/xiguashuwu.py +4 -20
  52. novel_downloader/core/fetchers/xs63b.py +3 -15
  53. novel_downloader/core/fetchers/xshbook.py +2 -22
  54. novel_downloader/core/fetchers/yamibo.py +4 -28
  55. novel_downloader/core/fetchers/yibige.py +13 -26
  56. novel_downloader/core/interfaces/exporter.py +19 -7
  57. novel_downloader/core/interfaces/fetcher.py +23 -49
  58. novel_downloader/core/interfaces/parser.py +2 -2
  59. novel_downloader/core/parsers/__init__.py +4 -2
  60. novel_downloader/core/parsers/b520.py +2 -2
  61. novel_downloader/core/parsers/base.py +5 -39
  62. novel_downloader/core/parsers/esjzone.py +3 -3
  63. novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
  64. novel_downloader/core/parsers/qidian.py +717 -0
  65. novel_downloader/core/parsers/qqbook.py +709 -0
  66. novel_downloader/core/parsers/xiguashuwu.py +8 -15
  67. novel_downloader/core/searchers/__init__.py +2 -2
  68. novel_downloader/core/searchers/b520.py +1 -1
  69. novel_downloader/core/searchers/base.py +2 -2
  70. novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
  71. novel_downloader/locales/en.json +3 -3
  72. novel_downloader/locales/zh.json +3 -3
  73. novel_downloader/models/__init__.py +2 -0
  74. novel_downloader/models/book.py +1 -0
  75. novel_downloader/models/config.py +12 -0
  76. novel_downloader/resources/config/settings.toml +23 -5
  77. novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
  78. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
  79. novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
  80. novel_downloader/utils/__init__.py +0 -2
  81. novel_downloader/utils/chapter_storage.py +2 -3
  82. novel_downloader/utils/constants.py +7 -3
  83. novel_downloader/utils/cookies.py +32 -17
  84. novel_downloader/utils/crypto_utils/__init__.py +0 -6
  85. novel_downloader/utils/crypto_utils/aes_util.py +1 -1
  86. novel_downloader/utils/crypto_utils/rc4.py +40 -50
  87. novel_downloader/utils/epub/__init__.py +2 -3
  88. novel_downloader/utils/epub/builder.py +6 -6
  89. novel_downloader/utils/epub/constants.py +1 -6
  90. novel_downloader/utils/epub/documents.py +7 -7
  91. novel_downloader/utils/epub/models.py +8 -8
  92. novel_downloader/utils/epub/utils.py +10 -10
  93. novel_downloader/utils/file_utils/io.py +48 -73
  94. novel_downloader/utils/file_utils/normalize.py +1 -7
  95. novel_downloader/utils/file_utils/sanitize.py +4 -11
  96. novel_downloader/utils/fontocr/__init__.py +13 -0
  97. novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
  98. novel_downloader/utils/fontocr/loader.py +52 -0
  99. novel_downloader/utils/logger.py +80 -56
  100. novel_downloader/utils/network.py +16 -40
  101. novel_downloader/utils/node_decryptor/__init__.py +13 -0
  102. novel_downloader/utils/node_decryptor/decryptor.py +342 -0
  103. novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
  104. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  105. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  106. novel_downloader/utils/time_utils/sleep_utils.py +53 -43
  107. novel_downloader/web/main.py +1 -1
  108. novel_downloader/web/pages/download.py +1 -1
  109. novel_downloader/web/pages/search.py +4 -4
  110. novel_downloader/web/services/task_manager.py +2 -0
  111. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
  112. novel_downloader-2.0.2.dist-info/RECORD +203 -0
  113. novel_downloader/core/exporters/common/__init__.py +0 -11
  114. novel_downloader/core/exporters/common/epub.py +0 -198
  115. novel_downloader/core/exporters/common/main_exporter.py +0 -64
  116. novel_downloader/core/exporters/common/txt.py +0 -146
  117. novel_downloader/core/exporters/epub_util.py +0 -215
  118. novel_downloader/core/exporters/linovelib/__init__.py +0 -11
  119. novel_downloader/core/exporters/linovelib/epub.py +0 -349
  120. novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
  121. novel_downloader/core/exporters/linovelib/txt.py +0 -139
  122. novel_downloader/core/exporters/txt_util.py +0 -67
  123. novel_downloader/core/parsers/qidian/__init__.py +0 -10
  124. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
  125. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
  126. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
  127. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  128. novel_downloader/core/parsers/qidian/main_parser.py +0 -101
  129. novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
  130. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
  131. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
  132. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
  133. novel_downloader-2.0.0.dist-info/RECORD +0 -210
  134. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
  135. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
  136. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
  137. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -10,15 +10,11 @@ import abc
10
10
  import asyncio
11
11
  import json
12
12
  import logging
13
- from collections.abc import AsyncIterator, Awaitable, Callable, Sequence
13
+ from collections.abc import Awaitable, Callable, Sequence
14
14
  from pathlib import Path
15
- from typing import Any, cast
15
+ from typing import Any, ClassVar, cast
16
16
 
17
- from novel_downloader.core.interfaces import (
18
- DownloaderProtocol,
19
- FetcherProtocol,
20
- ParserProtocol,
21
- )
17
+ from novel_downloader.core.interfaces import FetcherProtocol, ParserProtocol
22
18
  from novel_downloader.models import (
23
19
  BookConfig,
24
20
  BookInfoDict,
@@ -28,7 +24,7 @@ from novel_downloader.models import (
28
24
  from novel_downloader.utils import time_diff
29
25
 
30
26
 
31
- class BaseDownloader(DownloaderProtocol, abc.ABC):
27
+ class BaseDownloader(abc.ABC):
32
28
  """
33
29
  Abstract base class for novel downloaders.
34
30
 
@@ -39,8 +35,8 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
39
35
  a single book, using the provided fetcher and parser components.
40
36
  """
41
37
 
42
- DEFAULT_SOURCE_ID = 0
43
- PRIORITIES_MAP = {
38
+ DEFAULT_SOURCE_ID: ClassVar[int] = 0
39
+ PRIORITIES_MAP: ClassVar[dict[int, int]] = {
44
40
  DEFAULT_SOURCE_ID: 0,
45
41
  }
46
42
 
@@ -61,15 +57,23 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
61
57
  """
62
58
  self._fetcher = fetcher
63
59
  self._parser = parser
64
- self._config = config
65
60
  self._site = site
66
61
 
62
+ self._save_html = config.save_html
63
+ self._skip_existing = config.skip_existing
64
+ self._login_required = config.login_required
65
+ self._request_interval = config.request_interval
66
+ self._retry_times = config.retry_times
67
+ self._backoff_factor = config.backoff_factor
68
+ self._workers = config.workers
69
+ self._storage_batch_size = max(1, config.storage_batch_size)
70
+
67
71
  self._raw_data_dir = Path(config.raw_data_dir) / site
68
72
  self._raw_data_dir.mkdir(parents=True, exist_ok=True)
69
73
  self._debug_dir = Path.cwd() / "debug" / site
70
74
  self._debug_dir.mkdir(parents=True, exist_ok=True)
71
75
 
72
- self.logger = logging.getLogger(f"{self.__class__.__name__}")
76
+ self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
73
77
 
74
78
  async def download_many(
75
79
  self,
@@ -87,7 +91,7 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
87
91
  args: completed_count, total_count.
88
92
  :param cancel_event: Optional asyncio.Event to allow cancellation.
89
93
  """
90
- if not await self._ensure_ready():
94
+ if not self._check_login():
91
95
  book_ids = [b["book_id"] for b in books]
92
96
  self.logger.warning(
93
97
  "[%s] login failed, skipping download of books: %s",
@@ -116,8 +120,6 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
116
120
  except Exception as e:
117
121
  self._handle_download_exception(book, e)
118
122
 
119
- await self._finalize()
120
-
121
123
  async def download(
122
124
  self,
123
125
  book: BookConfig,
@@ -134,7 +136,7 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
134
136
  args: completed_count, total_count.
135
137
  :param cancel_event: Optional asyncio.Event to allow cancellation.
136
138
  """
137
- if not await self._ensure_ready():
139
+ if not self._check_login():
138
140
  self.logger.warning(
139
141
  "[%s] login failed, skipping download of book: %s (%s-%s)",
140
142
  self._site,
@@ -142,6 +144,7 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
142
144
  book.get("start_id", "-"),
143
145
  book.get("end_id", "-"),
144
146
  )
147
+ return
145
148
 
146
149
  # if already cancelled before starting
147
150
  if cancel_event and cancel_event.is_set():
@@ -162,8 +165,6 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
162
165
  except Exception as e:
163
166
  self._handle_download_exception(book, e)
164
167
 
165
- await self._finalize()
166
-
167
168
  async def load_book_info(
168
169
  self,
169
170
  book_id: str,
@@ -200,23 +201,6 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
200
201
  """
201
202
  ...
202
203
 
203
- async def _prepare(self) -> None:
204
- """
205
- Optional hook called before downloading.
206
-
207
- Subclasses can override this method to perform pre-download setup.
208
- """
209
- return
210
-
211
- async def _finalize(self) -> None:
212
- """
213
- Optional hook called after downloading is complete.
214
-
215
- Subclasses can override this method to perform post-download tasks,
216
- such as saving state or releasing resources.
217
- """
218
- return
219
-
220
204
  def _load_book_info(
221
205
  self,
222
206
  book_id: str,
@@ -283,25 +267,22 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
283
267
  :param filename: used as filename prefix
284
268
  :param html_list: list of HTML strings to save
285
269
  """
286
- if not self.save_html:
270
+ if not self._save_html:
287
271
  return
288
-
289
272
  html_dir.mkdir(parents=True, exist_ok=True)
290
273
  for i, html in enumerate(html_list):
291
- file_path = html_dir / f"{filename}_{i}.html"
292
- file_path.write_text(html, encoding="utf-8")
274
+ (html_dir / f"{filename}_{i}.html").write_text(html, encoding="utf-8")
293
275
 
294
276
  @staticmethod
295
- async def _chapter_ids(
296
- volumes: list[VolumeInfoDict],
277
+ def _planned_chapter_ids(
278
+ vols: list[VolumeInfoDict],
297
279
  start_id: str | None,
298
280
  end_id: str | None,
299
- ) -> AsyncIterator[str]:
300
- """
301
- Yield each chapterId in order, respecting start/end bounds.
302
- """
281
+ ignore: set[str],
282
+ ) -> list[str]:
303
283
  seen_start = start_id is None
304
- for vol in volumes:
284
+ out: list[str] = []
285
+ for vol in vols:
305
286
  for chap in vol["chapters"]:
306
287
  cid = chap.get("chapterId")
307
288
  if not cid:
@@ -311,9 +292,11 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
311
292
  seen_start = True
312
293
  else:
313
294
  continue
314
- yield cid
295
+ if cid not in ignore:
296
+ out.append(cid)
315
297
  if end_id is not None and cid == end_id:
316
- return
298
+ return out
299
+ return out
317
300
 
318
301
  @property
319
302
  def fetcher(self) -> FetcherProtocol:
@@ -323,38 +306,6 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
323
306
  def parser(self) -> ParserProtocol:
324
307
  return self._parser
325
308
 
326
- @property
327
- def save_html(self) -> bool:
328
- return self._config.save_html
329
-
330
- @property
331
- def skip_existing(self) -> bool:
332
- return self._config.skip_existing
333
-
334
- @property
335
- def login_required(self) -> bool:
336
- return self._config.login_required
337
-
338
- @property
339
- def request_interval(self) -> float:
340
- return self._config.request_interval
341
-
342
- @property
343
- def retry_times(self) -> int:
344
- return self._config.retry_times
345
-
346
- @property
347
- def backoff_factor(self) -> float:
348
- return self._config.backoff_factor
349
-
350
- @property
351
- def workers(self) -> int:
352
- return self._config.workers
353
-
354
- @property
355
- def storage_batch_size(self) -> int:
356
- return max(1, self._config.storage_batch_size)
357
-
358
309
  def _handle_download_exception(self, book: BookConfig, error: Exception) -> None:
359
310
  """
360
311
  Handle download errors in a consistent way.
@@ -373,10 +324,8 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
373
324
  error,
374
325
  )
375
326
 
376
- async def _ensure_ready(self) -> bool:
327
+ def _check_login(self) -> bool:
377
328
  """
378
- Run pre-download preparation and check login if needed.
329
+ Check login if needed.
379
330
  """
380
- await self._prepare()
381
-
382
- return self.fetcher.is_logged_in if self.login_required else True
331
+ return self.fetcher.is_logged_in if self._login_required else True
@@ -12,19 +12,9 @@ from pathlib import Path
12
12
  from typing import Any
13
13
 
14
14
  from novel_downloader.core.downloaders.base import BaseDownloader
15
- from novel_downloader.core.downloaders.signals import (
16
- STOP,
17
- Progress,
18
- StopToken,
19
- )
20
- from novel_downloader.models import (
21
- BookConfig,
22
- ChapterDict,
23
- )
24
- from novel_downloader.utils import (
25
- ChapterStorage,
26
- async_jitter_sleep,
27
- )
15
+ from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
16
+ from novel_downloader.models import BookConfig, ChapterDict
17
+ from novel_downloader.utils import ChapterStorage, async_jitter_sleep
28
18
 
29
19
 
30
20
  class CommonDownloader(BaseDownloader):
@@ -41,7 +31,7 @@ class CommonDownloader(BaseDownloader):
41
31
  **kwargs: Any,
42
32
  ) -> None:
43
33
  """
44
- Sentinel-based pipeline with graceful cancellation:
34
+ Sentinel-based pipeline with cancellation:
45
35
 
46
36
  Producer -> ChapterWorkers -> StorageWorker.
47
37
 
@@ -59,181 +49,167 @@ class CommonDownloader(BaseDownloader):
59
49
  raw_base.mkdir(parents=True, exist_ok=True)
60
50
  html_dir = self._debug_dir / book_id / "html"
61
51
 
62
- chapter_storage = ChapterStorage(
63
- raw_base=raw_base,
64
- priorities=self.PRIORITIES_MAP,
65
- )
66
- chapter_storage.connect()
67
-
68
52
  def cancelled() -> bool:
69
53
  return bool(cancel_event and cancel_event.is_set())
70
54
 
71
- try:
72
- # --- metadata ---
73
- book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
74
- if not book_info:
75
- return
76
-
77
- vols = book_info["volumes"]
78
- total_chapters = sum(len(v["chapters"]) for v in vols)
79
- if total_chapters == 0:
80
- self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
81
- return
55
+ # --- metadata ---
56
+ book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
57
+ if not book_info:
58
+ return
82
59
 
83
- progress = Progress(total_chapters, progress_hook)
60
+ vols = book_info["volumes"]
61
+ plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
62
+ if not plan:
63
+ self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
64
+ return
84
65
 
85
- # --- queues & batching ---
86
- cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
87
- save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
88
- batch: list[ChapterDict] = []
66
+ progress = Progress(total=len(plan), hook=progress_hook)
89
67
 
90
- async def flush_batch() -> None:
91
- if not batch:
92
- return
93
- try:
94
- chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
95
- except Exception as e:
96
- self.logger.error(
97
- "[Storage] batch upsert failed (size=%d): %s",
98
- len(batch),
99
- e,
100
- exc_info=True,
101
- )
102
- else:
103
- await progress.bump(len(batch))
104
- finally:
105
- batch.clear()
106
-
107
- # --- stage: storage worker ---
108
- async def storage_worker() -> None:
109
- """
110
- Consumes parsed chapters, writes in batches.
111
-
112
- Terminates after receiving STOP from each chapter worker.
113
-
114
- On cancel: keeps consuming (to avoid blocking producers),
115
- flushes, and exits once all STOPs are seen.
116
- """
117
- stop_count = 0
118
- while True:
119
- item = await save_q.get()
120
- if isinstance(item, StopToken):
121
- stop_count += 1
122
- if stop_count == self.workers:
123
- # All chapter workers have exited.
124
- await flush_batch()
125
- return
126
- # else keep waiting for remaining STOPs
127
- continue
128
-
129
- # Normal chapter
130
- batch.append(item)
131
- if len(batch) >= self.storage_batch_size:
132
- await flush_batch()
68
+ # --- queues & batching ---
69
+ cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(maxsize=self._workers * 2)
70
+ save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
71
+ maxsize=self._workers * 2
72
+ )
73
+ batch: list[ChapterDict] = []
133
74
 
134
- if cancelled():
135
- # Drain whatever is already in the queue
136
- try:
137
- while True:
138
- nxt = save_q.get_nowait()
139
- if isinstance(nxt, StopToken):
140
- stop_count += 1
141
- else:
142
- batch.append(nxt)
143
- except asyncio.QueueEmpty:
144
- pass
145
- # Final flush of everything
75
+ async def flush_batch() -> None:
76
+ if not batch:
77
+ return
78
+ try:
79
+ storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
80
+ except Exception as e:
81
+ self.logger.error(
82
+ "[Storage] batch upsert failed (size=%d): %s",
83
+ len(batch),
84
+ e,
85
+ exc_info=True,
86
+ )
87
+ else:
88
+ await progress.bump(len(batch))
89
+ finally:
90
+ batch.clear()
91
+
92
+ # --- stage: storage worker ---
93
+ async def storage_worker() -> None:
94
+ """
95
+ Consumes parsed chapters, writes in batches.
96
+
97
+ Terminates after receiving STOP from each chapter worker.
98
+
99
+ On cancel: keeps consuming (to avoid blocking producers),
100
+ flushes, and exits once all STOPs are seen.
101
+ """
102
+ stop_count = 0
103
+ while True:
104
+ item = await save_q.get()
105
+ if isinstance(item, StopToken):
106
+ stop_count += 1
107
+ if stop_count == self._workers:
108
+ # All chapter workers have exited.
146
109
  await flush_batch()
147
- # Wait for remaining STOPs so chapter workers can finish.
148
- while stop_count < self.workers:
149
- nxt = await save_q.get()
110
+ return
111
+ # else keep waiting for remaining STOPs
112
+ continue
113
+
114
+ # Normal chapter
115
+ batch.append(item)
116
+ if len(batch) >= self._storage_batch_size:
117
+ await flush_batch()
118
+
119
+ if cancelled():
120
+ # Drain whatever is already in the queue
121
+ try:
122
+ while True:
123
+ nxt = save_q.get_nowait()
150
124
  if isinstance(nxt, StopToken):
151
125
  stop_count += 1
152
- return
153
-
154
- # --- stage: chapter worker ---
155
- sem = asyncio.Semaphore(self.workers)
126
+ else:
127
+ batch.append(nxt)
128
+ except asyncio.QueueEmpty:
129
+ pass
130
+ # Final flush of everything
131
+ await flush_batch()
132
+ # Wait for remaining STOPs so chapter workers can finish.
133
+ while stop_count < self._workers:
134
+ nxt = await save_q.get()
135
+ if isinstance(nxt, StopToken):
136
+ stop_count += 1
137
+ return
156
138
 
157
- async def chapter_worker() -> None:
158
- """
159
- Fetch + parse with retry, then enqueue to save_q.
139
+ # --- stage: chapter worker ---
140
+ async def chapter_worker() -> None:
141
+ """
142
+ Fetch + parse with retry, then enqueue to save_q.
143
+
144
+ Exits on STOP, or early if cancel is set before starting a new fetch.
145
+ """
146
+ while True:
147
+ cid = await cid_q.get()
148
+ if isinstance(cid, StopToken):
149
+ # Propagate one STOP to storage and exit.
150
+ await save_q.put(STOP)
151
+ return
160
152
 
161
- Exits on STOP, or early if cancel is set before starting a new fetch.
162
- """
163
- while True:
164
- cid = await cid_q.get()
165
- if isinstance(cid, StopToken):
166
- # Propagate one STOP to storage and exit.
167
- await save_q.put(STOP)
168
- return
153
+ # If cancelled, don't start a new network call; let storage finish.
154
+ if cancelled():
155
+ await save_q.put(STOP)
156
+ return
169
157
 
170
- if not cid or cid in ignore_set:
171
- # Ignore silently and continue.
172
- continue
158
+ chap = await self._process_chapter(book_id, cid, html_dir)
159
+ if chap:
160
+ await save_q.put(chap)
173
161
 
174
- # If cancelled, don't start a new network call; let storage finish.
175
- if cancelled():
176
- await save_q.put(STOP)
177
- return
162
+ # polite pacing
163
+ await async_jitter_sleep(
164
+ self._request_interval,
165
+ mul_spread=1.1,
166
+ max_sleep=self._request_interval + 2,
167
+ )
178
168
 
179
- async with sem:
180
- chap = await self._process_chapter(book_id, cid, html_dir)
181
- if chap:
182
- await save_q.put(chap)
169
+ # --- stage: producer ---
170
+ async def producer() -> None:
171
+ """
172
+ Enqueue chapter IDs (respecting start/end/skip_existing).
183
173
 
184
- # polite pacing
185
- await async_jitter_sleep(
186
- self.request_interval,
187
- mul_spread=1.1,
188
- max_sleep=self.request_interval + 2,
189
- )
190
-
191
- # --- stage: producer ---
192
- async def producer() -> None:
193
- """
194
- Enqueue chapter IDs (respecting start/end/skip_existing).
195
-
196
- Always sends STOP x workers at the end (even if cancelled early),
197
- so chapter workers can exit deterministically.
198
- """
199
- try:
200
- async for cid in self._chapter_ids(vols, start_id, end_id):
201
- if cancelled():
202
- break
203
- if self.skip_existing and chapter_storage.exists(cid):
204
- # Count as completed but don't enqueue.
205
- await progress.bump(1)
206
- else:
207
- await cid_q.put(cid)
208
- finally:
209
- for _ in range(self.workers):
210
- await cid_q.put(STOP)
211
-
212
- # --- run the pipeline ---
174
+ Always sends STOP x workers at the end (even if cancelled early),
175
+ so chapter workers can exit deterministically.
176
+ """
177
+ try:
178
+ for cid in plan:
179
+ if cancelled():
180
+ break
181
+ if self._skip_existing and storage.exists(cid):
182
+ # Count as completed but don't enqueue.
183
+ await progress.bump(1)
184
+ else:
185
+ await cid_q.put(cid)
186
+ finally:
187
+ for _ in range(self._workers):
188
+ await cid_q.put(STOP)
189
+
190
+ # --- run the pipeline ---
191
+ with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
213
192
  async with asyncio.TaskGroup() as tg:
214
193
  tg.create_task(storage_worker())
215
- for _ in range(self.workers):
194
+ for _ in range(self._workers):
216
195
  tg.create_task(chapter_worker())
217
196
  tg.create_task(producer())
218
197
 
219
- # --- done ---
220
- if cancelled():
221
- self.logger.info(
222
- "%s Novel '%s' cancelled: flushed %d/%d chapters.",
223
- TAG,
224
- book_info.get("book_name", "unknown"),
225
- progress.done,
226
- progress.total,
227
- )
228
- else:
229
- self.logger.info(
230
- "%s Novel '%s' download completed.",
231
- TAG,
232
- book_info.get("book_name", "unknown"),
233
- )
234
-
235
- finally:
236
- chapter_storage.close()
198
+ # --- done ---
199
+ if cancelled():
200
+ self.logger.info(
201
+ "%s Novel '%s' cancelled: flushed %d/%d chapters.",
202
+ TAG,
203
+ book_info.get("book_name", "unknown"),
204
+ progress.done,
205
+ progress.total,
206
+ )
207
+ else:
208
+ self.logger.info(
209
+ "%s Novel '%s' download completed.",
210
+ TAG,
211
+ book_info.get("book_name", "unknown"),
212
+ )
237
213
 
238
214
  async def _process_chapter(
239
215
  self,
@@ -247,7 +223,7 @@ class CommonDownloader(BaseDownloader):
247
223
 
248
224
  :return: ChapterDict on success, or None on failure.
249
225
  """
250
- for attempt in range(self.retry_times + 1):
226
+ for attempt in range(self._retry_times + 1):
251
227
  try:
252
228
  html_list = await self.fetcher.get_book_chapter(book_id, cid)
253
229
  self._save_html_pages(html_dir, cid, html_list)
@@ -258,11 +234,11 @@ class CommonDownloader(BaseDownloader):
258
234
  raise ValueError("Empty parse result")
259
235
  return chap
260
236
  except Exception as e:
261
- if attempt < self.retry_times:
237
+ if attempt < self._retry_times:
262
238
  self.logger.info(
263
239
  "[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
264
240
  )
265
- backoff = self.backoff_factor * (2**attempt)
241
+ backoff = self._backoff_factor * (2**attempt)
266
242
  await async_jitter_sleep(
267
243
  base=backoff, mul_spread=1.2, max_sleep=backoff + 3
268
244
  )