novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +77 -64
  6. novel_downloader/cli/export.py +16 -20
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +65 -105
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +1 -0
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +14 -9
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +17 -11
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +61 -66
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  100. novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
  101. novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
  102. novel_downloader/core/parsers/qidian/main_parser.py +11 -38
  103. novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
  104. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  105. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  106. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  107. novel_downloader/core/parsers/quanben5.py +103 -0
  108. novel_downloader/core/parsers/registry.py +5 -16
  109. novel_downloader/core/parsers/sfacg.py +38 -45
  110. novel_downloader/core/parsers/shencou.py +215 -0
  111. novel_downloader/core/parsers/shuhaige.py +111 -0
  112. novel_downloader/core/parsers/tongrenquan.py +116 -0
  113. novel_downloader/core/parsers/ttkan.py +132 -0
  114. novel_downloader/core/parsers/wanbengo.py +191 -0
  115. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  116. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  117. novel_downloader/core/parsers/xs63b.py +161 -0
  118. novel_downloader/core/parsers/xshbook.py +134 -0
  119. novel_downloader/core/parsers/yamibo.py +87 -131
  120. novel_downloader/core/parsers/yibige.py +166 -0
  121. novel_downloader/core/searchers/__init__.py +34 -3
  122. novel_downloader/core/searchers/aaatxt.py +107 -0
  123. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  124. novel_downloader/core/searchers/base.py +112 -36
  125. novel_downloader/core/searchers/dxmwx.py +105 -0
  126. novel_downloader/core/searchers/eightnovel.py +84 -0
  127. novel_downloader/core/searchers/esjzone.py +43 -25
  128. novel_downloader/core/searchers/hetushu.py +92 -0
  129. novel_downloader/core/searchers/i25zw.py +93 -0
  130. novel_downloader/core/searchers/ixdzs8.py +107 -0
  131. novel_downloader/core/searchers/jpxs123.py +107 -0
  132. novel_downloader/core/searchers/piaotia.py +100 -0
  133. novel_downloader/core/searchers/qbtr.py +106 -0
  134. novel_downloader/core/searchers/qianbi.py +74 -40
  135. novel_downloader/core/searchers/quanben5.py +144 -0
  136. novel_downloader/core/searchers/registry.py +24 -8
  137. novel_downloader/core/searchers/shuhaige.py +124 -0
  138. novel_downloader/core/searchers/tongrenquan.py +110 -0
  139. novel_downloader/core/searchers/ttkan.py +92 -0
  140. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  141. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  142. novel_downloader/core/searchers/xs63b.py +104 -0
  143. novel_downloader/locales/en.json +31 -82
  144. novel_downloader/locales/zh.json +32 -83
  145. novel_downloader/models/__init__.py +21 -22
  146. novel_downloader/models/book.py +44 -0
  147. novel_downloader/models/config.py +4 -37
  148. novel_downloader/models/login.py +1 -1
  149. novel_downloader/models/search.py +5 -0
  150. novel_downloader/resources/config/settings.toml +8 -70
  151. novel_downloader/resources/json/xiguashuwu.json +718 -0
  152. novel_downloader/utils/__init__.py +13 -22
  153. novel_downloader/utils/chapter_storage.py +3 -2
  154. novel_downloader/utils/constants.py +4 -29
  155. novel_downloader/utils/cookies.py +6 -18
  156. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  157. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  158. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  159. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  160. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  161. novel_downloader/utils/epub/__init__.py +1 -1
  162. novel_downloader/utils/epub/constants.py +57 -16
  163. novel_downloader/utils/epub/documents.py +88 -194
  164. novel_downloader/utils/epub/models.py +0 -14
  165. novel_downloader/utils/epub/utils.py +63 -96
  166. novel_downloader/utils/file_utils/__init__.py +2 -23
  167. novel_downloader/utils/file_utils/io.py +3 -113
  168. novel_downloader/utils/file_utils/sanitize.py +0 -4
  169. novel_downloader/utils/fontocr.py +207 -0
  170. novel_downloader/utils/logger.py +8 -16
  171. novel_downloader/utils/network.py +2 -2
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/time_utils/__init__.py +5 -11
  176. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  177. novel_downloader/utils/time_utils/sleep_utils.py +4 -8
  178. novel_downloader/web/__init__.py +13 -0
  179. novel_downloader/web/components/__init__.py +11 -0
  180. novel_downloader/web/components/navigation.py +35 -0
  181. novel_downloader/web/main.py +66 -0
  182. novel_downloader/web/pages/__init__.py +17 -0
  183. novel_downloader/web/pages/download.py +78 -0
  184. novel_downloader/web/pages/progress.py +147 -0
  185. novel_downloader/web/pages/search.py +329 -0
  186. novel_downloader/web/services/__init__.py +17 -0
  187. novel_downloader/web/services/client_dialog.py +164 -0
  188. novel_downloader/web/services/cred_broker.py +113 -0
  189. novel_downloader/web/services/cred_models.py +35 -0
  190. novel_downloader/web/services/task_manager.py +264 -0
  191. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  192. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  193. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  194. novel_downloader/core/downloaders/biquge.py +0 -29
  195. novel_downloader/core/downloaders/esjzone.py +0 -29
  196. novel_downloader/core/downloaders/linovelib.py +0 -29
  197. novel_downloader/core/downloaders/sfacg.py +0 -29
  198. novel_downloader/core/downloaders/yamibo.py +0 -29
  199. novel_downloader/core/exporters/biquge.py +0 -22
  200. novel_downloader/core/exporters/esjzone.py +0 -22
  201. novel_downloader/core/exporters/qianbi.py +0 -22
  202. novel_downloader/core/exporters/sfacg.py +0 -22
  203. novel_downloader/core/exporters/yamibo.py +0 -22
  204. novel_downloader/core/fetchers/base/__init__.py +0 -14
  205. novel_downloader/core/fetchers/base/browser.py +0 -422
  206. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  207. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  208. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  209. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  210. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  211. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  212. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  213. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  214. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  215. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  216. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  217. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  218. novel_downloader/core/parsers/biquge.py +0 -139
  219. novel_downloader/models/chapter.py +0 -25
  220. novel_downloader/models/types.py +0 -13
  221. novel_downloader/tui/__init__.py +0 -7
  222. novel_downloader/tui/app.py +0 -32
  223. novel_downloader/tui/main.py +0 -17
  224. novel_downloader/tui/screens/__init__.py +0 -14
  225. novel_downloader/tui/screens/home.py +0 -198
  226. novel_downloader/tui/screens/login.py +0 -74
  227. novel_downloader/tui/styles/home_layout.tcss +0 -79
  228. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  229. novel_downloader/utils/cache.py +0 -24
  230. novel_downloader/utils/fontocr/__init__.py +0 -22
  231. novel_downloader/utils/fontocr/hash_store.py +0 -280
  232. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  233. novel_downloader/utils/fontocr/model_loader.py +0 -69
  234. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  235. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  236. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  237. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  238. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  239. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  240. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  241. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,22 +3,27 @@
3
3
  novel_downloader.core.downloaders.common
4
4
  ----------------------------------------
5
5
 
6
+ Concrete downloader implementation with a generic async pipeline for common novel sites
6
7
  """
7
8
 
8
9
  import asyncio
9
- from collections.abc import AsyncIterator, Awaitable, Callable
10
- from contextlib import asynccontextmanager
10
+ from collections.abc import Awaitable, Callable
11
11
  from pathlib import Path
12
12
  from typing import Any
13
13
 
14
14
  from novel_downloader.core.downloaders.base import BaseDownloader
15
+ from novel_downloader.core.downloaders.signals import (
16
+ STOP,
17
+ Progress,
18
+ StopToken,
19
+ )
15
20
  from novel_downloader.models import (
16
21
  BookConfig,
17
22
  ChapterDict,
18
23
  )
19
24
  from novel_downloader.utils import (
20
25
  ChapterStorage,
21
- async_sleep_with_random_delay,
26
+ async_jitter_sleep,
22
27
  )
23
28
 
24
29
 
@@ -32,164 +37,203 @@ class CommonDownloader(BaseDownloader):
32
37
  book: BookConfig,
33
38
  *,
34
39
  progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
40
+ cancel_event: asyncio.Event | None = None,
35
41
  **kwargs: Any,
36
42
  ) -> None:
37
43
  """
38
- The full download logic for a single book.
44
+ Sentinel-based pipeline with graceful cancellation:
45
+
46
+ Producer -> ChapterWorkers -> StorageWorker.
39
47
 
40
- :param book: BookConfig with at least 'book_id'.
48
+ On cancel: stop producing, workers finish at most one chapter,
49
+ storage drains, flushes, and exits.
41
50
  """
42
51
  TAG = "[Downloader]"
43
- book_id = book["book_id"]
52
+
53
+ book_id = self._normalize_book_id(book["book_id"])
44
54
  start_id = book.get("start_id")
45
55
  end_id = book.get("end_id")
46
56
  ignore_set = set(book.get("ignore_ids", []))
47
57
 
48
- # prepare storage & dirs
49
58
  raw_base = self._raw_data_dir / book_id
50
59
  raw_base.mkdir(parents=True, exist_ok=True)
51
60
  html_dir = self._debug_dir / book_id / "html"
61
+
52
62
  chapter_storage = ChapterStorage(
53
63
  raw_base=raw_base,
54
- priorities=self._priorities,
64
+ priorities=self.PRIORITIES_MAP,
55
65
  )
56
66
  chapter_storage.connect()
57
67
 
58
- # load or fetch metadata
59
- book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
60
- vols = book_info.get("volumes", [])
61
- total_chapters = sum(len(v.get("chapters", [])) for v in vols)
62
- if total_chapters == 0:
63
- self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
64
- return
65
-
66
- # concurrency primitives
67
- sem = asyncio.Semaphore(self.workers)
68
- cid_q: asyncio.Queue[str | None] = asyncio.Queue()
69
- save_q: asyncio.Queue[ChapterDict | None] = asyncio.Queue()
70
- batch: list[ChapterDict] = []
71
- completed = 0
72
-
73
- async def _flush_batch() -> None:
74
- nonlocal batch, completed
75
- if not batch:
68
+ def cancelled() -> bool:
69
+ return bool(cancel_event and cancel_event.is_set())
70
+
71
+ try:
72
+ # --- metadata ---
73
+ book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
74
+ if not book_info:
76
75
  return
77
76
 
78
- try:
79
- chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
80
- except Exception as e:
81
- self.logger.error(
82
- "[Storage] batch upsert failed (size=%d): %s",
83
- len(batch),
84
- e,
85
- exc_info=True,
86
- )
87
- else:
88
- completed += len(batch)
89
- if progress_hook:
90
- await progress_hook(completed, total_chapters)
91
- finally:
92
- batch.clear()
93
-
94
- async def storage_worker(q: asyncio.Queue[ChapterDict | None]) -> None:
95
- while True:
96
- item = await q.get()
97
- q.task_done()
98
- if item is None:
99
- # final flush before exit
100
- if batch:
101
- await _flush_batch()
102
- break
103
- batch.append(item)
104
- if len(batch) >= self.storage_batch_size:
105
- await _flush_batch()
106
-
107
- async def producer() -> None:
108
- nonlocal completed
109
- async for cid in self._chapter_ids(vols, start_id, end_id):
110
- if self.skip_existing and chapter_storage.exists(cid):
111
- completed += 1
112
- if progress_hook:
113
- await progress_hook(completed, total_chapters)
77
+ vols = book_info["volumes"]
78
+ total_chapters = sum(len(v["chapters"]) for v in vols)
79
+ if total_chapters == 0:
80
+ self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
81
+ return
82
+
83
+ progress = Progress(total_chapters, progress_hook)
84
+
85
+ # --- queues & batching ---
86
+ cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
87
+ save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
88
+ batch: list[ChapterDict] = []
89
+
90
+ async def flush_batch() -> None:
91
+ if not batch:
92
+ return
93
+ try:
94
+ chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
95
+ except Exception as e:
96
+ self.logger.error(
97
+ "[Storage] batch upsert failed (size=%d): %s",
98
+ len(batch),
99
+ e,
100
+ exc_info=True,
101
+ )
114
102
  else:
115
- await cid_q.put(cid)
103
+ await progress.bump(len(batch))
104
+ finally:
105
+ batch.clear()
106
+
107
+ # --- stage: storage worker ---
108
+ async def storage_worker() -> None:
109
+ """
110
+ Consumes parsed chapters, writes in batches.
111
+
112
+ Terminates after receiving STOP from each chapter worker.
113
+
114
+ On cancel: keeps consuming (to avoid blocking producers),
115
+ flushes, and exits once all STOPs are seen.
116
+ """
117
+ stop_count = 0
118
+ while True:
119
+ item = await save_q.get()
120
+ if isinstance(item, StopToken):
121
+ stop_count += 1
122
+ if stop_count == self.workers:
123
+ # All chapter workers have exited.
124
+ await flush_batch()
125
+ return
126
+ # else keep waiting for remaining STOPs
127
+ continue
128
+
129
+ # Normal chapter
130
+ batch.append(item)
131
+ if len(batch) >= self.storage_batch_size:
132
+ await flush_batch()
133
+
134
+ if cancelled():
135
+ # Drain whatever is already in the queue
136
+ try:
137
+ while True:
138
+ nxt = save_q.get_nowait()
139
+ if isinstance(nxt, StopToken):
140
+ stop_count += 1
141
+ else:
142
+ batch.append(nxt)
143
+ except asyncio.QueueEmpty:
144
+ pass
145
+ # Final flush of everything
146
+ await flush_batch()
147
+ # Wait for remaining STOPs so chapter workers can finish.
148
+ while stop_count < self.workers:
149
+ nxt = await save_q.get()
150
+ if isinstance(nxt, StopToken):
151
+ stop_count += 1
152
+ return
153
+
154
+ # --- stage: chapter worker ---
155
+ sem = asyncio.Semaphore(self.workers)
156
+
157
+ async def chapter_worker() -> None:
158
+ """
159
+ Fetch + parse with retry, then enqueue to save_q.
160
+
161
+ Exits on STOP, or early if cancel is set before starting a new fetch.
162
+ """
163
+ while True:
164
+ cid = await cid_q.get()
165
+ if isinstance(cid, StopToken):
166
+ # Propagate one STOP to storage and exit.
167
+ await save_q.put(STOP)
168
+ return
169
+
170
+ if not cid or cid in ignore_set:
171
+ # Ignore silently and continue.
172
+ continue
116
173
 
117
- @asynccontextmanager
118
- async def task_group_ctx() -> AsyncIterator[asyncio.TaskGroup]:
174
+ # If cancelled, don't start a new network call; let storage finish.
175
+ if cancelled():
176
+ await save_q.put(STOP)
177
+ return
178
+
179
+ async with sem:
180
+ chap = await self._process_chapter(book_id, cid, html_dir)
181
+ if chap:
182
+ await save_q.put(chap)
183
+
184
+ # polite pacing
185
+ await async_jitter_sleep(
186
+ self.request_interval,
187
+ mul_spread=1.1,
188
+ max_sleep=self.request_interval + 2,
189
+ )
190
+
191
+ # --- stage: producer ---
192
+ async def producer() -> None:
193
+ """
194
+ Enqueue chapter IDs (respecting start/end/skip_existing).
195
+
196
+ Always sends STOP x workers at the end (even if cancelled early),
197
+ so chapter workers can exit deterministically.
198
+ """
199
+ try:
200
+ async for cid in self._chapter_ids(vols, start_id, end_id):
201
+ if cancelled():
202
+ break
203
+ if self.skip_existing and chapter_storage.exists(cid):
204
+ # Count as completed but don't enqueue.
205
+ await progress.bump(1)
206
+ else:
207
+ await cid_q.put(cid)
208
+ finally:
209
+ for _ in range(self.workers):
210
+ await cid_q.put(STOP)
211
+
212
+ # --- run the pipeline ---
119
213
  async with asyncio.TaskGroup() as tg:
120
- # start chapter workers
214
+ tg.create_task(storage_worker())
121
215
  for _ in range(self.workers):
122
- tg.create_task(
123
- self._chapter_worker(
124
- book_id,
125
- ignore_set,
126
- cid_q,
127
- save_q,
128
- sem,
129
- )
130
- )
131
- # start storage worker
132
- tg.create_task(storage_worker(save_q))
133
- yield tg
134
-
135
- # run producer + workers
136
- async with task_group_ctx():
137
- # produce all CidTask
138
- await producer()
139
-
140
- # signal chapter workers to exit
141
- for _ in range(self.workers):
142
- await cid_q.put(None)
143
- await cid_q.join()
144
-
145
- # signal storage worker to exit
146
- await save_q.put(None)
147
- await save_q.join()
148
-
149
- # final flush to catch any remaining items
150
- await _flush_batch()
151
-
152
- chapter_storage.close()
153
- self.logger.info(
154
- "%s Novel '%s' download completed.",
155
- TAG,
156
- book_info.get("book_name", "unknown"),
157
- )
216
+ tg.create_task(chapter_worker())
217
+ tg.create_task(producer())
158
218
 
159
- async def _chapter_worker(
160
- self,
161
- book_id: str,
162
- ignore_set: set[str],
163
- cid_q: asyncio.Queue[str | None],
164
- save_q: asyncio.Queue[ChapterDict | None],
165
- sem: asyncio.Semaphore,
166
- ) -> None:
167
- """
168
- Worker that processes one chapter at a time:
169
- fetch + parse with retry, then enqueue to save_q.
170
- """
171
- html_dir = self._debug_dir / book_id / "html"
172
- while True:
173
- cid = await cid_q.get()
174
- if cid is None:
175
- cid_q.task_done()
176
- break
177
- if not cid or cid in ignore_set:
178
- cid_q.task_done()
179
- continue
180
-
181
- async with sem:
182
- chap = await self._process_chapter(book_id, cid, html_dir)
183
-
184
- if chap:
185
- await save_q.put(chap)
186
-
187
- cid_q.task_done()
188
- await async_sleep_with_random_delay(
189
- self.request_interval,
190
- mul_spread=1.1,
191
- max_sleep=self.request_interval + 2,
192
- )
219
+ # --- done ---
220
+ if cancelled():
221
+ self.logger.info(
222
+ "%s Novel '%s' cancelled: flushed %d/%d chapters.",
223
+ TAG,
224
+ book_info.get("book_name", "unknown"),
225
+ progress.done,
226
+ progress.total,
227
+ )
228
+ else:
229
+ self.logger.info(
230
+ "%s Novel '%s' download completed.",
231
+ TAG,
232
+ book_info.get("book_name", "unknown"),
233
+ )
234
+
235
+ finally:
236
+ chapter_storage.close()
193
237
 
194
238
  async def _process_chapter(
195
239
  self,
@@ -219,9 +263,19 @@ class CommonDownloader(BaseDownloader):
219
263
  "[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
220
264
  )
221
265
  backoff = self.backoff_factor * (2**attempt)
222
- await async_sleep_with_random_delay(
266
+ await async_jitter_sleep(
223
267
  base=backoff, mul_spread=1.2, max_sleep=backoff + 3
224
268
  )
225
269
  else:
226
270
  self.logger.warning("[ChapterWorker] Failed %s: %s", cid, e)
227
271
  return None
272
+
273
+ @staticmethod
274
+ def _normalize_book_id(book_id: str) -> str:
275
+ """
276
+ Normalize a book identifier.
277
+
278
+ Subclasses may override this method to transform the book ID
279
+ into their preferred format.
280
+ """
281
+ return book_id.replace("/", "-")