novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +79 -66
  6. novel_downloader/cli/export.py +17 -21
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +206 -209
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +5 -5
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +17 -12
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +20 -14
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +6 -19
  79. novel_downloader/core/interfaces/parser.py +7 -8
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +64 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +64 -69
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/main_parser.py +756 -48
  100. novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
  101. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  102. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  103. novel_downloader/core/parsers/quanben5.py +103 -0
  104. novel_downloader/core/parsers/registry.py +5 -16
  105. novel_downloader/core/parsers/sfacg.py +38 -45
  106. novel_downloader/core/parsers/shencou.py +215 -0
  107. novel_downloader/core/parsers/shuhaige.py +111 -0
  108. novel_downloader/core/parsers/tongrenquan.py +116 -0
  109. novel_downloader/core/parsers/ttkan.py +132 -0
  110. novel_downloader/core/parsers/wanbengo.py +191 -0
  111. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  112. novel_downloader/core/parsers/xiguashuwu.py +429 -0
  113. novel_downloader/core/parsers/xs63b.py +161 -0
  114. novel_downloader/core/parsers/xshbook.py +134 -0
  115. novel_downloader/core/parsers/yamibo.py +87 -131
  116. novel_downloader/core/parsers/yibige.py +166 -0
  117. novel_downloader/core/searchers/__init__.py +34 -3
  118. novel_downloader/core/searchers/aaatxt.py +107 -0
  119. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  120. novel_downloader/core/searchers/base.py +112 -36
  121. novel_downloader/core/searchers/dxmwx.py +105 -0
  122. novel_downloader/core/searchers/eightnovel.py +84 -0
  123. novel_downloader/core/searchers/esjzone.py +43 -25
  124. novel_downloader/core/searchers/hetushu.py +92 -0
  125. novel_downloader/core/searchers/i25zw.py +93 -0
  126. novel_downloader/core/searchers/ixdzs8.py +107 -0
  127. novel_downloader/core/searchers/jpxs123.py +107 -0
  128. novel_downloader/core/searchers/piaotia.py +100 -0
  129. novel_downloader/core/searchers/qbtr.py +106 -0
  130. novel_downloader/core/searchers/qianbi.py +74 -40
  131. novel_downloader/core/searchers/quanben5.py +144 -0
  132. novel_downloader/core/searchers/registry.py +24 -8
  133. novel_downloader/core/searchers/shuhaige.py +124 -0
  134. novel_downloader/core/searchers/tongrenquan.py +110 -0
  135. novel_downloader/core/searchers/ttkan.py +92 -0
  136. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  137. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  138. novel_downloader/core/searchers/xs63b.py +104 -0
  139. novel_downloader/locales/en.json +34 -85
  140. novel_downloader/locales/zh.json +35 -86
  141. novel_downloader/models/__init__.py +21 -22
  142. novel_downloader/models/book.py +44 -0
  143. novel_downloader/models/config.py +4 -37
  144. novel_downloader/models/login.py +1 -1
  145. novel_downloader/models/search.py +5 -0
  146. novel_downloader/resources/config/settings.toml +8 -70
  147. novel_downloader/resources/json/xiguashuwu.json +718 -0
  148. novel_downloader/utils/__init__.py +13 -24
  149. novel_downloader/utils/chapter_storage.py +5 -5
  150. novel_downloader/utils/constants.py +4 -31
  151. novel_downloader/utils/cookies.py +38 -35
  152. novel_downloader/utils/crypto_utils/__init__.py +7 -0
  153. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  154. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  155. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  156. novel_downloader/utils/crypto_utils/rc4.py +54 -0
  157. novel_downloader/utils/epub/__init__.py +3 -4
  158. novel_downloader/utils/epub/builder.py +6 -6
  159. novel_downloader/utils/epub/constants.py +62 -21
  160. novel_downloader/utils/epub/documents.py +95 -201
  161. novel_downloader/utils/epub/models.py +8 -22
  162. novel_downloader/utils/epub/utils.py +73 -106
  163. novel_downloader/utils/file_utils/__init__.py +2 -23
  164. novel_downloader/utils/file_utils/io.py +53 -188
  165. novel_downloader/utils/file_utils/normalize.py +1 -7
  166. novel_downloader/utils/file_utils/sanitize.py +4 -15
  167. novel_downloader/utils/fontocr/__init__.py +5 -14
  168. novel_downloader/utils/fontocr/core.py +216 -0
  169. novel_downloader/utils/fontocr/loader.py +50 -0
  170. novel_downloader/utils/logger.py +81 -65
  171. novel_downloader/utils/network.py +17 -41
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  176. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  177. novel_downloader/utils/time_utils/__init__.py +5 -11
  178. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  179. novel_downloader/utils/time_utils/sleep_utils.py +55 -49
  180. novel_downloader/web/__init__.py +13 -0
  181. novel_downloader/web/components/__init__.py +11 -0
  182. novel_downloader/web/components/navigation.py +35 -0
  183. novel_downloader/web/main.py +66 -0
  184. novel_downloader/web/pages/__init__.py +17 -0
  185. novel_downloader/web/pages/download.py +78 -0
  186. novel_downloader/web/pages/progress.py +147 -0
  187. novel_downloader/web/pages/search.py +329 -0
  188. novel_downloader/web/services/__init__.py +17 -0
  189. novel_downloader/web/services/client_dialog.py +164 -0
  190. novel_downloader/web/services/cred_broker.py +113 -0
  191. novel_downloader/web/services/cred_models.py +35 -0
  192. novel_downloader/web/services/task_manager.py +264 -0
  193. novel_downloader-2.0.1.dist-info/METADATA +172 -0
  194. novel_downloader-2.0.1.dist-info/RECORD +206 -0
  195. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
  196. novel_downloader/core/downloaders/biquge.py +0 -29
  197. novel_downloader/core/downloaders/esjzone.py +0 -29
  198. novel_downloader/core/downloaders/linovelib.py +0 -29
  199. novel_downloader/core/downloaders/sfacg.py +0 -29
  200. novel_downloader/core/downloaders/yamibo.py +0 -29
  201. novel_downloader/core/exporters/biquge.py +0 -22
  202. novel_downloader/core/exporters/esjzone.py +0 -22
  203. novel_downloader/core/exporters/qianbi.py +0 -22
  204. novel_downloader/core/exporters/sfacg.py +0 -22
  205. novel_downloader/core/exporters/yamibo.py +0 -22
  206. novel_downloader/core/fetchers/base/__init__.py +0 -14
  207. novel_downloader/core/fetchers/base/browser.py +0 -422
  208. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  209. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  210. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  211. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  212. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  213. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  214. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  215. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  216. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  217. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  218. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  219. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  220. novel_downloader/core/parsers/biquge.py +0 -139
  221. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
  222. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
  223. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
  224. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  225. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
  226. novel_downloader/models/chapter.py +0 -25
  227. novel_downloader/models/types.py +0 -13
  228. novel_downloader/tui/__init__.py +0 -7
  229. novel_downloader/tui/app.py +0 -32
  230. novel_downloader/tui/main.py +0 -17
  231. novel_downloader/tui/screens/__init__.py +0 -14
  232. novel_downloader/tui/screens/home.py +0 -198
  233. novel_downloader/tui/screens/login.py +0 -74
  234. novel_downloader/tui/styles/home_layout.tcss +0 -79
  235. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  236. novel_downloader/utils/cache.py +0 -24
  237. novel_downloader/utils/crypto_utils.py +0 -71
  238. novel_downloader/utils/fontocr/hash_store.py +0 -280
  239. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  240. novel_downloader/utils/fontocr/model_loader.py +0 -69
  241. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  242. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  243. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  244. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  245. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  246. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  247. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  248. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -3,28 +3,34 @@
3
3
  novel_downloader.core.downloaders.qianbi
4
4
  ----------------------------------------
5
5
 
6
+ Downloader implementation for Qianbi novels, with chapter ID repair logic.
6
7
  """
7
8
 
8
9
  import asyncio
9
- from collections.abc import AsyncIterator, Awaitable, Callable
10
- from contextlib import asynccontextmanager
10
+ from collections.abc import Awaitable, Callable
11
11
  from pathlib import Path
12
12
  from typing import Any
13
13
 
14
14
  from novel_downloader.core.downloaders.base import BaseDownloader
15
15
  from novel_downloader.core.downloaders.registry import register_downloader
16
+ from novel_downloader.core.downloaders.signals import (
17
+ STOP,
18
+ Progress,
19
+ StopToken,
20
+ )
16
21
  from novel_downloader.core.interfaces import (
17
22
  FetcherProtocol,
18
23
  ParserProtocol,
19
24
  )
20
25
  from novel_downloader.models import (
21
26
  BookConfig,
27
+ BookInfoDict,
22
28
  ChapterDict,
23
29
  DownloaderConfig,
24
30
  )
25
31
  from novel_downloader.utils import (
26
32
  ChapterStorage,
27
- async_sleep_with_random_delay,
33
+ async_jitter_sleep,
28
34
  )
29
35
 
30
36
 
@@ -52,6 +58,7 @@ class QianbiDownloader(BaseDownloader):
52
58
  book: BookConfig,
53
59
  *,
54
60
  progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
61
+ cancel_event: asyncio.Event | None = None,
55
62
  **kwargs: Any,
56
63
  ) -> None:
57
64
  """
@@ -60,143 +67,212 @@ class QianbiDownloader(BaseDownloader):
60
67
  :param book: BookConfig with at least 'book_id'.
61
68
  """
62
69
  TAG = "[Downloader]"
70
+
63
71
  book_id = book["book_id"]
64
72
  start_id = book.get("start_id")
65
73
  end_id = book.get("end_id")
66
74
  ignore_set = set(book.get("ignore_ids", []))
67
75
 
68
- # prepare storage & dirs
69
76
  raw_base = self._raw_data_dir / book_id
70
77
  raw_base.mkdir(parents=True, exist_ok=True)
71
78
  html_dir = self._debug_dir / book_id / "html"
79
+
72
80
  chapter_storage = ChapterStorage(
73
81
  raw_base=raw_base,
74
- priorities=self._priorities,
82
+ priorities=self.PRIORITIES_MAP,
75
83
  )
76
84
  chapter_storage.connect()
77
85
 
78
- # load or fetch metadata
79
- book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
80
- book_info = await self._repair_chapter_ids(
81
- book_id,
82
- book_info,
83
- chapter_storage,
84
- html_dir,
85
- )
86
+ def cancelled() -> bool:
87
+ return bool(cancel_event and cancel_event.is_set())
86
88
 
87
- vols = book_info.get("volumes", [])
88
- total_chapters = sum(len(v.get("chapters", [])) for v in vols)
89
- if total_chapters == 0:
90
- self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
91
- return
92
-
93
- # concurrency primitives
94
- sem = asyncio.Semaphore(self.workers)
95
- cid_q: asyncio.Queue[str | None] = asyncio.Queue()
96
- save_q: asyncio.Queue[ChapterDict | None] = asyncio.Queue()
97
- batch: list[ChapterDict] = []
98
- completed = 0
99
-
100
- async def _flush_batch() -> None:
101
- nonlocal batch, completed
102
- if not batch:
89
+ try:
90
+ # --- metadata ---
91
+ book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
92
+ if not book_info:
103
93
  return
104
94
 
105
- try:
106
- chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
107
- except Exception as e:
108
- self.logger.error(
109
- "[Storage] batch upsert failed (size=%d): %s",
110
- len(batch),
111
- e,
112
- exc_info=True,
113
- )
114
- else:
115
- completed += len(batch)
116
- if progress_hook:
117
- await progress_hook(completed, total_chapters)
118
- finally:
119
- batch.clear()
120
-
121
- async def storage_worker(q: asyncio.Queue[ChapterDict | None]) -> None:
122
- while True:
123
- item = await q.get()
124
- q.task_done()
125
- if item is None:
126
- # final flush before exit
127
- if batch:
128
- await _flush_batch()
129
- break
130
- batch.append(item)
131
- if len(batch) >= self.storage_batch_size:
132
- await _flush_batch()
133
-
134
- async def producer() -> None:
135
- nonlocal completed
136
- async for cid in self._chapter_ids(vols, start_id, end_id):
137
- if self.skip_existing and chapter_storage.exists(cid):
138
- completed += 1
139
- if progress_hook:
140
- await progress_hook(completed, total_chapters)
95
+ book_info = await self._repair_chapter_ids(
96
+ book_id,
97
+ book_info,
98
+ chapter_storage,
99
+ html_dir,
100
+ )
101
+
102
+ vols = book_info["volumes"]
103
+ total_chapters = sum(len(v["chapters"]) for v in vols)
104
+ if total_chapters == 0:
105
+ self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
106
+ return
107
+
108
+ progress = Progress(total_chapters, progress_hook)
109
+
110
+ # --- queues & batching ---
111
+ cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
112
+ save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
113
+ batch: list[ChapterDict] = []
114
+
115
+ async def flush_batch() -> None:
116
+ if not batch:
117
+ return
118
+ try:
119
+ chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
120
+ except Exception as e:
121
+ self.logger.error(
122
+ "[Storage] batch upsert failed (size=%d): %s",
123
+ len(batch),
124
+ e,
125
+ exc_info=True,
126
+ )
141
127
  else:
142
- await cid_q.put(cid)
128
+ await progress.bump(len(batch))
129
+ finally:
130
+ batch.clear()
131
+
132
+ # --- stage: storage worker ---
133
+ async def storage_worker() -> None:
134
+ """
135
+ Consumes parsed chapters, writes in batches.
136
+
137
+ Terminates after receiving STOP from each chapter worker.
138
+
139
+ On cancel: keeps consuming (to avoid blocking producers),
140
+ flushes, and exits once all STOPs are seen.
141
+ """
142
+ stop_count = 0
143
+ while True:
144
+ item = await save_q.get()
145
+ if isinstance(item, StopToken):
146
+ stop_count += 1
147
+ if stop_count == self.workers:
148
+ # All chapter workers have exited.
149
+ await flush_batch()
150
+ return
151
+ # else keep waiting for remaining STOPs
152
+ continue
153
+
154
+ # Normal chapter
155
+ batch.append(item)
156
+ if len(batch) >= self.storage_batch_size:
157
+ await flush_batch()
158
+
159
+ if cancelled():
160
+ # Drain whatever is already in the queue
161
+ try:
162
+ while True:
163
+ nxt = save_q.get_nowait()
164
+ if isinstance(nxt, StopToken):
165
+ stop_count += 1
166
+ else:
167
+ batch.append(nxt)
168
+ except asyncio.QueueEmpty:
169
+ pass
170
+ # Final flush of everything
171
+ await flush_batch()
172
+ # Wait for remaining STOPs so chapter workers can finish.
173
+ while stop_count < self.workers:
174
+ nxt = await save_q.get()
175
+ if isinstance(nxt, StopToken):
176
+ stop_count += 1
177
+ return
178
+
179
+ # --- stage: chapter worker ---
180
+ sem = asyncio.Semaphore(self.workers)
181
+
182
+ async def chapter_worker() -> None:
183
+ """
184
+ Fetch + parse with retry, then enqueue to save_q.
185
+
186
+ Exits on STOP, or early if cancel is set before starting a new fetch.
187
+ """
188
+ while True:
189
+ cid = await cid_q.get()
190
+ if isinstance(cid, StopToken):
191
+ # Propagate one STOP to storage and exit.
192
+ await save_q.put(STOP)
193
+ return
194
+
195
+ if not cid or cid in ignore_set:
196
+ # Ignore silently and continue.
197
+ continue
143
198
 
144
- @asynccontextmanager
145
- async def task_group_ctx() -> AsyncIterator[asyncio.TaskGroup]:
199
+ # If cancelled, don't start a new network call; let storage finish.
200
+ if cancelled():
201
+ await save_q.put(STOP)
202
+ return
203
+
204
+ async with sem:
205
+ chap = await self._process_chapter(book_id, cid, html_dir)
206
+ if chap:
207
+ await save_q.put(chap)
208
+
209
+ # polite pacing
210
+ await async_jitter_sleep(
211
+ self.request_interval,
212
+ mul_spread=1.1,
213
+ max_sleep=self.request_interval + 2,
214
+ )
215
+
216
+ # --- stage: producer ---
217
+ async def producer() -> None:
218
+ """
219
+ Enqueue chapter IDs (respecting start/end/skip_existing).
220
+ Always sends STOP x workers at the end (even if cancelled early),
221
+ so chapter workers can exit deterministically.
222
+ """
223
+ try:
224
+ async for cid in self._chapter_ids(vols, start_id, end_id):
225
+ if cancelled():
226
+ break
227
+ if self.skip_existing and chapter_storage.exists(cid):
228
+ # Count as completed but don't enqueue.
229
+ await progress.bump(1)
230
+ else:
231
+ await cid_q.put(cid)
232
+ finally:
233
+ for _ in range(self.workers):
234
+ await cid_q.put(STOP)
235
+
236
+ # --- run the pipeline ---
146
237
  async with asyncio.TaskGroup() as tg:
147
- # start chapter workers
238
+ tg.create_task(storage_worker())
148
239
  for _ in range(self.workers):
149
- tg.create_task(
150
- self._chapter_worker(
151
- book_id,
152
- ignore_set,
153
- cid_q,
154
- save_q,
155
- sem,
156
- )
157
- )
158
- # start storage worker
159
- tg.create_task(storage_worker(save_q))
160
- yield tg
161
-
162
- # run producer + workers
163
- async with task_group_ctx():
164
- # produce all CidTask
165
- await producer()
166
-
167
- # signal chapter workers to exit
168
- for _ in range(self.workers):
169
- await cid_q.put(None)
170
- await cid_q.join()
171
-
172
- # signal storage worker to exit
173
- await save_q.put(None)
174
- await save_q.join()
175
-
176
- # final flush to catch any remaining items
177
- await _flush_batch()
178
-
179
- chapter_storage.close()
180
- self.logger.info(
181
- "%s Novel '%s' download completed.",
182
- TAG,
183
- book_info.get("book_name", "unknown"),
184
- )
240
+ tg.create_task(chapter_worker())
241
+ tg.create_task(producer())
242
+
243
+ # --- done ---
244
+ if cancelled():
245
+ self.logger.info(
246
+ "%s Novel '%s' cancelled: flushed %d/%d chapters.",
247
+ TAG,
248
+ book_info.get("book_name", "unknown"),
249
+ progress.done,
250
+ progress.total,
251
+ )
252
+ else:
253
+ self.logger.info(
254
+ "%s Novel '%s' download completed.",
255
+ TAG,
256
+ book_info.get("book_name", "unknown"),
257
+ )
258
+
259
+ finally:
260
+ chapter_storage.close()
185
261
 
186
262
  async def _repair_chapter_ids(
187
263
  self,
188
264
  book_id: str,
189
- book_info: dict[str, Any],
265
+ book_info: BookInfoDict,
190
266
  storage: ChapterStorage,
191
267
  html_dir: Path,
192
- ) -> dict[str, Any]:
268
+ ) -> BookInfoDict:
193
269
  """
194
270
  Fill in missing chapterId fields by retrieving the previous chapter
195
271
  and following its 'next_chapter_id'. Uses storage to avoid refetching.
196
272
  """
197
273
  prev_cid: str = ""
198
- for vol in book_info.get("volumes", []):
199
- for chap in vol.get("chapters", []):
274
+ for vol in book_info["volumes"]:
275
+ for chap in vol["chapters"]:
200
276
  cid = chap.get("chapterId")
201
277
  if cid:
202
278
  prev_cid = cid
@@ -218,7 +294,7 @@ class QianbiDownloader(BaseDownloader):
218
294
  )
219
295
  continue
220
296
  storage.upsert_chapter(data, self.DEFAULT_SOURCE_ID)
221
- await async_sleep_with_random_delay(
297
+ await async_jitter_sleep(
222
298
  self.request_interval,
223
299
  mul_spread=1.1,
224
300
  max_sleep=self.request_interval + 2,
@@ -243,41 +319,6 @@ class QianbiDownloader(BaseDownloader):
243
319
  self._save_book_info(book_id, book_info)
244
320
  return book_info
245
321
 
246
- async def _chapter_worker(
247
- self,
248
- book_id: str,
249
- ignore_set: set[str],
250
- cid_q: asyncio.Queue[str | None],
251
- save_q: asyncio.Queue[ChapterDict | None],
252
- sem: asyncio.Semaphore,
253
- ) -> None:
254
- """
255
- Worker that processes one chapter at a time:
256
- fetch + parse with retry, then enqueue to save_q.
257
- """
258
- html_dir = self._debug_dir / book_id / "html"
259
- while True:
260
- cid = await cid_q.get()
261
- if cid is None:
262
- cid_q.task_done()
263
- break
264
- if not cid or cid in ignore_set:
265
- cid_q.task_done()
266
- continue
267
-
268
- async with sem:
269
- chap = await self._process_chapter(book_id, cid, html_dir)
270
-
271
- if chap:
272
- await save_q.put(chap)
273
-
274
- cid_q.task_done()
275
- await async_sleep_with_random_delay(
276
- self.request_interval,
277
- mul_spread=1.1,
278
- max_sleep=self.request_interval + 2,
279
- )
280
-
281
322
  async def _process_chapter(
282
323
  self,
283
324
  book_id: str,
@@ -304,7 +345,7 @@ class QianbiDownloader(BaseDownloader):
304
345
  if attempt < self.retry_times:
305
346
  self.logger.info(f"[ChapterWorker] Retry {cid} ({attempt+1}): {e}")
306
347
  backoff = self.backoff_factor * (2**attempt)
307
- await async_sleep_with_random_delay(
348
+ await async_jitter_sleep(
308
349
  base=backoff, mul_spread=1.2, max_sleep=backoff + 3
309
350
  )
310
351
  else: