novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
novel_downloader/core/downloaders/qianbi.py +332 -4
@@ -3,18 +3,47 @@
3
3
  novel_downloader.core.downloaders.qianbi
4
4
  ----------------------------------------
5
5
 
6
+ Downloader implementation for Qianbi novels, with chapter ID repair logic.
6
7
  """
7
8
 
8
- from novel_downloader.core.downloaders.common import CommonDownloader
9
+ import asyncio
10
+ from collections.abc import Awaitable, Callable
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from novel_downloader.core.downloaders.base import BaseDownloader
15
+ from novel_downloader.core.downloaders.registry import register_downloader
16
+ from novel_downloader.core.downloaders.signals import (
17
+ STOP,
18
+ Progress,
19
+ StopToken,
20
+ )
9
21
  from novel_downloader.core.interfaces import (
10
22
  FetcherProtocol,
11
23
  ParserProtocol,
12
24
  )
13
- from novel_downloader.models import DownloaderConfig
25
+ from novel_downloader.models import (
26
+ BookConfig,
27
+ BookInfoDict,
28
+ ChapterDict,
29
+ DownloaderConfig,
30
+ )
31
+ from novel_downloader.utils import (
32
+ ChapterStorage,
33
+ async_jitter_sleep,
34
+ )
35
+
36
+
37
+ @register_downloader(site_keys=["qianbi"])
38
+ class QianbiDownloader(BaseDownloader):
39
+ """
40
+ Downloader for Qianbi (铅笔) novels.
14
41
 
42
+ Repairs missing chapter IDs by following 'next' links, then downloads
43
+ each chapter as a unit (fetch -> parse -> enqueue storage).
44
+ """
15
45
 
16
- class QianbiDownloader(CommonDownloader):
17
- """"""
46
+ DEFAULT_SOURCE_ID = 0
18
47
 
19
48
  def __init__(
20
49
  self,
@@ -23,3 +52,302 @@ class QianbiDownloader(CommonDownloader):
23
52
  config: DownloaderConfig,
24
53
  ):
25
54
  super().__init__(fetcher, parser, config, "qianbi")
55
+
56
+ async def _download_one(
57
+ self,
58
+ book: BookConfig,
59
+ *,
60
+ progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
61
+ cancel_event: asyncio.Event | None = None,
62
+ **kwargs: Any,
63
+ ) -> None:
64
+ """
65
+ The full download logic for a single book.
66
+
67
+ :param book: BookConfig with at least 'book_id'.
68
+ """
69
+ TAG = "[Downloader]"
70
+
71
+ book_id = book["book_id"]
72
+ start_id = book.get("start_id")
73
+ end_id = book.get("end_id")
74
+ ignore_set = set(book.get("ignore_ids", []))
75
+
76
+ raw_base = self._raw_data_dir / book_id
77
+ raw_base.mkdir(parents=True, exist_ok=True)
78
+ html_dir = self._debug_dir / book_id / "html"
79
+
80
+ chapter_storage = ChapterStorage(
81
+ raw_base=raw_base,
82
+ priorities=self.PRIORITIES_MAP,
83
+ )
84
+ chapter_storage.connect()
85
+
86
+ def cancelled() -> bool:
87
+ return bool(cancel_event and cancel_event.is_set())
88
+
89
+ try:
90
+ # --- metadata ---
91
+ book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
92
+ if not book_info:
93
+ return
94
+
95
+ book_info = await self._repair_chapter_ids(
96
+ book_id,
97
+ book_info,
98
+ chapter_storage,
99
+ html_dir,
100
+ )
101
+
102
+ vols = book_info["volumes"]
103
+ total_chapters = sum(len(v["chapters"]) for v in vols)
104
+ if total_chapters == 0:
105
+ self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
106
+ return
107
+
108
+ progress = Progress(total_chapters, progress_hook)
109
+
110
+ # --- queues & batching ---
111
+ cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
112
+ save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
113
+ batch: list[ChapterDict] = []
114
+
115
+ async def flush_batch() -> None:
116
+ if not batch:
117
+ return
118
+ try:
119
+ chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
120
+ except Exception as e:
121
+ self.logger.error(
122
+ "[Storage] batch upsert failed (size=%d): %s",
123
+ len(batch),
124
+ e,
125
+ exc_info=True,
126
+ )
127
+ else:
128
+ await progress.bump(len(batch))
129
+ finally:
130
+ batch.clear()
131
+
132
+ # --- stage: storage worker ---
133
+ async def storage_worker() -> None:
134
+ """
135
+ Consumes parsed chapters, writes in batches.
136
+
137
+ Terminates after receiving STOP from each chapter worker.
138
+
139
+ On cancel: keeps consuming (to avoid blocking producers),
140
+ flushes, and exits once all STOPs are seen.
141
+ """
142
+ stop_count = 0
143
+ while True:
144
+ item = await save_q.get()
145
+ if isinstance(item, StopToken):
146
+ stop_count += 1
147
+ if stop_count == self.workers:
148
+ # All chapter workers have exited.
149
+ await flush_batch()
150
+ return
151
+ # else keep waiting for remaining STOPs
152
+ continue
153
+
154
+ # Normal chapter
155
+ batch.append(item)
156
+ if len(batch) >= self.storage_batch_size:
157
+ await flush_batch()
158
+
159
+ if cancelled():
160
+ # Drain whatever is already in the queue
161
+ try:
162
+ while True:
163
+ nxt = save_q.get_nowait()
164
+ if isinstance(nxt, StopToken):
165
+ stop_count += 1
166
+ else:
167
+ batch.append(nxt)
168
+ except asyncio.QueueEmpty:
169
+ pass
170
+ # Final flush of everything
171
+ await flush_batch()
172
+ # Wait for remaining STOPs so chapter workers can finish.
173
+ while stop_count < self.workers:
174
+ nxt = await save_q.get()
175
+ if isinstance(nxt, StopToken):
176
+ stop_count += 1
177
+ return
178
+
179
+ # --- stage: chapter worker ---
180
+ sem = asyncio.Semaphore(self.workers)
181
+
182
+ async def chapter_worker() -> None:
183
+ """
184
+ Fetch + parse with retry, then enqueue to save_q.
185
+
186
+ Exits on STOP, or early if cancel is set before starting a new fetch.
187
+ """
188
+ while True:
189
+ cid = await cid_q.get()
190
+ if isinstance(cid, StopToken):
191
+ # Propagate one STOP to storage and exit.
192
+ await save_q.put(STOP)
193
+ return
194
+
195
+ if not cid or cid in ignore_set:
196
+ # Ignore silently and continue.
197
+ continue
198
+
199
+ # If cancelled, don't start a new network call; let storage finish.
200
+ if cancelled():
201
+ await save_q.put(STOP)
202
+ return
203
+
204
+ async with sem:
205
+ chap = await self._process_chapter(book_id, cid, html_dir)
206
+ if chap:
207
+ await save_q.put(chap)
208
+
209
+ # polite pacing
210
+ await async_jitter_sleep(
211
+ self.request_interval,
212
+ mul_spread=1.1,
213
+ max_sleep=self.request_interval + 2,
214
+ )
215
+
216
+ # --- stage: producer ---
217
+ async def producer() -> None:
218
+ """
219
+ Enqueue chapter IDs (respecting start/end/skip_existing).
220
+ Always sends STOP x workers at the end (even if cancelled early),
221
+ so chapter workers can exit deterministically.
222
+ """
223
+ try:
224
+ async for cid in self._chapter_ids(vols, start_id, end_id):
225
+ if cancelled():
226
+ break
227
+ if self.skip_existing and chapter_storage.exists(cid):
228
+ # Count as completed but don't enqueue.
229
+ await progress.bump(1)
230
+ else:
231
+ await cid_q.put(cid)
232
+ finally:
233
+ for _ in range(self.workers):
234
+ await cid_q.put(STOP)
235
+
236
+ # --- run the pipeline ---
237
+ async with asyncio.TaskGroup() as tg:
238
+ tg.create_task(storage_worker())
239
+ for _ in range(self.workers):
240
+ tg.create_task(chapter_worker())
241
+ tg.create_task(producer())
242
+
243
+ # --- done ---
244
+ if cancelled():
245
+ self.logger.info(
246
+ "%s Novel '%s' cancelled: flushed %d/%d chapters.",
247
+ TAG,
248
+ book_info.get("book_name", "unknown"),
249
+ progress.done,
250
+ progress.total,
251
+ )
252
+ else:
253
+ self.logger.info(
254
+ "%s Novel '%s' download completed.",
255
+ TAG,
256
+ book_info.get("book_name", "unknown"),
257
+ )
258
+
259
+ finally:
260
+ chapter_storage.close()
261
+
262
+ async def _repair_chapter_ids(
263
+ self,
264
+ book_id: str,
265
+ book_info: BookInfoDict,
266
+ storage: ChapterStorage,
267
+ html_dir: Path,
268
+ ) -> BookInfoDict:
269
+ """
270
+ Fill in missing chapterId fields by retrieving the previous chapter
271
+ and following its 'next_chapter_id'. Uses storage to avoid refetching.
272
+ """
273
+ prev_cid: str = ""
274
+ for vol in book_info["volumes"]:
275
+ for chap in vol["chapters"]:
276
+ cid = chap.get("chapterId")
277
+ if cid:
278
+ prev_cid = cid
279
+ continue
280
+
281
+ # no valid previous to follow
282
+ if not prev_cid:
283
+ continue
284
+
285
+ # missing id: try storage
286
+ data = storage.get_best_chapter(prev_cid)
287
+ if not data:
288
+ # fetch+parse previous to discover next
289
+ data = await self._process_chapter(book_id, prev_cid, html_dir)
290
+ if not data:
291
+ self.logger.warning(
292
+ "failed to fetch chapter %s, skipping repair",
293
+ prev_cid,
294
+ )
295
+ continue
296
+ storage.upsert_chapter(data, self.DEFAULT_SOURCE_ID)
297
+ await async_jitter_sleep(
298
+ self.request_interval,
299
+ mul_spread=1.1,
300
+ max_sleep=self.request_interval + 2,
301
+ )
302
+
303
+ next_cid = data.get("extra", {}).get("next_chapter_id")
304
+ if not next_cid:
305
+ self.logger.warning(
306
+ "No next_chapter_id in data for %s",
307
+ prev_cid,
308
+ )
309
+ continue
310
+
311
+ self.logger.info(
312
+ "repaired chapterId: set to %s (from prev %s)",
313
+ next_cid,
314
+ prev_cid,
315
+ )
316
+ chap["chapterId"] = next_cid
317
+ prev_cid = next_cid
318
+
319
+ self._save_book_info(book_id, book_info)
320
+ return book_info
321
+
322
+ async def _process_chapter(
323
+ self,
324
+ book_id: str,
325
+ cid: str,
326
+ html_dir: Path,
327
+ ) -> ChapterDict | None:
328
+ """
329
+ Fetches, saves raw HTML, parses a single chapter,
330
+ retrying up to self.retry_times.
331
+
332
+ :return: ChapterDict on success, or None on failure.
333
+ """
334
+ for attempt in range(self.retry_times + 1):
335
+ try:
336
+ html_list = await self.fetcher.get_book_chapter(book_id, cid)
337
+ self._save_html_pages(html_dir, cid, html_list)
338
+ chap = await asyncio.to_thread(
339
+ self.parser.parse_chapter, html_list, cid
340
+ )
341
+ if not chap:
342
+ raise ValueError("Empty parse result")
343
+ return chap
344
+ except Exception as e:
345
+ if attempt < self.retry_times:
346
+ self.logger.info(f"[ChapterWorker] Retry {cid} ({attempt+1}): {e}")
347
+ backoff = self.backoff_factor * (2**attempt)
348
+ await async_jitter_sleep(
349
+ base=backoff, mul_spread=1.2, max_sleep=backoff + 3
350
+ )
351
+ else:
352
+ self.logger.warning(f"[ChapterWorker] Failed {cid}: {e}")
353
+ return None