novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +77 -64
  6. novel_downloader/cli/export.py +16 -20
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +65 -105
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +1 -0
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +14 -9
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +17 -11
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +61 -66
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  100. novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
  101. novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
  102. novel_downloader/core/parsers/qidian/main_parser.py +11 -38
  103. novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
  104. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  105. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  106. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  107. novel_downloader/core/parsers/quanben5.py +103 -0
  108. novel_downloader/core/parsers/registry.py +5 -16
  109. novel_downloader/core/parsers/sfacg.py +38 -45
  110. novel_downloader/core/parsers/shencou.py +215 -0
  111. novel_downloader/core/parsers/shuhaige.py +111 -0
  112. novel_downloader/core/parsers/tongrenquan.py +116 -0
  113. novel_downloader/core/parsers/ttkan.py +132 -0
  114. novel_downloader/core/parsers/wanbengo.py +191 -0
  115. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  116. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  117. novel_downloader/core/parsers/xs63b.py +161 -0
  118. novel_downloader/core/parsers/xshbook.py +134 -0
  119. novel_downloader/core/parsers/yamibo.py +87 -131
  120. novel_downloader/core/parsers/yibige.py +166 -0
  121. novel_downloader/core/searchers/__init__.py +34 -3
  122. novel_downloader/core/searchers/aaatxt.py +107 -0
  123. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  124. novel_downloader/core/searchers/base.py +112 -36
  125. novel_downloader/core/searchers/dxmwx.py +105 -0
  126. novel_downloader/core/searchers/eightnovel.py +84 -0
  127. novel_downloader/core/searchers/esjzone.py +43 -25
  128. novel_downloader/core/searchers/hetushu.py +92 -0
  129. novel_downloader/core/searchers/i25zw.py +93 -0
  130. novel_downloader/core/searchers/ixdzs8.py +107 -0
  131. novel_downloader/core/searchers/jpxs123.py +107 -0
  132. novel_downloader/core/searchers/piaotia.py +100 -0
  133. novel_downloader/core/searchers/qbtr.py +106 -0
  134. novel_downloader/core/searchers/qianbi.py +74 -40
  135. novel_downloader/core/searchers/quanben5.py +144 -0
  136. novel_downloader/core/searchers/registry.py +24 -8
  137. novel_downloader/core/searchers/shuhaige.py +124 -0
  138. novel_downloader/core/searchers/tongrenquan.py +110 -0
  139. novel_downloader/core/searchers/ttkan.py +92 -0
  140. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  141. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  142. novel_downloader/core/searchers/xs63b.py +104 -0
  143. novel_downloader/locales/en.json +31 -82
  144. novel_downloader/locales/zh.json +32 -83
  145. novel_downloader/models/__init__.py +21 -22
  146. novel_downloader/models/book.py +44 -0
  147. novel_downloader/models/config.py +4 -37
  148. novel_downloader/models/login.py +1 -1
  149. novel_downloader/models/search.py +5 -0
  150. novel_downloader/resources/config/settings.toml +8 -70
  151. novel_downloader/resources/json/xiguashuwu.json +718 -0
  152. novel_downloader/utils/__init__.py +13 -22
  153. novel_downloader/utils/chapter_storage.py +3 -2
  154. novel_downloader/utils/constants.py +4 -29
  155. novel_downloader/utils/cookies.py +6 -18
  156. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  157. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  158. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  159. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  160. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  161. novel_downloader/utils/epub/__init__.py +1 -1
  162. novel_downloader/utils/epub/constants.py +57 -16
  163. novel_downloader/utils/epub/documents.py +88 -194
  164. novel_downloader/utils/epub/models.py +0 -14
  165. novel_downloader/utils/epub/utils.py +63 -96
  166. novel_downloader/utils/file_utils/__init__.py +2 -23
  167. novel_downloader/utils/file_utils/io.py +3 -113
  168. novel_downloader/utils/file_utils/sanitize.py +0 -4
  169. novel_downloader/utils/fontocr.py +207 -0
  170. novel_downloader/utils/logger.py +8 -16
  171. novel_downloader/utils/network.py +2 -2
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/time_utils/__init__.py +5 -11
  176. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  177. novel_downloader/utils/time_utils/sleep_utils.py +4 -8
  178. novel_downloader/web/__init__.py +13 -0
  179. novel_downloader/web/components/__init__.py +11 -0
  180. novel_downloader/web/components/navigation.py +35 -0
  181. novel_downloader/web/main.py +66 -0
  182. novel_downloader/web/pages/__init__.py +17 -0
  183. novel_downloader/web/pages/download.py +78 -0
  184. novel_downloader/web/pages/progress.py +147 -0
  185. novel_downloader/web/pages/search.py +329 -0
  186. novel_downloader/web/services/__init__.py +17 -0
  187. novel_downloader/web/services/client_dialog.py +164 -0
  188. novel_downloader/web/services/cred_broker.py +113 -0
  189. novel_downloader/web/services/cred_models.py +35 -0
  190. novel_downloader/web/services/task_manager.py +264 -0
  191. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  192. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  193. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  194. novel_downloader/core/downloaders/biquge.py +0 -29
  195. novel_downloader/core/downloaders/esjzone.py +0 -29
  196. novel_downloader/core/downloaders/linovelib.py +0 -29
  197. novel_downloader/core/downloaders/sfacg.py +0 -29
  198. novel_downloader/core/downloaders/yamibo.py +0 -29
  199. novel_downloader/core/exporters/biquge.py +0 -22
  200. novel_downloader/core/exporters/esjzone.py +0 -22
  201. novel_downloader/core/exporters/qianbi.py +0 -22
  202. novel_downloader/core/exporters/sfacg.py +0 -22
  203. novel_downloader/core/exporters/yamibo.py +0 -22
  204. novel_downloader/core/fetchers/base/__init__.py +0 -14
  205. novel_downloader/core/fetchers/base/browser.py +0 -422
  206. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  207. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  208. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  209. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  210. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  211. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  212. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  213. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  214. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  215. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  216. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  217. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  218. novel_downloader/core/parsers/biquge.py +0 -139
  219. novel_downloader/models/chapter.py +0 -25
  220. novel_downloader/models/types.py +0 -13
  221. novel_downloader/tui/__init__.py +0 -7
  222. novel_downloader/tui/app.py +0 -32
  223. novel_downloader/tui/main.py +0 -17
  224. novel_downloader/tui/screens/__init__.py +0 -14
  225. novel_downloader/tui/screens/home.py +0 -198
  226. novel_downloader/tui/screens/login.py +0 -74
  227. novel_downloader/tui/styles/home_layout.tcss +0 -79
  228. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  229. novel_downloader/utils/cache.py +0 -24
  230. novel_downloader/utils/fontocr/__init__.py +0 -22
  231. novel_downloader/utils/fontocr/hash_store.py +0 -280
  232. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  233. novel_downloader/utils/fontocr/model_loader.py +0 -69
  234. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  235. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  236. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  237. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  238. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  239. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  240. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  241. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,16 +3,22 @@
3
3
  novel_downloader.core.downloaders.qidian
4
4
  ----------------------------------------
5
5
 
6
+ Downloader implementation for Qidian novels,
7
+ with handling for restricted and encrypted chapters
6
8
  """
7
9
 
8
10
  import asyncio
9
- from collections.abc import AsyncIterator, Awaitable, Callable
10
- from contextlib import asynccontextmanager
11
+ from collections.abc import Awaitable, Callable
11
12
  from pathlib import Path
12
13
  from typing import Any
13
14
 
14
15
  from novel_downloader.core.downloaders.base import BaseDownloader
15
16
  from novel_downloader.core.downloaders.registry import register_downloader
17
+ from novel_downloader.core.downloaders.signals import (
18
+ STOP,
19
+ Progress,
20
+ StopToken,
21
+ )
16
22
  from novel_downloader.core.interfaces import (
17
23
  FetcherProtocol,
18
24
  ParserProtocol,
@@ -24,7 +30,7 @@ from novel_downloader.models import (
24
30
  )
25
31
  from novel_downloader.utils import (
26
32
  ChapterStorage,
27
- async_sleep_with_random_delay,
33
+ async_jitter_sleep,
28
34
  )
29
35
 
30
36
 
@@ -51,13 +57,14 @@ class QidianDownloader(BaseDownloader):
51
57
  config: DownloaderConfig,
52
58
  ):
53
59
  config.request_interval = max(1.0, config.request_interval)
54
- super().__init__(fetcher, parser, config, "qidian", self.PRIORITIES_MAP)
60
+ super().__init__(fetcher, parser, config, "qidian")
55
61
 
56
62
  async def _download_one(
57
63
  self,
58
64
  book: BookConfig,
59
65
  *,
60
66
  progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
67
+ cancel_event: asyncio.Event | None = None,
61
68
  **kwargs: Any,
62
69
  ) -> None:
63
70
  """
@@ -66,6 +73,8 @@ class QidianDownloader(BaseDownloader):
66
73
  :param book: BookConfig with at least 'book_id'.
67
74
  """
68
75
  TAG = "[Downloader]"
76
+ NUM_WORKERS = 1
77
+
69
78
  book_id = book["book_id"]
70
79
  start_id = book.get("start_id")
71
80
  end_id = book.get("end_id")
@@ -74,117 +83,187 @@ class QidianDownloader(BaseDownloader):
74
83
  raw_base = self._raw_data_dir / book_id
75
84
  raw_base.mkdir(parents=True, exist_ok=True)
76
85
  html_dir = self._debug_dir / book_id / "html"
86
+
77
87
  chapter_storage = ChapterStorage(
78
88
  raw_base=raw_base,
79
- priorities=self._priorities,
89
+ priorities=self.PRIORITIES_MAP,
80
90
  )
81
91
  chapter_storage.connect()
82
92
 
83
- # load or fetch metadata
84
- book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
85
- vols = book_info.get("volumes", [])
86
- total_chapters = sum(len(v.get("chapters", [])) for v in vols)
87
- if total_chapters == 0:
88
- self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
89
- return
90
-
91
- # concurrency primitives
92
- sem = asyncio.Semaphore(self.workers)
93
- cid_q: asyncio.Queue[str | None] = asyncio.Queue()
94
- save_q: asyncio.Queue[ChapterDict | None] = asyncio.Queue()
95
- default_batch: list[ChapterDict] = []
96
- encrypted_batch: list[ChapterDict] = []
97
- completed = 0
98
-
99
- def _select(batch_item: ChapterDict) -> tuple[list[ChapterDict], int]:
100
- if batch_item.get("extra", {}).get("encrypted", False):
101
- return encrypted_batch, self.ENCRYPTED_SOURCE_ID
102
- return default_batch, self.DEFAULT_SOURCE_ID
103
-
104
- async def _flush(batch: list[ChapterDict], src: int) -> None:
105
- nonlocal completed
106
- if not batch:
93
+ def cancelled() -> bool:
94
+ return bool(cancel_event and cancel_event.is_set())
95
+
96
+ try:
97
+ # ---- metadata ---
98
+ book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
99
+ if not book_info:
107
100
  return
108
- try:
109
- chapter_storage.upsert_chapters(batch, src)
110
- except Exception as e:
111
- self.logger.error(
112
- "[Storage] batch upsert failed (size=%d, source=%d): %s",
113
- len(batch),
114
- src,
115
- e,
116
- exc_info=True,
117
- )
118
- else:
119
- completed += len(batch)
120
- if progress_hook:
121
- await progress_hook(completed, total_chapters)
122
- finally:
123
- batch.clear()
124
-
125
- async def storage_worker(q: asyncio.Queue[ChapterDict | None]) -> None:
126
- while True:
127
- chap = await q.get()
128
- q.task_done()
129
- if chap is None:
130
- # final flush before exit
131
- await _flush(default_batch, self.DEFAULT_SOURCE_ID)
132
- await _flush(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
133
- break
134
- batch, src = _select(chap)
135
- batch.append(chap)
136
- if len(batch) >= self.storage_batch_size:
137
- await _flush(batch, src)
138
-
139
- async def producer() -> None:
140
- nonlocal completed
141
- async for cid in self._chapter_ids(vols, start_id, end_id):
142
- if self.skip_existing and chapter_storage.exists(
143
- cid, self.DEFAULT_SOURCE_ID
144
- ):
145
- completed += 1
146
- if progress_hook:
147
- await progress_hook(completed, total_chapters)
101
+
102
+ vols = book_info["volumes"]
103
+ total_chapters = sum(len(v["chapters"]) for v in vols)
104
+ if total_chapters == 0:
105
+ self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
106
+ return
107
+
108
+ progress = Progress(total_chapters, progress_hook)
109
+
110
+ # ---- queues & batching ---
111
+ cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
112
+ save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
113
+ default_batch: list[ChapterDict] = []
114
+ encrypted_batch: list[ChapterDict] = []
115
+
116
+ def select_batch(chap: ChapterDict) -> tuple[list[ChapterDict], int]:
117
+ # set extra.encrypted (by parser); default to plain if absent.
118
+ if chap.get("extra", {}).get("encrypted", False):
119
+ return encrypted_batch, self.ENCRYPTED_SOURCE_ID
120
+ return default_batch, self.DEFAULT_SOURCE_ID
121
+
122
+ async def flush_batch(batch: list[ChapterDict], src: int) -> None:
123
+ if not batch:
124
+ return
125
+ try:
126
+ chapter_storage.upsert_chapters(batch, src)
127
+ except Exception as e:
128
+ self.logger.error(
129
+ "[Storage] batch upsert failed (size=%d, src=%d): %s",
130
+ len(batch),
131
+ src,
132
+ e,
133
+ exc_info=True,
134
+ )
148
135
  else:
149
- await cid_q.put(cid)
136
+ await progress.bump(len(batch))
137
+ finally:
138
+ batch.clear()
139
+
140
+ async def flush_all() -> None:
141
+ await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
142
+ await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
143
+
144
+ # ---- workers ---
145
+ sem = asyncio.Semaphore(self.workers)
146
+
147
+ async def storage_worker() -> None:
148
+ """
149
+ Consumes parsed chapters, batches by source, flushes on threshold.
150
+
151
+ Terminates after receiving STOP from each chapter worker.
152
+
153
+ On cancel: drains queue, flushes once, then waits for remaining STOPs.
154
+ """
155
+ stop_count = 0
156
+ while True:
157
+ chap = await save_q.get()
158
+ if isinstance(chap, StopToken):
159
+ stop_count += 1
160
+ if stop_count == NUM_WORKERS:
161
+ await flush_all()
162
+ return
163
+ continue
164
+
165
+ batch, src = select_batch(chap)
166
+ batch.append(chap)
167
+ if len(batch) >= self.storage_batch_size:
168
+ await flush_batch(batch, src)
169
+
170
+ if cancelled():
171
+ # Drain whatever is already parsed
172
+ try:
173
+ while True:
174
+ nxt = save_q.get_nowait()
175
+ if isinstance(nxt, StopToken):
176
+ stop_count += 1
177
+ else:
178
+ nbatch, nsrc = select_batch(nxt)
179
+ nbatch.append(nxt)
180
+ except asyncio.QueueEmpty:
181
+ pass
182
+ await flush_all()
183
+ # Wait for remaining STOPs to arrive
184
+ while stop_count < NUM_WORKERS:
185
+ nxt = await save_q.get()
186
+ if nxt is STOP:
187
+ stop_count += 1
188
+ return
189
+
190
+ async def chapter_worker() -> None:
191
+ """
192
+ Single worker: fetch + parse with retry, then enqueue ChapterDict.
193
+
194
+ Exits on STOP. If cancelled, does not start a new fetch; signals STOP.
195
+ """
196
+ while True:
197
+ cid = await cid_q.get()
198
+ if isinstance(cid, StopToken):
199
+ await save_q.put(STOP)
200
+ return
201
+
202
+ if not cid or cid in ignore_set:
203
+ continue
204
+
205
+ if cancelled():
206
+ await save_q.put(STOP)
207
+ return
208
+
209
+ async with sem:
210
+ chap = await self._process_chapter(book_id, cid, html_dir)
211
+ if chap and not cancelled():
212
+ await save_q.put(chap)
213
+
214
+ await async_jitter_sleep(
215
+ self.request_interval,
216
+ mul_spread=1.1,
217
+ max_sleep=self.request_interval + 2,
218
+ )
150
219
 
151
- @asynccontextmanager
152
- async def task_group_ctx() -> AsyncIterator[None]:
220
+ async def producer() -> None:
221
+ """
222
+ Enqueue chapter IDs respecting start/end/skip_existing.
223
+
224
+ Always emits STOP x NUM_WORKERS at the end (even if cancelled early).
225
+ """
226
+ try:
227
+ async for cid in self._chapter_ids(vols, start_id, end_id):
228
+ if cancelled():
229
+ break
230
+ if self.skip_existing and (
231
+ chapter_storage.exists(cid, self.DEFAULT_SOURCE_ID)
232
+ or chapter_storage.exists(cid, self.ENCRYPTED_SOURCE_ID)
233
+ ):
234
+ # Already have either variant; count as done.
235
+ await progress.bump(1)
236
+ else:
237
+ await cid_q.put(cid)
238
+ finally:
239
+ for _ in range(NUM_WORKERS):
240
+ await cid_q.put(STOP)
241
+
242
+ # ---- run tasks ---
153
243
  async with asyncio.TaskGroup() as tg:
154
- tg.create_task(
155
- self._chapter_worker(
156
- book_id,
157
- ignore_set,
158
- cid_q,
159
- save_q,
160
- sem,
161
- )
244
+ tg.create_task(storage_worker())
245
+ for _ in range(NUM_WORKERS):
246
+ tg.create_task(chapter_worker())
247
+ tg.create_task(producer())
248
+
249
+ # ---- done ---
250
+ if cancelled():
251
+ self.logger.info(
252
+ "%s Novel '%s' cancelled: flushed %d/%d chapters.",
253
+ TAG,
254
+ book_info.get("book_name", "unknown"),
255
+ progress.done,
256
+ progress.total,
257
+ )
258
+ else:
259
+ self.logger.info(
260
+ "%s Novel '%s' download completed.",
261
+ TAG,
262
+ book_info.get("book_name", "unknown"),
162
263
  )
163
- tg.create_task(storage_worker(save_q))
164
- yield
165
-
166
- # run producer + workers, send None sentinels to shut down loops
167
- async with task_group_ctx():
168
- await producer()
169
-
170
- # signal fetcher to exit
171
- await cid_q.put(None)
172
- await cid_q.join()
173
-
174
- # signal storage to exit
175
- await save_q.put(None)
176
- await save_q.join()
177
-
178
- # final flush for both batches
179
- await _flush(default_batch, self.DEFAULT_SOURCE_ID)
180
- await _flush(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
181
-
182
- chapter_storage.close()
183
- self.logger.info(
184
- "%s Novel '%s' download completed.",
185
- TAG,
186
- book_info.get("book_name", "unknown"),
187
- )
264
+
265
+ finally:
266
+ chapter_storage.close()
188
267
 
189
268
  @staticmethod
190
269
  def _check_restricted(html_list: list[str]) -> bool:
@@ -205,40 +284,6 @@ class QidianDownloader(BaseDownloader):
205
284
  return True
206
285
  return '"cES":2' in html_list[0]
207
286
 
208
- async def _chapter_worker(
209
- self,
210
- book_id: str,
211
- ignore_set: set[str],
212
- cid_q: asyncio.Queue[str | None],
213
- save_q: asyncio.Queue[ChapterDict | None],
214
- sem: asyncio.Semaphore,
215
- ) -> None:
216
- """
217
- Worker that processes one chapter at a time:
218
- fetch + parse with retry, then enqueue to save_q.
219
- """
220
- html_dir = self._debug_dir / book_id / "html"
221
- while True:
222
- cid = await cid_q.get()
223
- if cid is None:
224
- cid_q.task_done()
225
- break
226
- if not cid or cid in ignore_set:
227
- cid_q.task_done()
228
- continue
229
-
230
- async with sem:
231
- chap = await self._process_chapter(book_id, cid, html_dir)
232
- if chap:
233
- await save_q.put(chap)
234
-
235
- cid_q.task_done()
236
- await async_sleep_with_random_delay(
237
- self.request_interval,
238
- mul_spread=1.1,
239
- max_sleep=self.request_interval + 2,
240
- )
241
-
242
287
  async def _process_chapter(
243
288
  self,
244
289
  book_id: str,
@@ -247,7 +292,8 @@ class QidianDownloader(BaseDownloader):
247
292
  ) -> ChapterDict | None:
248
293
  """
249
294
  Fetch, debug-save, parse a single chapter with retries.
250
- Returns ChapterDict or None on failure.
295
+
296
+ :return: ChapterDict on success, or None on failure.
251
297
  """
252
298
  for attempt in range(self.retry_times + 1):
253
299
  try:
@@ -280,7 +326,7 @@ class QidianDownloader(BaseDownloader):
280
326
  "[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
281
327
  )
282
328
  backoff = self.backoff_factor * (2**attempt)
283
- await async_sleep_with_random_delay(
329
+ await async_jitter_sleep(
284
330
  base=backoff,
285
331
  mul_spread=1.2,
286
332
  max_sleep=backoff + 3,
@@ -3,6 +3,7 @@
3
3
  novel_downloader.core.downloaders.registry
4
4
  ------------------------------------------
5
5
 
6
+ Registry and factory helpers for creating site-specific or common downloaders
6
7
  """
7
8
 
8
9
  __all__ = ["register_downloader", "get_downloader"]
@@ -10,6 +11,7 @@ __all__ = ["register_downloader", "get_downloader"]
10
11
  from collections.abc import Callable, Sequence
11
12
  from typing import TypeVar
12
13
 
14
+ from novel_downloader.core.downloaders.common import CommonDownloader
13
15
  from novel_downloader.core.interfaces import (
14
16
  DownloaderProtocol,
15
17
  FetcherProtocol,
@@ -62,6 +64,6 @@ def get_downloader(
62
64
  site_key = site.lower()
63
65
  try:
64
66
  downloader_cls = _DOWNLOADER_MAP[site_key]
65
- except KeyError as err:
66
- raise ValueError(f"Unsupported site: {site}") from err
67
+ except KeyError:
68
+ return CommonDownloader(fetcher, parser, config, site_key)
67
69
  return downloader_cls(fetcher, parser, config)
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.downloaders.signals
4
+ -----------------------------------------
5
+
6
+ Utilities for signaling task termination and reporting async progress.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Awaitable, Callable
12
+ from typing import Final, final
13
+
14
+
15
+ @final
16
+ class StopToken:
17
+ """Typed sentinel used to end queues."""
18
+
19
+ __slots__ = ()
20
+
21
+ def __repr__(self) -> str:
22
+ return "STOP"
23
+
24
+
25
+ STOP: Final[StopToken] = StopToken()
26
+
27
+ # from typing_extensions import TypeIs
28
+ # def is_stop(x: object) -> TypeIs[StopToken]:
29
+ # """Type guard so `if is_stop(item)` narrows type to StopToken."""
30
+ # return isinstance(x, StopToken)
31
+
32
+
33
+ class Progress:
34
+ """Lightweight progress reporter."""
35
+
36
+ __slots__ = ("done", "total", "hook")
37
+
38
+ def __init__(self, total: int, hook: Callable[[int, int], Awaitable[None]] | None):
39
+ self.done = 0
40
+ self.total = total
41
+ self.hook = hook
42
+
43
+ async def bump(self, n: int = 1) -> None:
44
+ self.done += n
45
+ if self.hook:
46
+ await self.hook(self.done, self.total)
@@ -3,34 +3,17 @@
3
3
  novel_downloader.core.exporters
4
4
  -------------------------------
5
5
 
6
- This module defines exporter classes for different novel platforms.
7
-
8
- Currently supported platforms:
9
- - biquge (笔趣阁)
10
- - esjzone (ESJ Zone)
11
- - linovelib (哔哩轻小说)
12
- - qianbi (铅笔小说)
13
- - qidian (起点中文网)
14
- - sfacg (SF轻小说)
15
- - yamibo (百合会)
6
+ Exporter implementations for saving books in various formats across different sources
16
7
  """
17
8
 
18
9
  __all__ = [
19
10
  "get_exporter",
20
- "BiqugeExporter",
21
- "EsjzoneExporter",
11
+ "CommonExporter",
22
12
  "LinovelibExporter",
23
- "QianbiExporter",
24
13
  "QidianExporter",
25
- "SfacgExporter",
26
- "YamiboExporter",
27
14
  ]
28
15
 
29
- from .biquge import BiqugeExporter
30
- from .esjzone import EsjzoneExporter
16
+ from .common import CommonExporter
31
17
  from .linovelib import LinovelibExporter
32
- from .qianbi import QianbiExporter
33
18
  from .qidian import QidianExporter
34
19
  from .registry import get_exporter
35
- from .sfacg import SfacgExporter
36
- from .yamibo import YamiboExporter