novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,15 +3,22 @@
3
3
  novel_downloader.core.downloaders.qidian
4
4
  ----------------------------------------
5
5
 
6
+ Downloader implementation for Qidian novels,
7
+ with handling for restricted and encrypted chapters
6
8
  """
7
9
 
8
10
  import asyncio
9
- import json
10
11
  from collections.abc import Awaitable, Callable
11
- from contextlib import suppress
12
- from typing import Any, cast
12
+ from pathlib import Path
13
+ from typing import Any
13
14
 
14
15
  from novel_downloader.core.downloaders.base import BaseDownloader
16
+ from novel_downloader.core.downloaders.registry import register_downloader
17
+ from novel_downloader.core.downloaders.signals import (
18
+ STOP,
19
+ Progress,
20
+ StopToken,
21
+ )
15
22
  from novel_downloader.core.interfaces import (
16
23
  FetcherProtocol,
17
24
  ParserProtocol,
@@ -19,23 +26,30 @@ from novel_downloader.core.interfaces import (
19
26
  from novel_downloader.models import (
20
27
  BookConfig,
21
28
  ChapterDict,
22
- CidTask,
23
29
  DownloaderConfig,
24
- HtmlTask,
25
30
  )
26
- from novel_downloader.utils.chapter_storage import ChapterStorage
27
- from novel_downloader.utils.file_utils import save_as_json, save_as_txt
28
- from novel_downloader.utils.time_utils import (
29
- async_sleep_with_random_delay,
30
- calculate_time_difference,
31
+ from novel_downloader.utils import (
32
+ ChapterStorage,
33
+ async_jitter_sleep,
31
34
  )
32
35
 
33
36
 
37
+ @register_downloader(site_keys=["qidian", "qd"])
34
38
  class QidianDownloader(BaseDownloader):
35
39
  """
36
- Specialized downloader for Qidian novels.
40
+ Specialized downloader for Qidian (起点) novels.
41
+
42
+ Processes each chapter in a single worker that
43
+ handles fetch -> parse -> enqueue storage.
37
44
  """
38
45
 
46
+ DEFAULT_SOURCE_ID = 0
47
+ ENCRYPTED_SOURCE_ID = 1
48
+ PRIORITIES_MAP = {
49
+ DEFAULT_SOURCE_ID: 0,
50
+ ENCRYPTED_SOURCE_ID: 1,
51
+ }
52
+
39
53
  def __init__(
40
54
  self,
41
55
  fetcher: FetcherProtocol,
@@ -50,6 +64,7 @@ class QidianDownloader(BaseDownloader):
50
64
  book: BookConfig,
51
65
  *,
52
66
  progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
67
+ cancel_event: asyncio.Event | None = None,
53
68
  **kwargs: Any,
54
69
  ) -> None:
55
70
  """
@@ -58,306 +73,200 @@ class QidianDownloader(BaseDownloader):
58
73
  :param book: BookConfig with at least 'book_id'.
59
74
  """
60
75
  TAG = "[Downloader]"
76
+ NUM_WORKERS = 1
77
+
61
78
  book_id = book["book_id"]
62
79
  start_id = book.get("start_id")
63
80
  end_id = book.get("end_id")
64
81
  ignore_set = set(book.get("ignore_ids", []))
65
82
 
66
- raw_base = self.raw_data_dir / book_id
67
- cache_base = self.cache_dir / book_id
68
- info_path = raw_base / "book_info.json"
69
- chapters_html_dir = cache_base / "html"
70
-
83
+ raw_base = self._raw_data_dir / book_id
71
84
  raw_base.mkdir(parents=True, exist_ok=True)
72
- if self.save_html:
73
- chapters_html_dir.mkdir(parents=True, exist_ok=True)
74
- normal_cs = ChapterStorage(
75
- raw_base=raw_base,
76
- namespace="chapters",
77
- backend_type=self._config.storage_backend,
78
- batch_size=self._config.storage_batch_size,
79
- )
80
- encrypted_cs = ChapterStorage(
85
+ html_dir = self._debug_dir / book_id / "html"
86
+
87
+ chapter_storage = ChapterStorage(
81
88
  raw_base=raw_base,
82
- namespace="encrypted_chapters",
83
- backend_type=self._config.storage_backend,
84
- batch_size=self._config.storage_batch_size,
89
+ priorities=self.PRIORITIES_MAP,
85
90
  )
86
-
87
- # load or fetch book_info
88
- book_info: dict[str, Any]
89
- re_fetch = True
90
- old_data: dict[str, Any] = {}
91
-
92
- if info_path.exists():
93
- try:
94
- old_data = json.loads(info_path.read_text("utf-8"))
95
- days, *_ = calculate_time_difference(
96
- old_data.get("update_time", ""), "UTC+8"
97
- )
98
- re_fetch = days > 1
99
- except Exception:
100
- re_fetch = True
101
-
102
- if re_fetch:
103
- info_html = await self.fetcher.get_book_info(book_id)
104
- if self.save_html:
105
- for i, html in enumerate(info_html):
106
- save_as_txt(html, chapters_html_dir / f"info_{i}.html")
107
- book_info = self.parser.parse_book_info(info_html)
108
-
109
- if book_info.get("book_name") != "未找到书名":
110
- save_as_json(book_info, info_path)
111
- else:
112
- self.logger.warning("%s 书籍信息未找到, book_id = %s", TAG, book_id)
113
- book_info = old_data or {"book_name": "未找到书名"}
114
- else:
115
- book_info = old_data
116
-
117
- vols = book_info.get("volumes", [])
118
- total_chapters = 0
119
- for vol in vols:
120
- total_chapters += len(vol.get("chapters", []))
121
- if total_chapters == 0:
122
- self.logger.warning("%s 书籍没有章节可下载: book_id=%s", TAG, book_id)
123
- return
124
-
125
- completed_count = 0
126
-
127
- # setup queue
128
- cid_queue: asyncio.Queue[CidTask] = asyncio.Queue()
129
- html_queue: asyncio.Queue[HtmlTask] = asyncio.Queue()
130
- save_queue: asyncio.Queue[ChapterDict] = asyncio.Queue()
131
-
132
- async def fetcher_worker(
133
- book_id: str,
134
- cid_queue: asyncio.Queue[CidTask],
135
- html_queue: asyncio.Queue[HtmlTask],
136
- retry_times: int,
137
- ) -> None:
138
- while True:
139
- task = await cid_queue.get()
140
- cid = task.cid
141
- if not cid:
142
- self.logger.warning("[Fetcher] Skipped empty cid task: %s", task)
143
- cid_queue.task_done()
144
- continue
145
-
146
- if cid in ignore_set:
147
- cid_queue.task_done()
148
- continue
149
-
91
+ chapter_storage.connect()
92
+
93
+ def cancelled() -> bool:
94
+ return bool(cancel_event and cancel_event.is_set())
95
+
96
+ try:
97
+ # ---- metadata ---
98
+ book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
99
+ if not book_info:
100
+ return
101
+
102
+ vols = book_info["volumes"]
103
+ total_chapters = sum(len(v["chapters"]) for v in vols)
104
+ if total_chapters == 0:
105
+ self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
106
+ return
107
+
108
+ progress = Progress(total_chapters, progress_hook)
109
+
110
+ # ---- queues & batching ---
111
+ cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
112
+ save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
113
+ default_batch: list[ChapterDict] = []
114
+ encrypted_batch: list[ChapterDict] = []
115
+
116
+ def select_batch(chap: ChapterDict) -> tuple[list[ChapterDict], int]:
117
+ # set extra.encrypted (by parser); default to plain if absent.
118
+ if chap.get("extra", {}).get("encrypted", False):
119
+ return encrypted_batch, self.ENCRYPTED_SOURCE_ID
120
+ return default_batch, self.DEFAULT_SOURCE_ID
121
+
122
+ async def flush_batch(batch: list[ChapterDict], src: int) -> None:
123
+ if not batch:
124
+ return
150
125
  try:
151
- html_list = await self.fetcher.get_book_chapter(book_id, cid)
152
- await html_queue.put(
153
- HtmlTask(cid=cid, retry=task.retry, html_list=html_list)
154
- )
155
- self.logger.info("[Fetcher] Downloaded chapter %s", cid)
156
- await async_sleep_with_random_delay(
157
- self.request_interval,
158
- mul_spread=1.1,
159
- max_sleep=self.request_interval + 2,
160
- )
161
-
126
+ chapter_storage.upsert_chapters(batch, src)
162
127
  except Exception as e:
163
- if task.retry < retry_times:
164
- await cid_queue.put(
165
- CidTask(
166
- prev_cid=task.prev_cid,
167
- cid=cid,
168
- retry=task.retry + 1,
169
- )
170
- )
171
- self.logger.info(
172
- "[Fetcher] Re-queued chapter %s for retry #%d: %s",
173
- cid,
174
- task.retry + 1,
175
- e,
176
- )
177
- backoff = self.backoff_factor * (2**task.retry)
178
- await async_sleep_with_random_delay(
179
- base=backoff,
180
- mul_spread=1.2,
181
- max_sleep=backoff + 3,
182
- )
183
- else:
184
- self.logger.warning(
185
- "[Fetcher] Max retries reached for chapter %s: %s",
186
- cid,
187
- e,
188
- )
189
-
190
- finally:
191
- cid_queue.task_done()
192
-
193
- async def parser_worker(
194
- cid_queue: asyncio.Queue[CidTask],
195
- html_queue: asyncio.Queue[HtmlTask],
196
- save_queue: asyncio.Queue[ChapterDict],
197
- retry_times: int,
198
- ) -> None:
199
- while True:
200
- task = await html_queue.get()
201
- skip_retry = False
202
- try:
203
- chap_json: ChapterDict | None = None
204
- if self.check_restricted(task.html_list):
205
- self.logger.info(
206
- "[Parser] Skipped restricted page for cid %s", task.cid
207
- )
208
- skip_retry = True
209
- raise ValueError("Restricted content detected")
210
-
211
- is_encrypted = self.check_encrypted(task.html_list)
212
- chap_json = await asyncio.to_thread(
213
- self.parser.parse_chapter,
214
- task.html_list,
215
- task.cid,
128
+ self.logger.error(
129
+ "[Storage] batch upsert failed (size=%d, src=%d): %s",
130
+ len(batch),
131
+ src,
132
+ e,
133
+ exc_info=True,
216
134
  )
217
- if is_encrypted:
218
- skip_retry = True
219
- if self.save_html:
220
- folder = chapters_html_dir / (
221
- "html_encrypted" if is_encrypted else "html_plain"
222
- )
223
- html_path = folder / f"{task.cid}.html"
224
- save_as_txt(task.html_list[0], html_path, on_exist="skip")
225
- self.logger.debug(
226
- "%s Saved raw HTML for chapter %s to %s",
227
- TAG,
228
- task.cid,
229
- html_path,
230
- )
231
- if chap_json:
232
- await save_queue.put(chap_json)
233
- self.logger.info(
234
- "[Parser] saved chapter %s",
235
- task.cid,
236
- )
237
- else:
238
- raise ValueError("Empty parse result")
239
- except Exception as e:
240
- if not skip_retry and task.retry < retry_times:
241
- await cid_queue.put(
242
- CidTask(prev_cid=None, cid=task.cid, retry=task.retry + 1)
243
- )
244
- self.logger.info(
245
- "[Parser] Re-queued cid %s for retry #%d: %s",
246
- task.cid,
247
- task.retry + 1,
248
- e,
249
- )
250
- elif not skip_retry:
251
- self.logger.warning(
252
- "[Parser] Max retries reached for cid %s: %s",
253
- task.cid,
254
- e,
255
- )
135
+ else:
136
+ await progress.bump(len(batch))
256
137
  finally:
257
- html_queue.task_done()
258
-
259
- async def storage_worker(
260
- normal_cs: ChapterStorage,
261
- encrypted_cs: ChapterStorage,
262
- save_queue: asyncio.Queue[ChapterDict],
263
- ) -> None:
264
- nonlocal completed_count
265
- while True:
266
- item = await save_queue.get()
267
- try:
268
- is_encrypted = item.get("extra", {}).get("encrypted", False)
269
- cs = encrypted_cs if is_encrypted else normal_cs
270
- cs.save(cast(ChapterDict, item))
271
- completed_count += 1
272
- if progress_hook:
273
- await progress_hook(completed_count, total_chapters)
274
- except Exception as e:
275
- self.logger.error("[storage_worker] Failed to save: %s", e)
276
- finally:
277
- save_queue.task_done()
278
-
279
- fetcher_task = asyncio.create_task(
280
- fetcher_worker(
281
- book_id,
282
- cid_queue,
283
- html_queue,
284
- self.retry_times,
285
- )
286
- )
287
-
288
- parser_task = asyncio.create_task(
289
- parser_worker(
290
- cid_queue,
291
- html_queue,
292
- save_queue,
293
- self.retry_times,
294
- )
295
- )
296
-
297
- storage_task = asyncio.create_task(
298
- storage_worker(
299
- normal_cs=normal_cs,
300
- encrypted_cs=encrypted_cs,
301
- save_queue=save_queue,
302
- )
303
- )
304
-
305
- found_start = start_id is None
306
- stop_early = False
307
-
308
- for vol in book_info.get("volumes", []):
309
- chapters = vol.get("chapters", [])
310
- for chap in chapters:
311
- if stop_early:
312
- break
313
-
314
- cid = chap.get("chapterId")
315
- if not cid:
316
- continue
317
-
318
- if not found_start:
319
- if cid == start_id:
320
- found_start = True
321
- else:
322
- completed_count += 1
138
+ batch.clear()
139
+
140
+ async def flush_all() -> None:
141
+ await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
142
+ await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
143
+
144
+ # ---- workers ---
145
+ sem = asyncio.Semaphore(self.workers)
146
+
147
+ async def storage_worker() -> None:
148
+ """
149
+ Consumes parsed chapters, batches by source, flushes on threshold.
150
+
151
+ Terminates after receiving STOP from each chapter worker.
152
+
153
+ On cancel: drains queue, flushes once, then waits for remaining STOPs.
154
+ """
155
+ stop_count = 0
156
+ while True:
157
+ chap = await save_q.get()
158
+ if isinstance(chap, StopToken):
159
+ stop_count += 1
160
+ if stop_count == NUM_WORKERS:
161
+ await flush_all()
162
+ return
323
163
  continue
324
164
 
325
- if end_id is not None and cid == end_id:
326
- stop_early = True
327
-
328
- if cid in ignore_set:
329
- continue
330
-
331
- if normal_cs.exists(cid) and self.skip_existing:
332
- completed_count += 1
333
- continue
165
+ batch, src = select_batch(chap)
166
+ batch.append(chap)
167
+ if len(batch) >= self.storage_batch_size:
168
+ await flush_batch(batch, src)
169
+
170
+ if cancelled():
171
+ # Drain whatever is already parsed
172
+ try:
173
+ while True:
174
+ nxt = save_q.get_nowait()
175
+ if isinstance(nxt, StopToken):
176
+ stop_count += 1
177
+ else:
178
+ nbatch, nsrc = select_batch(nxt)
179
+ nbatch.append(nxt)
180
+ except asyncio.QueueEmpty:
181
+ pass
182
+ await flush_all()
183
+ # Wait for remaining STOPs to arrive
184
+ while stop_count < NUM_WORKERS:
185
+ nxt = await save_q.get()
186
+ if nxt is STOP:
187
+ stop_count += 1
188
+ return
189
+
190
+ async def chapter_worker() -> None:
191
+ """
192
+ Single worker: fetch + parse with retry, then enqueue ChapterDict.
193
+
194
+ Exits on STOP. If cancelled, does not start a new fetch; signals STOP.
195
+ """
196
+ while True:
197
+ cid = await cid_q.get()
198
+ if isinstance(cid, StopToken):
199
+ await save_q.put(STOP)
200
+ return
201
+
202
+ if not cid or cid in ignore_set:
203
+ continue
334
204
 
335
- await cid_queue.put(CidTask(cid=cid, prev_cid=None))
205
+ if cancelled():
206
+ await save_q.put(STOP)
207
+ return
336
208
 
337
- if stop_early:
338
- break
209
+ async with sem:
210
+ chap = await self._process_chapter(book_id, cid, html_dir)
211
+ if chap and not cancelled():
212
+ await save_q.put(chap)
339
213
 
340
- await cid_queue.join()
341
- await html_queue.join()
342
- await save_queue.join()
214
+ await async_jitter_sleep(
215
+ self.request_interval,
216
+ mul_spread=1.1,
217
+ max_sleep=self.request_interval + 2,
218
+ )
343
219
 
344
- for task in [fetcher_task, parser_task, storage_task]:
345
- task.cancel()
346
- with suppress(asyncio.CancelledError):
347
- await task
220
+ async def producer() -> None:
221
+ """
222
+ Enqueue chapter IDs respecting start/end/skip_existing.
348
223
 
349
- normal_cs.close()
350
- encrypted_cs.close()
224
+ Always emits STOP x NUM_WORKERS at the end (even if cancelled early).
225
+ """
226
+ try:
227
+ async for cid in self._chapter_ids(vols, start_id, end_id):
228
+ if cancelled():
229
+ break
230
+ if self.skip_existing and (
231
+ chapter_storage.exists(cid, self.DEFAULT_SOURCE_ID)
232
+ or chapter_storage.exists(cid, self.ENCRYPTED_SOURCE_ID)
233
+ ):
234
+ # Already have either variant; count as done.
235
+ await progress.bump(1)
236
+ else:
237
+ await cid_q.put(cid)
238
+ finally:
239
+ for _ in range(NUM_WORKERS):
240
+ await cid_q.put(STOP)
241
+
242
+ # ---- run tasks ---
243
+ async with asyncio.TaskGroup() as tg:
244
+ tg.create_task(storage_worker())
245
+ for _ in range(NUM_WORKERS):
246
+ tg.create_task(chapter_worker())
247
+ tg.create_task(producer())
248
+
249
+ # ---- done ---
250
+ if cancelled():
251
+ self.logger.info(
252
+ "%s Novel '%s' cancelled: flushed %d/%d chapters.",
253
+ TAG,
254
+ book_info.get("book_name", "unknown"),
255
+ progress.done,
256
+ progress.total,
257
+ )
258
+ else:
259
+ self.logger.info(
260
+ "%s Novel '%s' download completed.",
261
+ TAG,
262
+ book_info.get("book_name", "unknown"),
263
+ )
351
264
 
352
- self.logger.info(
353
- "%s Novel '%s' download completed.",
354
- TAG,
355
- book_info.get("book_name", "unknown"),
356
- )
357
- return
265
+ finally:
266
+ chapter_storage.close()
358
267
 
359
268
  @staticmethod
360
- def check_restricted(html_list: list[str]) -> bool:
269
+ def _check_restricted(html_list: list[str]) -> bool:
361
270
  """
362
271
  Return True if page content indicates access restriction
363
272
  (e.g. not subscribed/purchased).
@@ -370,7 +279,58 @@ class QidianDownloader(BaseDownloader):
370
279
  return any(m in html_list[0] for m in markers)
371
280
 
372
281
  @staticmethod
373
- def check_encrypted(html_list: list[str]) -> bool:
282
+ def _check_encrypted(html_list: list[str]) -> bool:
374
283
  if not html_list:
375
284
  return True
376
285
  return '"cES":2' in html_list[0]
286
+
287
async def _process_chapter(
    self,
    book_id: str,
    cid: str,
    html_dir: Path,
) -> ChapterDict | None:
    """
    Download one chapter, dump its raw pages, and parse it, retrying on error.

    Each attempt fetches the pages through ``self.fetcher``, writes them via
    ``self._save_html_pages`` into ``html_dir / "html_encrypted"`` or
    ``html_dir / "html_plain"``, and parses them with ``self.parser`` in a
    worker thread.

    Restricted chapters and encrypted chapters whose parse result is empty
    are given up on immediately (no retry). Any exception, including the
    ``ValueError`` raised for an empty plain-chapter parse, triggers a
    retry until ``self.retry_times`` retries have been used.

    :param book_id: Identifier of the book the chapter belongs to.
    :param cid: Identifier of the chapter to process.
    :param html_dir: Base directory for the saved raw HTML pages.
    :return: The parsed chapter on success, otherwise None.
    """
    for try_no in range(self.retry_times + 1):
        try:
            pages = await self.fetcher.get_book_chapter(book_id, cid)

            if self._check_restricted(pages):
                self.logger.info(
                    "[ChapterWorker] Restricted content detected: %s", cid
                )
                return None

            is_encrypted = self._check_encrypted(pages)
            subdir = "html_plain"
            if is_encrypted:
                subdir = "html_encrypted"
            self._save_html_pages(html_dir / subdir, cid, pages)

            # Parsing is synchronous; run it off the event loop thread.
            parsed = await asyncio.to_thread(
                self.parser.parse_chapter, pages, cid
            )
            if parsed:
                return parsed

            if is_encrypted:
                # An empty result for an encrypted chapter is not retried.
                self.logger.info(
                    "[ChapterWorker] Fail for encrypted chapter: %s", cid
                )
                return None

            # Empty result for a plain chapter: route it through the retry path.
            raise ValueError("Empty parse result")

        except Exception as exc:
            if try_no >= self.retry_times:
                # Retry budget exhausted.
                self.logger.warning("[ChapterWorker] Failed %s: %s", cid, exc)
                return None

            self.logger.info(
                "[ChapterWorker] Retry %s (%s): %s", cid, try_no + 1, exc
            )
            # Exponential base delay: backoff_factor * 2**attempt.
            delay = self.backoff_factor * (2**try_no)
            await async_jitter_sleep(
                base=delay,
                mul_spread=1.2,
                max_sleep=delay + 3,
            )

    return None
@@ -0,0 +1,69 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.downloaders.registry
4
+ ------------------------------------------
5
+
6
+ Registry and factory helpers for creating site-specific or common downloaders
7
+ """
8
+
9
+ __all__ = ["register_downloader", "get_downloader"]
10
+
11
+ from collections.abc import Callable, Sequence
12
+ from typing import TypeVar
13
+
14
+ from novel_downloader.core.downloaders.common import CommonDownloader
15
+ from novel_downloader.core.interfaces import (
16
+ DownloaderProtocol,
17
+ FetcherProtocol,
18
+ ParserProtocol,
19
+ )
20
+ from novel_downloader.models import DownloaderConfig
21
+
22
# Bound type variable: lets ``register_downloader`` return exactly the class
# type it was given.
D = TypeVar("D", bound=DownloaderProtocol)

# Shape of a registry entry: called as (fetcher, parser, config) and yields
# a downloader.
DownloaderBuilder = Callable[
    [FetcherProtocol, ParserProtocol, DownloaderConfig], DownloaderProtocol
]

# Site key -> builder. Filled by ``register_downloader`` using lower-cased keys.
_DOWNLOADER_MAP: dict[str, DownloaderBuilder] = {}
28
+
29
+
30
def register_downloader(
    site_keys: Sequence[str],
) -> Callable[[type[D]], type[D]]:
    """
    Decorator factory that registers a downloader class under site keys.

    Every key is stored lower-cased in ``_DOWNLOADER_MAP``. A bare string is
    treated as a single key instead of being iterated character by character.

    :param site_keys: Sequence of site identifiers (a single string is
                      accepted and registered as one key).
    :return: A class decorator that populates _DOWNLOADER_MAP and returns
             the decorated class unchanged.
    """
    # ``str`` itself satisfies ``Sequence[str]``: without this guard,
    # ``@register_downloader("qidian")`` would register "q", "i", "d", ...
    if isinstance(site_keys, str):
        keys: tuple[str, ...] = (site_keys,)
    else:
        keys = tuple(site_keys)

    def decorator(cls: type[D]) -> type[D]:
        for key in keys:
            _DOWNLOADER_MAP[key.lower()] = cls
        return cls

    return decorator
46
+
47
+
48
def get_downloader(
    fetcher: FetcherProtocol,
    parser: ParserProtocol,
    site: str,
    config: DownloaderConfig,
) -> DownloaderProtocol:
    """
    Build the downloader for the given site.

    The site name is lower-cased and looked up in ``_DOWNLOADER_MAP``. When
    no entry exists, a ``CommonDownloader`` is created and handed the
    lower-cased site key.

    :param fetcher: Fetcher implementation
    :param parser: Parser implementation
    :param site: Site name (e.g., 'qidian')
    :param config: Downloader configuration

    :return: An instance of a downloader class
    """
    key = site.lower()
    builder = _DOWNLOADER_MAP.get(key)
    if builder is None:
        # Nothing registered for this site: fall back to the common downloader.
        return CommonDownloader(fetcher, parser, config, key)
    return builder(fetcher, parser, config)