novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +79 -66
  6. novel_downloader/cli/export.py +17 -21
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +206 -209
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +5 -5
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +17 -12
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +20 -14
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +6 -19
  79. novel_downloader/core/interfaces/parser.py +7 -8
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +64 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +64 -69
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/main_parser.py +756 -48
  100. novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
  101. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  102. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  103. novel_downloader/core/parsers/quanben5.py +103 -0
  104. novel_downloader/core/parsers/registry.py +5 -16
  105. novel_downloader/core/parsers/sfacg.py +38 -45
  106. novel_downloader/core/parsers/shencou.py +215 -0
  107. novel_downloader/core/parsers/shuhaige.py +111 -0
  108. novel_downloader/core/parsers/tongrenquan.py +116 -0
  109. novel_downloader/core/parsers/ttkan.py +132 -0
  110. novel_downloader/core/parsers/wanbengo.py +191 -0
  111. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  112. novel_downloader/core/parsers/xiguashuwu.py +429 -0
  113. novel_downloader/core/parsers/xs63b.py +161 -0
  114. novel_downloader/core/parsers/xshbook.py +134 -0
  115. novel_downloader/core/parsers/yamibo.py +87 -131
  116. novel_downloader/core/parsers/yibige.py +166 -0
  117. novel_downloader/core/searchers/__init__.py +34 -3
  118. novel_downloader/core/searchers/aaatxt.py +107 -0
  119. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  120. novel_downloader/core/searchers/base.py +112 -36
  121. novel_downloader/core/searchers/dxmwx.py +105 -0
  122. novel_downloader/core/searchers/eightnovel.py +84 -0
  123. novel_downloader/core/searchers/esjzone.py +43 -25
  124. novel_downloader/core/searchers/hetushu.py +92 -0
  125. novel_downloader/core/searchers/i25zw.py +93 -0
  126. novel_downloader/core/searchers/ixdzs8.py +107 -0
  127. novel_downloader/core/searchers/jpxs123.py +107 -0
  128. novel_downloader/core/searchers/piaotia.py +100 -0
  129. novel_downloader/core/searchers/qbtr.py +106 -0
  130. novel_downloader/core/searchers/qianbi.py +74 -40
  131. novel_downloader/core/searchers/quanben5.py +144 -0
  132. novel_downloader/core/searchers/registry.py +24 -8
  133. novel_downloader/core/searchers/shuhaige.py +124 -0
  134. novel_downloader/core/searchers/tongrenquan.py +110 -0
  135. novel_downloader/core/searchers/ttkan.py +92 -0
  136. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  137. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  138. novel_downloader/core/searchers/xs63b.py +104 -0
  139. novel_downloader/locales/en.json +34 -85
  140. novel_downloader/locales/zh.json +35 -86
  141. novel_downloader/models/__init__.py +21 -22
  142. novel_downloader/models/book.py +44 -0
  143. novel_downloader/models/config.py +4 -37
  144. novel_downloader/models/login.py +1 -1
  145. novel_downloader/models/search.py +5 -0
  146. novel_downloader/resources/config/settings.toml +8 -70
  147. novel_downloader/resources/json/xiguashuwu.json +718 -0
  148. novel_downloader/utils/__init__.py +13 -24
  149. novel_downloader/utils/chapter_storage.py +5 -5
  150. novel_downloader/utils/constants.py +4 -31
  151. novel_downloader/utils/cookies.py +38 -35
  152. novel_downloader/utils/crypto_utils/__init__.py +7 -0
  153. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  154. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  155. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  156. novel_downloader/utils/crypto_utils/rc4.py +54 -0
  157. novel_downloader/utils/epub/__init__.py +3 -4
  158. novel_downloader/utils/epub/builder.py +6 -6
  159. novel_downloader/utils/epub/constants.py +62 -21
  160. novel_downloader/utils/epub/documents.py +95 -201
  161. novel_downloader/utils/epub/models.py +8 -22
  162. novel_downloader/utils/epub/utils.py +73 -106
  163. novel_downloader/utils/file_utils/__init__.py +2 -23
  164. novel_downloader/utils/file_utils/io.py +53 -188
  165. novel_downloader/utils/file_utils/normalize.py +1 -7
  166. novel_downloader/utils/file_utils/sanitize.py +4 -15
  167. novel_downloader/utils/fontocr/__init__.py +5 -14
  168. novel_downloader/utils/fontocr/core.py +216 -0
  169. novel_downloader/utils/fontocr/loader.py +50 -0
  170. novel_downloader/utils/logger.py +81 -65
  171. novel_downloader/utils/network.py +17 -41
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  176. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  177. novel_downloader/utils/time_utils/__init__.py +5 -11
  178. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  179. novel_downloader/utils/time_utils/sleep_utils.py +55 -49
  180. novel_downloader/web/__init__.py +13 -0
  181. novel_downloader/web/components/__init__.py +11 -0
  182. novel_downloader/web/components/navigation.py +35 -0
  183. novel_downloader/web/main.py +66 -0
  184. novel_downloader/web/pages/__init__.py +17 -0
  185. novel_downloader/web/pages/download.py +78 -0
  186. novel_downloader/web/pages/progress.py +147 -0
  187. novel_downloader/web/pages/search.py +329 -0
  188. novel_downloader/web/services/__init__.py +17 -0
  189. novel_downloader/web/services/client_dialog.py +164 -0
  190. novel_downloader/web/services/cred_broker.py +113 -0
  191. novel_downloader/web/services/cred_models.py +35 -0
  192. novel_downloader/web/services/task_manager.py +264 -0
  193. novel_downloader-2.0.1.dist-info/METADATA +172 -0
  194. novel_downloader-2.0.1.dist-info/RECORD +206 -0
  195. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
  196. novel_downloader/core/downloaders/biquge.py +0 -29
  197. novel_downloader/core/downloaders/esjzone.py +0 -29
  198. novel_downloader/core/downloaders/linovelib.py +0 -29
  199. novel_downloader/core/downloaders/sfacg.py +0 -29
  200. novel_downloader/core/downloaders/yamibo.py +0 -29
  201. novel_downloader/core/exporters/biquge.py +0 -22
  202. novel_downloader/core/exporters/esjzone.py +0 -22
  203. novel_downloader/core/exporters/qianbi.py +0 -22
  204. novel_downloader/core/exporters/sfacg.py +0 -22
  205. novel_downloader/core/exporters/yamibo.py +0 -22
  206. novel_downloader/core/fetchers/base/__init__.py +0 -14
  207. novel_downloader/core/fetchers/base/browser.py +0 -422
  208. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  209. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  210. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  211. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  212. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  213. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  214. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  215. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  216. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  217. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  218. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  219. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  220. novel_downloader/core/parsers/biquge.py +0 -139
  221. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
  222. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
  223. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
  224. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  225. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
  226. novel_downloader/models/chapter.py +0 -25
  227. novel_downloader/models/types.py +0 -13
  228. novel_downloader/tui/__init__.py +0 -7
  229. novel_downloader/tui/app.py +0 -32
  230. novel_downloader/tui/main.py +0 -17
  231. novel_downloader/tui/screens/__init__.py +0 -14
  232. novel_downloader/tui/screens/home.py +0 -198
  233. novel_downloader/tui/screens/login.py +0 -74
  234. novel_downloader/tui/styles/home_layout.tcss +0 -79
  235. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  236. novel_downloader/utils/cache.py +0 -24
  237. novel_downloader/utils/crypto_utils.py +0 -71
  238. novel_downloader/utils/fontocr/hash_store.py +0 -280
  239. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  240. novel_downloader/utils/fontocr/model_loader.py +0 -69
  241. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  242. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  243. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  244. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  245. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  246. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  247. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  248. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
novel_downloader/core/parsers/qidian/utils/__init__.py
@@ -3,27 +3,9 @@
 novel_downloader.core.parsers.qidian.utils
 ------------------------------------------
 
+Utility functions and helpers for parsing and decrypting Qidian novel pages
 """
 
-__all__ = [
-    "find_ssr_page_context",
-    "extract_chapter_info",
-    "is_restricted_page",
-    "vip_status",
-    "can_view_chapter",
-    "is_encrypted",
-    "is_duplicated",
-    "QidianNodeDecryptor",
-    "get_decryptor",
-]
+__all__ = ["get_decryptor"]
 
-from .helpers import (
-    can_view_chapter,
-    extract_chapter_info,
-    find_ssr_page_context,
-    is_duplicated,
-    is_encrypted,
-    is_restricted_page,
-    vip_status,
-)
-from .node_decryptor import QidianNodeDecryptor, get_decryptor
+from .node_decryptor import get_decryptor
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py
@@ -25,7 +25,7 @@ import requests
 from novel_downloader.utils.constants import JS_SCRIPT_DIR
 
 DEST_ROOT: Final[Path] = JS_SCRIPT_DIR
-GITHUB_OWNER: Final = "BowenZ217"
+GITHUB_OWNER: Final = "saudadez21"
 GITHUB_REPO: Final = "qidian-decryptor"
 RELEASE_VERSION: Final = "v1.0.1"
 BASE_URL: Final = f"https://github.com/{GITHUB_OWNER}/{GITHUB_REPO}/releases/download/{RELEASE_VERSION}"
novel_downloader/core/parsers/qidian/utils/node_decryptor.py
@@ -36,10 +36,10 @@ class QidianNodeDecryptor:
     3. Download the remote Fock module JS if not already present.
 
     Calling `decrypt()` will:
-      - Write a temp JSON input file with [ciphertext, chapter_id, fkp, fuid].
-      - Spawn `node qidian_decrypt_node.js <in> <out>`.
-      - Read and return the decrypted text.
-      - Clean up the temp files.
+      * Write a temp JSON input file with [ciphertext, chapter_id, fkp, fuid].
+      * Spawn `node qidian_decrypt_node.js <in> <out>`.
+      * Read and return the decrypted text.
+      * Clean up the temp files.
     """
 
     QIDIAN_FOCK_JS_URL: str = (
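
For orientation, the four docstring steps amount to a temp-file round trip through Node. A minimal sketch of that flow, assuming a helper name and temp-file names of my own choosing (the class's actual internals may differ):

import json
import subprocess
import tempfile
from pathlib import Path


def decrypt_via_node(
    ciphertext: str, chapter_id: str, fkp: str, fuid: str, script: Path
) -> str:
    """Round-trip a payload through the Node.js helper (illustrative names)."""
    with tempfile.TemporaryDirectory() as tmp:
        in_path = Path(tmp) / "input.json"
        out_path = Path(tmp) / "output.txt"
        # 1. Write the JSON input payload the script expects.
        in_path.write_text(
            json.dumps([ciphertext, chapter_id, fkp, fuid]), encoding="utf-8"
        )
        # 2. Spawn `node qidian_decrypt_node.js <in> <out>`.
        subprocess.run(["node", str(script), str(in_path), str(out_path)], check=True)
        # 3. Read back the decrypted text; 4. temp files vanish with the context.
        return out_path.read_text(encoding="utf-8")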
novel_downloader/core/parsers/quanben5.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.quanben5
+--------------------------------------
+
+"""
+
+from datetime import datetime
+from typing import Any
+
+from lxml import html
+
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+
+
+@register_parser(
+    site_keys=["quanben5"],
+)
+class Quanben5Parser(BaseParser):
+    """
+    Parser for 全本小说网 book pages.
+    """
+
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        if not html_list:
+            return None
+
+        tree = html.fromstring(html_list[0])
+        book_name = self._first_str(tree.xpath("//h3/span/text()"))
+        author = self._first_str(
+            tree.xpath(
+                '//p[@class="info"][contains(., "作者")]/span[@class="author"]/text()'
+            )
+        )
+        cover_url = self._first_str(tree.xpath('//div[@class="pic"]/img/@src'))
+        category = self._first_str(
+            tree.xpath('//p[@class="info"][contains(., "类别")]/span/text()')
+        )
+        tags = [category] if category else []
+        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        summary = self._first_str(tree.xpath('//p[@class="description"]/text()'))
+
+        chapters: list[ChapterInfoDict] = []
+        for li in tree.xpath('//ul[@class="list"]/li'):
+            link = li.xpath(".//a")[0]
+            href = link.get("href", "").strip()
+            title = self._first_str(link.xpath(".//span/text()"))
+            # '/n/toutian/83840.html' -> '83840'
+            chapter_id = href.rstrip(".html").split("/")[-1]
+            chapters.append({"title": title, "url": href, "chapterId": chapter_id})
+
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "tags": tags,
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
+
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        if not html_list:
+            return None
+
+        tree = html.fromstring(html_list[0])
+
+        # Extract the chapter title
+        title = self._first_str(tree.xpath('//h1[@class="title1"]/text()'))
+
+        # Extract all <p> text within the content container
+        paragraphs = tree.xpath('//div[@id="content"]/p/text()')
+        # Clean whitespace and join with newlines
+        content = "\n".join(p.strip() for p in paragraphs if p.strip())
+
+        if not content:
+            return None
+
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "quanben5"},
+        }
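
This parser, like every other new parser in 2.0.1, leans on `BaseParser._first_str` (ShencouParser below also uses `_norm_space`); the base-class diff (+64 -12) is not reproduced here. Judging purely from the call sites, `_first_str` behaves roughly like the sketch below. This is an inference from usage, not the package's actual code:

from collections.abc import Sequence


def _first_str(
    results: Sequence[str],
    replaces: Sequence[tuple[str, str]] | None = None,
) -> str:
    # Take the first XPath hit (or "" if none), strip it, then apply
    # optional (old, new) substring replacements.
    text = results[0].strip() if results else ""
    for old, new in replaces or ():
        text = text.replace(old, new)
    return text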
novel_downloader/core/parsers/registry.py
@@ -3,6 +3,7 @@
 novel_downloader.core.parsers.registry
 --------------------------------------
 
+Registry and factory helpers for creating site-specific parsers.
 """
 
 __all__ = ["register_parser", "get_parser"]
@@ -16,27 +17,24 @@ from novel_downloader.models import ParserConfig
 ParserBuilder = Callable[[ParserConfig], ParserProtocol]
 
 P = TypeVar("P", bound=ParserProtocol)
-_PARSER_MAP: dict[str, dict[str, ParserBuilder]] = {}
+_PARSER_MAP: dict[str, ParserBuilder] = {}
 
 
 def register_parser(
     site_keys: Sequence[str],
-    backends: Sequence[str],
 ) -> Callable[[type[P]], type[P]]:
     """
     Decorator to register a parser class under given keys.
 
     :param site_keys: Sequence of site identifiers
-    :param backends: Sequence of backend types
+    :param backends: Sequence of backend types
     :return: A class decorator that populates _PARSER_MAP.
     """
 
     def decorator(cls: type[P]) -> type[P]:
         for site in site_keys:
             site_lower = site.lower()
-            bucket = _PARSER_MAP.setdefault(site_lower, {})
-            for backend in backends:
-                bucket[backend] = cls
+            _PARSER_MAP[site_lower] = cls
         return cls
 
     return decorator
@@ -52,17 +50,8 @@ def get_parser(site: str, config: ParserConfig) -> ParserProtocol:
     """
     site_key = site.lower()
     try:
-        backend_map = _PARSER_MAP[site_key]
+        parser_cls = _PARSER_MAP[site_key]
     except KeyError as err:
         raise ValueError(f"Unsupported site: {site!r}") from err
 
-    mode = config.mode
-    try:
-        parser_cls = backend_map[mode]
-    except KeyError as err:
-        raise ValueError(
-            f"Unsupported parser mode {mode!r} for site {site!r}. "
-            f"Available modes: {list(backend_map)}"
-        ) from err
-
     return parser_cls(config)
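
The net effect of these two hunks: registration and lookup collapse from a two-level site -> backend -> class map to a flat site -> class map, and `get_parser` no longer dispatches on `config.mode`. A toy sketch of the flattened pattern (stand-in types, not the package's own):

from collections.abc import Callable, Sequence
from typing import Any, TypeVar

_PARSER_MAP: dict[str, Callable[..., Any]] = {}
P = TypeVar("P")


def register_parser(site_keys: Sequence[str]) -> Callable[[type[P]], type[P]]:
    def decorator(cls: type[P]) -> type[P]:
        for site in site_keys:
            _PARSER_MAP[site.lower()] = cls  # flat map: site key -> class
        return cls

    return decorator


@register_parser(site_keys=["demo"])
class DemoParser:
    def __init__(self, config: dict[str, Any]) -> None:
        self.config = config


parser = _PARSER_MAP["demo"]({"mode": "ignored"})  # lookup is one dict access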
novel_downloader/core/parsers/sfacg.py
@@ -11,22 +11,32 @@ from lxml import html
 
 from novel_downloader.core.parsers.base import BaseParser
 from novel_downloader.core.parsers.registry import register_parser
-from novel_downloader.models import ChapterDict
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
 
 
 @register_parser(
     site_keys=["sfacg"],
-    backends=["session", "browser"],
 )
 class SfacgParser(BaseParser):
-    """ """
+    """
+    Parser for sfacg book pages.
+    """
 
     # Book info XPaths
     _BOOK_NAME_XPATH = '//ul[@class="book_info"]//span[@class="book_newtitle"]/text()'
     _AUTHOR_INFO_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/text()'
     _UPDATE_TIME_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/br/following-sibling::text()'  # noqa: E501
     _COVER_URL_XPATH = '//ul[@class="book_info"]//li/img/@src'
-    _STATUS_XPATH = '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
+    # _STATUS_XPATH = '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
+    _STATUS_XPATH = (
+        '//ul[@class="book_info"]//div[@class="book_info2"]'
+        '/span[contains(., "完结") or contains(., "连载")]/text()'
+    )
     _SUMMARY_XPATH = '//ul[@class="book_profile"]/li[@class="book_bk_qs1"]/text()'
 
     # Catalog XPaths
@@ -47,54 +57,35 @@ class SfacgParser(BaseParser):
         self,
         html_list: list[str],
         **kwargs: Any,
-    ) -> dict[str, Any]:
-        """
-        Parse a book info page and extract metadata and chapter structure.
-
-        :param html_list: Raw HTML of the book info page.
-        :return: Parsed metadata and chapter structure as a dictionary.
-        """
+    ) -> BookInfoDict | None:
         if len(html_list) < 2:
-            return {}
+            return None
 
         info_tree = html.fromstring(html_list[0])
         catalog_tree = html.fromstring(html_list[1])
 
-        result: dict[str, Any] = {}
-
         # Book metadata
-        book_name = info_tree.xpath(self._BOOK_NAME_XPATH)
-        result["book_name"] = book_name[0].strip() if book_name else ""
+        book_name = self._first_str(info_tree.xpath(self._BOOK_NAME_XPATH))
 
-        book_info3 = info_tree.xpath(self._AUTHOR_INFO_XPATH)
-        result["author"] = book_info3[0].split("/")[0].strip() if book_info3 else ""
-        result["word_count"] = (
-            book_info3[0].split("/")[1].strip()
-            if book_info3 and len(book_info3[0].split("/")) > 1
-            else ""
-        )
+        book_info3_str = self._first_str(info_tree.xpath(self._AUTHOR_INFO_XPATH))
+        author, _, word_count = (p.strip() for p in book_info3_str.partition("/"))
 
-        book_info3_br = info_tree.xpath(self._UPDATE_TIME_XPATH)
-        result["update_time"] = book_info3_br[0].strip() if book_info3_br else ""
+        update_time = self._first_str(info_tree.xpath(self._UPDATE_TIME_XPATH))
 
-        cover_url = info_tree.xpath(self._COVER_URL_XPATH)
-        result["cover_url"] = "https:" + cover_url[0] if cover_url else ""
+        cover_url = "https:" + self._first_str(info_tree.xpath(self._COVER_URL_XPATH))
 
-        serial_status = info_tree.xpath(self._STATUS_XPATH)
-        result["serial_status"] = next(
-            (s for s in serial_status if "完结" in s or "连载" in s), ""
-        )
+        serial_status = self._first_str(info_tree.xpath(self._STATUS_XPATH))
 
-        summary = info_tree.xpath(self._SUMMARY_XPATH)
-        result["summary"] = "".join(summary).strip()
+        summary_elem = info_tree.xpath(self._SUMMARY_XPATH)
+        summary = "".join(summary_elem).strip()
 
         # Chapter structure
         volume_titles = catalog_tree.xpath(self._VOLUME_TITLE_XPATH)
         volume_blocks = catalog_tree.xpath(self._VOLUME_CONTENT_XPATH)
 
-        volumes = []
+        volumes: list[VolumeInfoDict] = []
         for vol_title, vol_block in zip(volume_titles, volume_blocks, strict=False):
-            chapters = []
+            chapters: list[ChapterInfoDict] = []
             for a in vol_block.xpath(self._CHAPTER_LIST_XPATH):
                 href = a.xpath("./@href")[0] if a.xpath("./@href") else ""
                 title = "".join(a.xpath(".//li//text()")).strip()
@@ -112,9 +103,18 @@ class SfacgParser(BaseParser):
                     "chapters": chapters,
                 }
             )
-        result["volumes"] = volumes
 
-        return result
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "word_count": word_count,
+            "serial_status": serial_status,
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
 
     def parse_chapter(
         self,
@@ -122,13 +122,6 @@ class SfacgParser(BaseParser):
         chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
-        """
-        Parse a single chapter page and extract clean text or simplified HTML.
-
-        :param html_list: Raw HTML of the chapter page.
-        :param chapter_id: Identifier of the chapter being parsed.
-        :return: Cleaned chapter content as plain text or minimal HTML.
-        """
         if not html_list:
             return None
         keywords = [
@@ -156,7 +149,7 @@ class SfacgParser(BaseParser):
         raw_text_parts = tree.xpath(self._CHAPTER_TEXT_XPATH)
         content_lines = [txt.strip() for txt in raw_text_parts if txt.strip()]
 
-        content = "\n\n".join(content_lines).strip()
+        content = "\n".join(content_lines).strip()
         if not content:
             return None
 
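One detail worth calling out in the `parse_book_info` rewrite above: `str.partition` always returns a 3-tuple, so the author/word-count split no longer needs the length checks the old `split("/")` indexing did. A quick illustration with made-up strings:

# "author/word_count" present: partition splits at the first "/".
author, _, word_count = (p.strip() for p in "某作者 / 12万字".partition("/"))
assert (author, word_count) == ("某作者", "12万字")

# Separator absent: partition returns (whole, "", ""), so word_count is "".
author, _, word_count = (p.strip() for p in "某作者".partition("/"))
assert (author, word_count) == ("某作者", "")
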
novel_downloader/core/parsers/shencou.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.shencou
+-------------------------------------
+
+"""
+
+from typing import Any
+
+from lxml import etree, html
+
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    VolumeInfoDict,
+)
+
+
+@register_parser(
+    site_keys=["shencou"],
+)
+class ShencouParser(BaseParser):
+    """
+    Parser for 神凑轻小说 book pages.
+    """
+
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        if len(html_list) < 2:
+            return None
+
+        info_tree = html.fromstring(html_list[0])
+        catalog_tree = html.fromstring(html_list[1])
+
+        # --- Metadata ---
+        raw_name = self._first_str(info_tree.xpath("//span//a/text()"))
+        book_name = raw_name[:-2] if raw_name.endswith("小说") else raw_name
+
+        author = self._first_str(
+            info_tree.xpath('//td[contains(text(),"小说作者")]/text()'),
+            replaces=[("小说作者:", "")],
+        )
+
+        cover_url = self._first_str(
+            info_tree.xpath('//a[contains(@href,"/files/article/image")]/img/@src')
+        )
+
+        # word count
+        word_count = self._first_str(
+            info_tree.xpath('//td[contains(text(),"全文长度")]/text()'),
+            replaces=[("全文长度:", "")],
+        )
+
+        # update time
+        update_time = self._first_str(
+            info_tree.xpath('//td[contains(text(),"最后更新")]/text()'),
+            replaces=[("最后更新:", "")],
+        )
+
+        # serial status
+        serial_status = self._first_str(
+            info_tree.xpath('//td[contains(text(),"写作进度")]/text()'),
+            replaces=[("写作进度:", "")],
+        )
+
+        # summary
+        raw_detail = self._norm_space(
+            info_tree.xpath('string(//td[@width="80%" and @valign="top"])')
+        )
+        summary = ""
+        if "内容简介:" in raw_detail and "本书公告:" in raw_detail:
+            intro = raw_detail.split("内容简介:", 1)[1]
+            summary = intro.split("本书公告:", 1)[0].strip()
+
+        # --- Catalog / Chapters ---
+        volumes: list[VolumeInfoDict] = []
+        curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}
+
+        # Walk through volume headers (.zjbox) and lists (.zjlist4) in document order
+        for elem in catalog_tree.xpath(
+            '//div[@class="zjbox"] | //div[@class="zjlist4"]'
+        ):
+            cls_attr = elem.get("class", "")
+            if "zjbox" in cls_attr:
+                # before starting new volume, save the previous if it has chapters
+                if curr_vol["chapters"]:
+                    volumes.append(curr_vol)
+                # start a new volume
+                vol_name = elem.xpath(".//h2/text()")[0].strip()
+                curr_vol = {"volume_name": vol_name, "chapters": []}
+            elif "zjlist4" in cls_attr:
+                # collect all <li><a> entries under this list
+                for a in elem.xpath(".//ol/li/a"):
+                    url = a.get("href").strip()
+                    title = a.text_content().strip()
+                    # '203740.html' -> '203740'
+                    chap_id = url.split(".")[0]
+                    curr_vol["chapters"].append(
+                        {
+                            "title": title,
+                            "url": url,
+                            "chapterId": chap_id,
+                        }
+                    )
+
+        # append last volume if not empty
+        if curr_vol["chapters"]:
+            volumes.append(curr_vol)
+
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "summary": summary,
+            "volumes": volumes,
+            "word_count": word_count,
+            "serial_status": serial_status,
+            "extra": {},
+        }
+
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        if not html_list:
+            return None
+
+        tree = html.fromstring(html_list[0])
+        title = self._first_str(tree.xpath("//h1/text()"))
+        if not title:
+            return None
+
+        # strip book-name prefix if present
+        bc = tree.xpath('//div[@id="breadCrumb"]//a/text()')
+        if len(bc) >= 2:
+            book_name = bc[1].strip()
+            title = title.removeprefix(book_name).lstrip(" ::–—-").strip()
+
+        anchors = tree.xpath('//div[@id="BookSee_Right"]')
+        if not anchors:
+            return None
+        marker = anchors[0]
+
+        lines: list[str] = []
+
+        def _append_text(text: str) -> None:
+            for ln in text.replace("\xa0", " ").splitlines():
+                ln2 = ln.strip()
+                if ln2:
+                    lines.append(ln2)
+
+        if marker.tail:
+            _append_text(marker.tail)
+
+        # 4. Walk through siblings until <!--over-->
+        node = marker
+        while True:
+            sib = node.getnext()
+            if sib is None:
+                break
+            node = sib
+
+            # Stop on the closing comment
+            if isinstance(sib, etree._Comment) and "over" in (sib.text or ""):
+                break
+
+            # Process comment tails (e.g. <!--go--> tail)
+            if isinstance(sib, etree._Comment):
+                if sib.tail:
+                    _append_text(sib.tail)
+                continue
+
+            if isinstance(sib, html.HtmlElement):
+                # tag = sib.tag.lower()
+                tag = str(sib.tag).lower()
+                cls = sib.get("class", "") or ""
+
+                if tag == "div" and "divimage" in cls:
+                    srcs = sib.xpath(".//img/@src")
+                    if srcs:
+                        lines.append(f'<img src="{srcs[0]}" />')
+                    # text after the div
+                    if sib.tail:
+                        _append_text(sib.tail)
+                    continue
+
+                if tag == "br":
+                    if sib.tail:
+                        _append_text(sib.tail)
+                    continue
+
+                text = sib.text_content()
+                _append_text(text)
+                if sib.tail:
+                    _append_text(sib.tail)
+                continue
+
+        content = "\n".join(lines)
+        if not content:
+            return None
+
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "shencou"},
+        }
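
The chapter-body extraction above relies on lxml exposing HTML comments as tree nodes whose `.tail` holds the text that follows them. A self-contained sketch of that comment-delimited sibling walk on a toy fragment (the markup here is illustrative, not the site's real HTML):

from lxml import etree, html

doc = html.fromstring(
    '<div><div id="BookSee_Right"></div>'
    "<!--go-->First paragraph<br/>Second paragraph<!--over-->footer junk</div>"
)
marker = doc.xpath('//div[@id="BookSee_Right"]')[0]

lines: list[str] = []
node = marker
while (sib := node.getnext()) is not None:
    node = sib
    if isinstance(sib, etree._Comment):
        if "over" in (sib.text or ""):
            break  # closing <!--over--> comment ends the chapter body
        if sib.tail and sib.tail.strip():
            lines.append(sib.tail.strip())  # text hanging off <!--go-->
        continue
    if sib.tail and sib.tail.strip():
        lines.append(sib.tail.strip())  # text hanging off <br/> and friends

print(lines)  # ['First paragraph', 'Second paragraph']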
novel_downloader/core/parsers/shuhaige.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.shuhaige
+--------------------------------------
+
+"""
+
+from typing import Any
+
+from lxml import html
+
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+
+
+@register_parser(
+    site_keys=["shuhaige"],
+)
+class ShuhaigeParser(BaseParser):
+    """
+    Parser for 书海阁小说网 book pages.
+    """
+
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        if not html_list:
+            return None
+
+        tree = html.fromstring(html_list[0])
+
+        book_name = self._first_str(tree.xpath('//div[@id="info"]/h1/text()'))
+        author = self._first_str(tree.xpath('//div[@id="info"]/p[1]/a/text()'))
+
+        cover_url = self._first_str(tree.xpath('//div[@id="fmimg"]/img/@src'))
+
+        update_time = self._first_str(
+            tree.xpath('//div[@id="info"]/p[3]/text()'),
+            replaces=[("最后更新:", "")],
+        )
+
+        summary = self._first_str(tree.xpath('//div[@id="intro"]/p[1]/text()'))
+
+        book_type = self._first_str(tree.xpath('//div[@class="con_top"]/a[2]/text()'))
+        tags = [book_type] if book_type else []
+
+        chapters: list[ChapterInfoDict] = [
+            {
+                "title": (a.text or "").strip(),
+                "url": (a.get("href") or "").strip(),
+                "chapterId": (a.get("href") or "").rsplit("/", 1)[-1].split(".", 1)[0],
+            }
+            for a in tree.xpath(
+                '//div[@id="list"]/dl/dt[contains(., "正文")]/following-sibling::dd/a'
+            )
+        ]
+
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "tags": tags,
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
+
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+
+        title = self._first_str(tree.xpath('//div[@class="bookname"]/h1/text()'))
+        if not title:
+            title = f"第 {chapter_id} 章"
+
+        content_elem = tree.xpath('//div[@id="content"]')
+        if not content_elem:
+            return None
+        paragraphs = [
+            "".join(p.itertext()).strip() for p in content_elem[0].xpath(".//p")
+        ]
+        if paragraphs and "www.shuhaige.net" in paragraphs[-1]:
+            paragraphs.pop()
+
+        content = "\n".join(paragraphs)
+        if not content.strip():
+            return None
+
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "shuhaige"},
+        }
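
Put together, downstream code in 2.0.1 would resolve any of these parsers by site key alone. A hedged sketch of that wiring (constructing `ParserConfig` with defaults is my assumption, as its fields live in novel_downloader/models/config.py, whose diff is not shown here; likewise the assumption that importing the parsers package runs the decorators):

import novel_downloader.core.parsers  # noqa: F401  (assumed to trigger @register_parser)
from novel_downloader.core.parsers.registry import get_parser
from novel_downloader.models import ParserConfig

config = ParserConfig()  # assumed default-constructible; actual fields not shown above
parser = get_parser("shuhaige", config)  # flat lookup, no backend/mode argument

info_pages: list[str] = ["<html>...fetched book-info page...</html>"]
book_info = parser.parse_book_info(info_pages)
if book_info:
    print(book_info["book_name"], "-", book_info["author"])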