novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276):
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.parsers.sfacg.main_parser
4
- -----------------------------------------------
3
+ novel_downloader.core.parsers.sfacg
4
+ -----------------------------------
5
5
 
6
6
  """
7
7
 
@@ -10,18 +10,33 @@ from typing import Any
10
10
  from lxml import html
11
11
 
12
12
  from novel_downloader.core.parsers.base import BaseParser
13
- from novel_downloader.models import ChapterDict
14
-
15
-
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
+ @register_parser(
23
+ site_keys=["sfacg"],
24
+ )
16
25
  class SfacgParser(BaseParser):
17
- """ """
26
+ """
27
+ Parser for sfacg book pages.
28
+ """
18
29
 
19
30
  # Book info XPaths
20
31
  _BOOK_NAME_XPATH = '//ul[@class="book_info"]//span[@class="book_newtitle"]/text()'
21
32
  _AUTHOR_INFO_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/text()'
22
33
  _UPDATE_TIME_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/br/following-sibling::text()' # noqa: E501
23
34
  _COVER_URL_XPATH = '//ul[@class="book_info"]//li/img/@src'
24
- _STATUS_XPATH = '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
35
+ # _STATUS_XPATH = '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
36
+ _STATUS_XPATH = (
37
+ '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
38
+ ' and (contains(., "完结") or contains(., "连载"))]/text()'
39
+ )
25
40
  _SUMMARY_XPATH = '//ul[@class="book_profile"]/li[@class="book_bk_qs1"]/text()'
26
41
 
27
42
  # Catalog XPaths
@@ -42,54 +57,35 @@ class SfacgParser(BaseParser):
42
57
  self,
43
58
  html_list: list[str],
44
59
  **kwargs: Any,
45
- ) -> dict[str, Any]:
46
- """
47
- Parse a book info page and extract metadata and chapter structure.
48
-
49
- :param html_list: Raw HTML of the book info page.
50
- :return: Parsed metadata and chapter structure as a dictionary.
51
- """
60
+ ) -> BookInfoDict | None:
52
61
  if len(html_list) < 2:
53
- return {}
62
+ return None
54
63
 
55
64
  info_tree = html.fromstring(html_list[0])
56
65
  catalog_tree = html.fromstring(html_list[1])
57
66
 
58
- result: dict[str, Any] = {}
59
-
60
67
  # Book metadata
61
- book_name = info_tree.xpath(self._BOOK_NAME_XPATH)
62
- result["book_name"] = book_name[0].strip() if book_name else ""
68
+ book_name = self._first_str(info_tree.xpath(self._BOOK_NAME_XPATH))
63
69
 
64
- book_info3 = info_tree.xpath(self._AUTHOR_INFO_XPATH)
65
- result["author"] = book_info3[0].split("/")[0].strip() if book_info3 else ""
66
- result["word_count"] = (
67
- book_info3[0].split("/")[1].strip()
68
- if book_info3 and len(book_info3[0].split("/")) > 1
69
- else ""
70
- )
70
+ book_info3_str = self._first_str(info_tree.xpath(self._AUTHOR_INFO_XPATH))
71
+ author, _, word_count = (p.strip() for p in book_info3_str.partition("/"))
71
72
 
72
- book_info3_br = info_tree.xpath(self._UPDATE_TIME_XPATH)
73
- result["update_time"] = book_info3_br[0].strip() if book_info3_br else ""
73
+ update_time = self._first_str(info_tree.xpath(self._UPDATE_TIME_XPATH))
74
74
 
75
- cover_url = info_tree.xpath(self._COVER_URL_XPATH)
76
- result["cover_url"] = "https:" + cover_url[0] if cover_url else ""
75
+ cover_url = "https:" + self._first_str(info_tree.xpath(self._COVER_URL_XPATH))
77
76
 
78
- serial_status = info_tree.xpath(self._STATUS_XPATH)
79
- result["serial_status"] = next(
80
- (s for s in serial_status if "完结" in s or "连载" in s), ""
81
- )
77
+ serial_status = self._first_str(info_tree.xpath(self._STATUS_XPATH))
82
78
 
83
- summary = info_tree.xpath(self._SUMMARY_XPATH)
84
- result["summary"] = "".join(summary).strip()
79
+ summary_elem = info_tree.xpath(self._SUMMARY_XPATH)
80
+ summary = "".join(summary_elem).strip()
85
81
 
86
82
  # Chapter structure
87
83
  volume_titles = catalog_tree.xpath(self._VOLUME_TITLE_XPATH)
88
84
  volume_blocks = catalog_tree.xpath(self._VOLUME_CONTENT_XPATH)
89
85
 
90
- volumes = []
86
+ volumes: list[VolumeInfoDict] = []
91
87
  for vol_title, vol_block in zip(volume_titles, volume_blocks, strict=False):
92
- chapters = []
88
+ chapters: list[ChapterInfoDict] = []
93
89
  for a in vol_block.xpath(self._CHAPTER_LIST_XPATH):
94
90
  href = a.xpath("./@href")[0] if a.xpath("./@href") else ""
95
91
  title = "".join(a.xpath(".//li//text()")).strip()
@@ -107,9 +103,18 @@ class SfacgParser(BaseParser):
107
103
  "chapters": chapters,
108
104
  }
109
105
  )
110
- result["volumes"] = volumes
111
106
 
112
- return result
107
+ return {
108
+ "book_name": book_name,
109
+ "author": author,
110
+ "cover_url": cover_url,
111
+ "update_time": update_time,
112
+ "word_count": word_count,
113
+ "serial_status": serial_status,
114
+ "summary": summary,
115
+ "volumes": volumes,
116
+ "extra": {},
117
+ }
113
118
 
114
119
  def parse_chapter(
115
120
  self,
@@ -117,13 +122,6 @@ class SfacgParser(BaseParser):
117
122
  chapter_id: str,
118
123
  **kwargs: Any,
119
124
  ) -> ChapterDict | None:
120
- """
121
- Parse a single chapter page and extract clean text or simplified HTML.
122
-
123
- :param html_list: Raw HTML of the chapter page.
124
- :param chapter_id: Identifier of the chapter being parsed.
125
- :return: Cleaned chapter content as plain text or minimal HTML.
126
- """
127
125
  if not html_list:
128
126
  return None
129
127
  keywords = [
@@ -151,7 +149,7 @@ class SfacgParser(BaseParser):
151
149
  raw_text_parts = tree.xpath(self._CHAPTER_TEXT_XPATH)
152
150
  content_lines = [txt.strip() for txt in raw_text_parts if txt.strip()]
153
151
 
154
- content = "\n\n".join(content_lines).strip()
152
+ content = "\n".join(content_lines).strip()
155
153
  if not content:
156
154
  return None
157
155
 
@@ -0,0 +1,215 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.shencou
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import etree, html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ VolumeInfoDict,
18
+ )
19
+
20
+
21
+ @register_parser(
22
+ site_keys=["shencou"],
23
+ )
24
+ class ShencouParser(BaseParser):
25
+ """
26
+ Parser for 神凑轻小说 book pages.
27
+ """
28
+
29
+ def parse_book_info(
30
+ self,
31
+ html_list: list[str],
32
+ **kwargs: Any,
33
+ ) -> BookInfoDict | None:
34
+ if len(html_list) < 2:
35
+ return None
36
+
37
+ info_tree = html.fromstring(html_list[0])
38
+ catalog_tree = html.fromstring(html_list[1])
39
+
40
+ # --- Metadata ---
41
+ raw_name = self._first_str(info_tree.xpath("//span//a/text()"))
42
+ book_name = raw_name[:-2] if raw_name.endswith("小说") else raw_name
43
+
44
+ author = self._first_str(
45
+ info_tree.xpath('//td[contains(text(),"小说作者")]/text()'),
46
+ replaces=[("小说作者:", "")],
47
+ )
48
+
49
+ cover_url = self._first_str(
50
+ info_tree.xpath('//a[contains(@href,"/files/article/image")]/img/@src')
51
+ )
52
+
53
+ # word count
54
+ word_count = self._first_str(
55
+ info_tree.xpath('//td[contains(text(),"全文长度")]/text()'),
56
+ replaces=[("全文长度:", "")],
57
+ )
58
+
59
+ # update time
60
+ update_time = self._first_str(
61
+ info_tree.xpath('//td[contains(text(),"最后更新")]/text()'),
62
+ replaces=[("最后更新:", "")],
63
+ )
64
+
65
+ # serial status
66
+ serial_status = self._first_str(
67
+ info_tree.xpath('//td[contains(text(),"写作进度")]/text()'),
68
+ replaces=[("写作进度:", "")],
69
+ )
70
+
71
+ # summary
72
+ raw_detail = self._norm_space(
73
+ info_tree.xpath('string(//td[@width="80%" and @valign="top"])')
74
+ )
75
+ summary = ""
76
+ if "内容简介:" in raw_detail and "本书公告:" in raw_detail:
77
+ intro = raw_detail.split("内容简介:", 1)[1]
78
+ summary = intro.split("本书公告:", 1)[0].strip()
79
+
80
+ # --- Catalog / Chapters ---
81
+ volumes: list[VolumeInfoDict] = []
82
+ curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}
83
+
84
+ # Walk through volume headers (.zjbox) and lists (.zjlist4) in document order
85
+ for elem in catalog_tree.xpath(
86
+ '//div[@class="zjbox"] | //div[@class="zjlist4"]'
87
+ ):
88
+ cls_attr = elem.get("class", "")
89
+ if "zjbox" in cls_attr:
90
+ # before starting new volume, save the previous if it has chapters
91
+ if curr_vol["chapters"]:
92
+ volumes.append(curr_vol)
93
+ # start a new volume
94
+ vol_name = elem.xpath(".//h2/text()")[0].strip()
95
+ curr_vol = {"volume_name": vol_name, "chapters": []}
96
+ elif "zjlist4" in cls_attr:
97
+ # collect all <li><a> entries under this list
98
+ for a in elem.xpath(".//ol/li/a"):
99
+ url = a.get("href").strip()
100
+ title = a.text_content().strip()
101
+ # '203740.html' -> '203740'
102
+ chap_id = url.split(".")[0]
103
+ curr_vol["chapters"].append(
104
+ {
105
+ "title": title,
106
+ "url": url,
107
+ "chapterId": chap_id,
108
+ }
109
+ )
110
+
111
+ # append last volume if not empty
112
+ if curr_vol["chapters"]:
113
+ volumes.append(curr_vol)
114
+
115
+ return {
116
+ "book_name": book_name,
117
+ "author": author,
118
+ "cover_url": cover_url,
119
+ "update_time": update_time,
120
+ "summary": summary,
121
+ "volumes": volumes,
122
+ "word_count": word_count,
123
+ "serial_status": serial_status,
124
+ "extra": {},
125
+ }
126
+
127
+ def parse_chapter(
128
+ self,
129
+ html_list: list[str],
130
+ chapter_id: str,
131
+ **kwargs: Any,
132
+ ) -> ChapterDict | None:
133
+ if not html_list:
134
+ return None
135
+
136
+ tree = html.fromstring(html_list[0])
137
+ title = self._first_str(tree.xpath("//h1/text()"))
138
+ if not title:
139
+ return None
140
+
141
+ # strip book-name prefix if present
142
+ bc = tree.xpath('//div[@id="breadCrumb"]//a/text()')
143
+ if len(bc) >= 2:
144
+ book_name = bc[1].strip()
145
+ title = title.removeprefix(book_name).lstrip(" ::–—-").strip()
146
+
147
+ anchors = tree.xpath('//div[@id="BookSee_Right"]')
148
+ if not anchors:
149
+ return None
150
+ marker = anchors[0]
151
+
152
+ lines: list[str] = []
153
+
154
+ def _append_text(text: str) -> None:
155
+ for ln in text.replace("\xa0", " ").splitlines():
156
+ ln2 = ln.strip()
157
+ if ln2:
158
+ lines.append(ln2)
159
+
160
+ if marker.tail:
161
+ _append_text(marker.tail)
162
+
163
+ # 4. Walk through siblings until <!--over-->
164
+ node = marker
165
+ while True:
166
+ sib = node.getnext()
167
+ if sib is None:
168
+ break
169
+ node = sib
170
+
171
+ # Stop on the closing comment
172
+ if isinstance(sib, etree._Comment) and "over" in (sib.text or ""):
173
+ break
174
+
175
+ # Process comment tails (e.g. <!--go--> tail)
176
+ if isinstance(sib, etree._Comment):
177
+ if sib.tail:
178
+ _append_text(sib.tail)
179
+ continue
180
+
181
+ if isinstance(sib, html.HtmlElement):
182
+ # tag = sib.tag.lower()
183
+ tag = str(sib.tag).lower()
184
+ cls = sib.get("class", "") or ""
185
+
186
+ if tag == "div" and "divimage" in cls:
187
+ srcs = sib.xpath(".//img/@src")
188
+ if srcs:
189
+ lines.append(f'<img src="{srcs[0]}" />')
190
+ # text after the div
191
+ if sib.tail:
192
+ _append_text(sib.tail)
193
+ continue
194
+
195
+ if tag == "br":
196
+ if sib.tail:
197
+ _append_text(sib.tail)
198
+ continue
199
+
200
+ text = sib.text_content()
201
+ _append_text(text)
202
+ if sib.tail:
203
+ _append_text(sib.tail)
204
+ continue
205
+
206
+ content = "\n".join(lines)
207
+ if not content:
208
+ return None
209
+
210
+ return {
211
+ "id": chapter_id,
212
+ "title": title,
213
+ "content": content,
214
+ "extra": {"site": "shencou"},
215
+ }
@@ -0,0 +1,111 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.shuhaige
4
+ --------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
+ @register_parser(
23
+ site_keys=["shuhaige"],
24
+ )
25
+ class ShuhaigeParser(BaseParser):
26
+ """
27
+ Parser for 书海阁小说网 book pages.
28
+ """
29
+
30
+ def parse_book_info(
31
+ self,
32
+ html_list: list[str],
33
+ **kwargs: Any,
34
+ ) -> BookInfoDict | None:
35
+ if not html_list:
36
+ return None
37
+
38
+ tree = html.fromstring(html_list[0])
39
+
40
+ book_name = self._first_str(tree.xpath('//div[@id="info"]/h1/text()'))
41
+ author = self._first_str(tree.xpath('//div[@id="info"]/p[1]/a/text()'))
42
+
43
+ cover_url = self._first_str(tree.xpath('//div[@id="fmimg"]/img/@src'))
44
+
45
+ update_time = self._first_str(
46
+ tree.xpath('//div[@id="info"]/p[3]/text()'),
47
+ replaces=[("最后更新:", "")],
48
+ )
49
+
50
+ summary = self._first_str(tree.xpath('//div[@id="intro"]/p[1]/text()'))
51
+
52
+ book_type = self._first_str(tree.xpath('//div[@class="con_top"]/a[2]/text()'))
53
+ tags = [book_type] if book_type else []
54
+
55
+ chapters: list[ChapterInfoDict] = [
56
+ {
57
+ "title": (a.text or "").strip(),
58
+ "url": (a.get("href") or "").strip(),
59
+ "chapterId": (a.get("href") or "").rsplit("/", 1)[-1].split(".", 1)[0],
60
+ }
61
+ for a in tree.xpath(
62
+ '//div[@id="list"]/dl/dt[contains(., "正文")]/following-sibling::dd/a'
63
+ )
64
+ ]
65
+
66
+ volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
67
+
68
+ return {
69
+ "book_name": book_name,
70
+ "author": author,
71
+ "cover_url": cover_url,
72
+ "update_time": update_time,
73
+ "tags": tags,
74
+ "summary": summary,
75
+ "volumes": volumes,
76
+ "extra": {},
77
+ }
78
+
79
+ def parse_chapter(
80
+ self,
81
+ html_list: list[str],
82
+ chapter_id: str,
83
+ **kwargs: Any,
84
+ ) -> ChapterDict | None:
85
+ if not html_list:
86
+ return None
87
+ tree = html.fromstring(html_list[0])
88
+
89
+ title = self._first_str(tree.xpath('//div[@class="bookname"]/h1/text()'))
90
+ if not title:
91
+ title = f"第 {chapter_id} 章"
92
+
93
+ content_elem = tree.xpath('//div[@id="content"]')
94
+ if not content_elem:
95
+ return None
96
+ paragraphs = [
97
+ "".join(p.itertext()).strip() for p in content_elem[0].xpath(".//p")
98
+ ]
99
+ if paragraphs and "www.shuhaige.net" in paragraphs[-1]:
100
+ paragraphs.pop()
101
+
102
+ content = "\n".join(paragraphs)
103
+ if not content.strip():
104
+ return None
105
+
106
+ return {
107
+ "id": chapter_id,
108
+ "title": title,
109
+ "content": content,
110
+ "extra": {"site": "shuhaige"},
111
+ }
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.tongrenquan
4
+ -----------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
+ @register_parser(
23
+ site_keys=["tongrenquan"],
24
+ )
25
+ class TongrenquanParser(BaseParser):
26
+ """
27
+ Parser for 同人圈 book pages.
28
+ """
29
+
30
+ BASE_URL = "https://www.tongrenquan.org"
31
+
32
+ def parse_book_info(
33
+ self,
34
+ html_list: list[str],
35
+ **kwargs: Any,
36
+ ) -> BookInfoDict | None:
37
+ if not html_list:
38
+ return None
39
+
40
+ tree = html.fromstring(html_list[0])
41
+
42
+ # Metadata
43
+ book_name = self._first_str(tree.xpath('//div[@class="infos"]/h1/text()'))
44
+ author = self._first_str(
45
+ tree.xpath('//div[@class="date"]/span/text()'),
46
+ replaces=[("作者:", "")],
47
+ )
48
+ cover_url = self.BASE_URL + self._first_str(
49
+ tree.xpath('//div[@class="pic"]//img/@src')
50
+ )
51
+ update_time = self._first_str(
52
+ tree.xpath('//div[@class="date"]/text()'),
53
+ replaces=[("日期:", "")],
54
+ )
55
+
56
+ # Summary (collapse text within the <p> tag)
57
+ paras = tree.xpath('//div[@class="infos"]/p//text()')
58
+ summary = "\n".join(p.strip() for p in paras if p.strip())
59
+
60
+ # Chapters extraction
61
+ chapters: list[ChapterInfoDict] = []
62
+ for a in tree.xpath('//div[contains(@class,"book_list")]//ul//li/a'):
63
+ url = a.get("href", "").strip()
64
+ title = a.text_content().strip()
65
+ # General pattern: /category/bookId/chapterId.html
66
+ # '/tongren/7562/462.html' -> '462'
67
+ chapter_id = url.rstrip(".html").split("/")[-1]
68
+ chapters.append({"title": title, "url": url, "chapterId": chapter_id})
69
+
70
+ volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
71
+
72
+ return {
73
+ "book_name": book_name,
74
+ "author": author,
75
+ "cover_url": cover_url,
76
+ "update_time": update_time,
77
+ "tags": ["同人小说"],
78
+ "summary": summary,
79
+ "volumes": volumes,
80
+ "extra": {},
81
+ }
82
+
83
+ def parse_chapter(
84
+ self,
85
+ html_list: list[str],
86
+ chapter_id: str,
87
+ **kwargs: Any,
88
+ ) -> ChapterDict | None:
89
+ if not html_list:
90
+ return None
91
+
92
+ tree = html.fromstring(html_list[0])
93
+
94
+ raw_title = self._first_str(
95
+ tree.xpath('//div[contains(@class,"read_chapterName")]//h1/text()')
96
+ )
97
+
98
+ book_name = self._first_str(
99
+ tree.xpath('//div[contains(@class,"readTop")]//a[last()]/text()')
100
+ )
101
+
102
+ title = raw_title.replace(book_name, "").strip()
103
+
104
+ # Extract paragraphs of content
105
+ paras = tree.xpath('//div[contains(@class,"read_chapterDetail")]/p')
106
+ texts = [p.text_content().strip() for p in paras if p.text_content().strip()]
107
+ content = "\n".join(texts)
108
+ if not content:
109
+ return None
110
+
111
+ return {
112
+ "id": chapter_id,
113
+ "title": title,
114
+ "content": content,
115
+ "extra": {"site": "tongrenquan"},
116
+ }