novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,186 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.ixdzs8
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ import contextlib
9
+ import json
10
+ from typing import Any
11
+
12
+ from lxml import html
13
+
14
+ from novel_downloader.core.parsers.base import BaseParser
15
+ from novel_downloader.core.parsers.registry import register_parser
16
+ from novel_downloader.models import (
17
+ BookInfoDict,
18
+ ChapterDict,
19
+ ChapterInfoDict,
20
+ VolumeInfoDict,
21
+ )
22
+
23
+
24
@register_parser(
    site_keys=["ixdzs8"],
)
class Ixdzs8Parser(BaseParser):
    """
    Parser for 爱下电子书 book pages.
    """

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Build book metadata from the info page plus the chapter-list JSON.

        :param html_list: [info-page HTML, chapter-list JSON string].
        :return: Parsed book info, or None if either input is missing.
        """
        # Both the HTML info page and the JSON chapter payload are required.
        if len(html_list) < 2 or not html_list[0] or not html_list[1]:
            return None

        tree = html.fromstring(html_list[0])

        book_name = self._meta(tree, "og:novel:book_name") or self._first_str(
            tree.xpath("//div[@class='n-text']/h1/text()")
        )
        author = self._meta(tree, "og:novel:author") or self._first_str(
            tree.xpath("//div[@class='n-text']//a[contains(@class,'bauthor')]/text()")
        )
        cover_url = self._meta(tree, "og:image") or self._first_str(
            tree.xpath("//div[@class='n-img']//img/@src")
        )
        serial_status = self._meta(tree, "og:novel:status")

        # e.g. "2022-08-25T18:08:03+08:00" -> "2022-08-25 18:08:03"
        update_time = ""
        iso_time = self._meta(tree, "og:novel:update_time")
        if iso_time:
            update_time = iso_time.replace("T", " ").split("+", 1)[0].strip()

        word_count = self._first_str(
            tree.xpath("//div[@class='n-text']//span[contains(@class,'nsize')]/text()")
        )

        # og:description carries entity-escaped HTML; flatten it to plain lines.
        summary = ""
        raw_summary = self._meta(tree, "og:description")
        if raw_summary:
            cleaned = raw_summary.replace("&nbsp;", "").replace("<br />", "\n")
            summary = "\n".join(
                self._norm_space(line) for line in cleaned.splitlines()
            ).strip()

        tags = [
            self._norm_space(t)
            for t in tree.xpath("//div[contains(@class,'tags')]//em/a/text()")
            if t and t.strip()
        ]
        category = self._meta(tree, "og:novel:category") or self._first_str(
            tree.xpath("//div[@class='n-text']/p[a[contains(@class,'nsort')]]/a/text()")
        )
        if category:
            tags.append(category)

        # The book id is the last path segment of the read/book URL.
        book_path = self._meta(tree, "og:novel:read_url") or self._meta(tree, "og:url")
        book_id = book_path.strip("/").split("/")[-1] if book_path else ""

        # Second element is a JSON payload: {"data": [{"ordernum":..,"title":..}]}
        payload = {}
        with contextlib.suppress(Exception):
            payload = json.loads(html_list[1])
        clist = payload.get("data", []) if isinstance(payload, dict) else []

        chapters: list[ChapterInfoDict] = []
        for chap in clist:
            ordernum = str(chap.get("ordernum", "")).strip()
            if not ordernum:
                continue
            chap_title = self._norm_space(chap.get("title", "") or "") or "未命名章节"
            chap_url = f"/read/{book_id}/p{ordernum}.html" if book_id else ""
            chapters.append(
                {
                    "url": chap_url,
                    "title": chap_title,
                    "chapterId": f"p{ordernum}",
                }
            )

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "serial_status": serial_status,
            "update_time": update_time,
            "word_count": word_count,
            "summary": summary,
            "tags": tags,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Extract the cleaned chapter title and text from a chapter page.

        :param html_list: Raw HTML pages; only the first one is used.
        :param chapter_id: Identifier stored in the returned dict.
        :return: Chapter data, or None when no usable text remains.
        """
        if not html_list:
            return None
        tree = html.fromstring(html_list[0])

        # Prefer the top-of-page <h1>; fall back to the in-article <h3>.
        title = self._first_str(
            tree.xpath("//div[@class='page-d-top']/h1/text()")
        ) or self._first_str(
            tree.xpath("//article[contains(@class,'page-content')]//h3/text()")
        )
        title = self._norm_space(title)

        # Paragraphs of the reading section; ad containers carry class "abg".
        nodes = tree.xpath(
            "//article[contains(@class,'page-content')]//section//p[not(contains(@class,'abg'))]"
        )

        paragraphs: list[str] = []
        for node in nodes:
            text = self._norm_space(node.text_content())
            if text and not self._is_ad_line(text):
                paragraphs.append(text)

        if not paragraphs:
            return None

        # The first paragraph often repeats the title (with or without
        # spaces); strip it, dropping the paragraph entirely if empty after.
        lead = paragraphs[0].replace(title, "")
        lead = lead.replace(title.replace(" ", ""), "").strip()
        if lead:
            paragraphs[0] = lead
        else:
            paragraphs.pop(0)

        # Drop a trailing "本章完" (end-of-chapter) marker paragraph.
        if paragraphs and "本章完" in paragraphs[-1]:
            paragraphs.pop()

        content = "\n".join(paragraphs)
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "ixdzs8"},
        }

    @classmethod
    def _meta(cls, tree: html.HtmlElement, prop: str) -> str:
        """Return the content of the first <meta property=...> tag, or ""."""
        return cls._first_str(tree.xpath(f"//meta[@property='{prop}']/@content"))
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.jpxs123
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
@register_parser(
    site_keys=["jpxs123"],
)
class Jpxs123Parser(BaseParser):
    """
    Parser for 精品小说网 book pages.
    """

    BASE_URL = "https://www.jpxs123.com"

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Build book metadata from the info page and optional download page.

        :param html_list: [info-page HTML, optional download-page HTML].
        :return: Parsed book info, or None if no HTML was given.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        book_name = self._first_str(tree.xpath('//div[@class="infos"]/h1/text()'))

        # The second breadcrumb entry is the category tag (e.g. "同人小说").
        tag = self._first_str(
            tree.xpath('//div[contains(@class,"menNav")]/a[2]/text()')
        )
        tags = [tag] if tag else []

        author = self._first_str(tree.xpath('//div[@class="date"]/span[1]//a/text()'))
        update_time = self._first_str(
            tree.xpath('//div[@class="date"]/span[2]/text()'), replaces=[("时间:", "")]
        )

        # Cover src may be site-relative; prefix the base URL in that case.
        cover_rel = self._first_str(tree.xpath('//div[@class="pic"]/img/@src'))
        if cover_rel and not cover_rel.startswith("http"):
            cover_url = f"{self.BASE_URL}{cover_rel}"
        else:
            cover_url = cover_rel

        # Summary: every non-blank text fragment of the infos <p>.
        fragments = tree.xpath('//div[@class="infos"]/p//text()')
        summary = "\n".join(frag.strip() for frag in fragments if frag.strip())

        chapters: list[ChapterInfoDict] = []
        for anchor in tree.xpath('//div[contains(@class,"book_list")]//li/a'):
            href = anchor.get("href", "").strip()
            chap_title = anchor.text_content().strip()
            # Chapter URLs look like /{category}/{bookId}/{chapterId}.html
            chap_id = href.split("/")[-1].split(".")[0]
            chapters.append({"title": chap_title, "url": href, "chapterId": chap_id})

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        # The optional second page carries the txt download link.
        download_url = ""
        if len(html_list) > 1 and html_list[1]:
            dtree = html.fromstring(html_list[1])
            anchors = dtree.xpath('//a[@id="dowloadnUrl"]')
            if anchors:
                raw_link = anchors[0].get("link") or anchors[0].get("href") or ""
                download_url = self._fix_download_link(raw_link)

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "tags": tags,
            "summary": summary,
            "volumes": volumes,
            "extra": {"download_url": download_url},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Extract the chapter title and text from a chapter page.

        :param html_list: Raw HTML pages; only the first one is used.
        :param chapter_id: Identifier stored in the returned dict.
        :return: Chapter data, or None when the body is empty.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        raw_title = self._first_str(
            tree.xpath('//div[contains(@class,"read_chapterName")]//h1/text()')
        )

        # The breadcrumb trail ends with the book name; strip it off the title.
        crumbs = tree.xpath('//div[contains(@class,"readTop")]//a/text()')
        book_name = crumbs[-1].strip() if crumbs else ""
        title = raw_title.replace(book_name, "").strip()

        texts = [
            stripped
            for p in tree.xpath('//div[contains(@class,"read_chapterDetail")]/p')
            if (stripped := p.text_content().strip())
        ]

        content = "\n".join(texts)
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "jpxs123"},
        }

    @classmethod
    def _fix_download_link(cls, link: str) -> str:
        """Rewrite the obfuscated "xs../" prefix into the real download path."""
        resolved = link.replace("xs../", "/e/DownSys/")
        return f"{cls.BASE_URL}{resolved}"
@@ -0,0 +1,142 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.lewenn
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
@register_parser(
    site_keys=["lewenn", "lewen"],
)
class LewennParser(BaseParser):
    """
    Parser for 乐文小说网 book pages.
    """

    BASE_URL = "https://www.lewenn.net"

    # Substrings marking advertisement / boilerplate lines in chapter text.
    ADS: set[str] = {
        "app2",
        "read2",
        "chaptererror",
        "记住乐文小说网",
        "lewenn.net",
    }

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Extract metadata and the chapter list from a book's info page.

        :param html_list: Raw HTML pages; only the first one is used.
        :return: Parsed book info, or None if no HTML was given.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        # --- Metadata ---
        book_name = self._first_str(tree.xpath('//div[@id="info"]/h1/text()'))
        author = self._first_str(
            tree.xpath('//div[@id="info"]/p[1]/text()'),
            replaces=[(chr(0xA0), ""), ("作者:", "")],
        )
        serial_status = self._first_str(
            tree.xpath('//div[@id="info"]/p[2]/text()'),
            replaces=[(chr(0xA0), ""), ("状态:", "")],
        )
        update_time = self._first_str(
            tree.xpath('//div[@id="info"]/p[3]/text()'),
            replaces=[("最后更新:", "")],
        )

        cover_src = self._first_str(tree.xpath('//div[@id="sidebar"]//img/@src'))
        cover_url = (
            cover_src if cover_src.startswith("http") else f"{self.BASE_URL}{cover_src}"
        )

        summary_lines = tree.xpath('//div[@id="intro"]/p//text()')
        summary = "\n".join(line.strip() for line in summary_lines).strip()

        # --- Volumes & Chapters ---
        # The chapter list lives under the <dt> whose text contains "正文";
        # its chapters are the <dd> siblings that follow it.
        chapters: list[ChapterInfoDict] = []
        for dt in tree.xpath('//div[@class="listmain"]/dl/dt'):
            if "正文" not in dt.text_content():
                continue
            sib = dt.getnext()
            while sib is not None and sib.tag == "dd":
                next_sib = sib.getnext()
                anchors = sib.xpath(".//a")
                if not anchors:
                    # Malformed <dd> without a link: skip instead of crashing.
                    sib = next_sib
                    continue
                a = anchors[0]
                chap_title = a.text_content().strip()
                href = a.get("href") or ""
                url = href if href.startswith("http") else f"{self.BASE_URL}{href}"
                # removesuffix, NOT rstrip(".html"): rstrip strips a character
                # *set* and would also eat trailing 'h'/'t'/'m'/'l' of the id.
                chap_id = url.removesuffix(".html").split("/")[-1]
                chapters.append(
                    {"title": chap_title, "url": url, "chapterId": chap_id}
                )
                sib = next_sib
            break

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "serial_status": serial_status,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Extract the title and ad-filtered text content of a chapter page.

        :param html_list: Raw HTML pages; only the first one is used.
        :param chapter_id: Identifier stored in the returned dict.
        :return: Chapter data, or None when no usable text remains.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        title = self._first_str(tree.xpath('//div[@class="content"]/h1/text()'))

        nodes = tree.xpath('//div[@id="content" and contains(@class,"showtxt")]')
        if not nodes:
            return None
        content_div = nodes[0]

        lines: list[str] = []
        for raw in content_div.xpath(".//text()"):
            ln = raw.strip()
            if not ln or self._is_ad_line(ln):
                continue
            # Drop non-breaking spaces used as indentation.
            lines.append(ln.replace(chr(0xA0), ""))

        content = "\n".join(lines)
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "lewenn"},
        }
@@ -1,24 +1,34 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.parsers.linovelib.main_parser
4
- ---------------------------------------------------
3
+ novel_downloader.core.parsers.linovelib
4
+ ---------------------------------------
5
5
 
6
6
  """
7
7
 
8
8
  import json
9
9
  from itertools import islice
10
- from pathlib import PurePosixPath
11
10
  from typing import Any
12
11
 
13
12
  from lxml import html
14
13
 
15
14
  from novel_downloader.core.parsers.base import BaseParser
16
- from novel_downloader.models import ChapterDict
15
+ from novel_downloader.core.parsers.registry import register_parser
16
+ from novel_downloader.models import (
17
+ BookInfoDict,
18
+ ChapterDict,
19
+ ChapterInfoDict,
20
+ VolumeInfoDict,
21
+ )
17
22
  from novel_downloader.utils.constants import LINOVELIB_FONT_MAP_PATH
18
23
 
19
24
 
25
+ @register_parser(
26
+ site_keys=["linovelib"],
27
+ )
20
28
  class LinovelibParser(BaseParser):
21
- """ """
29
+ """
30
+ Parser for 哔哩轻小说 book pages.
31
+ """
22
32
 
23
33
  # Book info XPaths
24
34
  _BOOK_NAME_XPATH = '//div[@class="book-info"]/h1[@class="book-name"]/text()'
@@ -46,68 +56,69 @@ class LinovelibParser(BaseParser):
46
56
  self,
47
57
  html_list: list[str],
48
58
  **kwargs: Any,
49
- ) -> dict[str, Any]:
50
- """
51
- Parse a book info page and extract metadata and chapter structure.
52
-
53
- :param html_list: Raw HTML of the book info page.
54
- :return: Parsed metadata and chapter structure as a dictionary.
55
- """
59
+ ) -> BookInfoDict | None:
56
60
  if not html_list:
57
- return {}
58
- info_tree = html.fromstring(html_list[0])
59
- result: dict[str, Any] = {}
60
-
61
- result["book_name"] = self._safe_xpath(info_tree, self._BOOK_NAME_XPATH)
62
- result["author"] = self._safe_xpath(info_tree, self._AUTHOR_XPATH)
63
- result["cover_url"] = self._safe_xpath(info_tree, self._COVER_URL_XPATH)
64
- result["update_time"] = self._safe_xpath(
65
- info_tree, self._UPDATE_TIME_XPATH, replace=("最后更新:", "")
61
+ return None
62
+ tree = html.fromstring(html_list[0])
63
+
64
+ book_name = self._first_str(tree.xpath(self._BOOK_NAME_XPATH))
65
+ author = self._first_str(tree.xpath(self._AUTHOR_XPATH))
66
+ cover_url = self._first_str(tree.xpath(self._COVER_URL_XPATH))
67
+ update_time = self._first_str(
68
+ tree.xpath(self._UPDATE_TIME_XPATH), replaces=[("最后更新:", "")]
66
69
  )
67
- result["serial_status"] = self._safe_xpath(info_tree, self._SERIAL_STATUS_XPATH)
68
- result["word_count"] = self._safe_xpath(
69
- info_tree, self._WORD_COUNT_XPATH, replace=("字数:", "")
70
+ serial_status = self._first_str(tree.xpath(self._SERIAL_STATUS_XPATH))
71
+ word_count = self._first_str(
72
+ tree.xpath(self._WORD_COUNT_XPATH), replaces=[("最后更新:", "")]
70
73
  )
71
74
 
72
- result["summary"] = self._extract_intro(info_tree, self._SUMMARY_XPATH)
75
+ summary = self._extract_intro(tree, self._SUMMARY_XPATH)
73
76
 
74
77
  vol_pages = html_list[1:]
75
- volumes: list[dict[str, Any]] = []
78
+ volumes: list[VolumeInfoDict] = []
76
79
  for vol_page in vol_pages:
77
80
  vol_tree = html.fromstring(vol_page)
78
- volume_cover = self._safe_xpath(vol_tree, self._COVER_URL_XPATH)
79
- volume_name = self._safe_xpath(vol_tree, self._BOOK_NAME_XPATH)
80
- update_time = self._safe_xpath(
81
- vol_tree, self._UPDATE_TIME_XPATH, replace=("最后更新:", "")
81
+ volume_cover = self._first_str(vol_tree.xpath(self._COVER_URL_XPATH))
82
+ volume_name = self._first_str(vol_tree.xpath(self._BOOK_NAME_XPATH))
83
+ vol_update_time = self._first_str(
84
+ vol_tree.xpath(self._UPDATE_TIME_XPATH), replaces=[("最后更新:", "")]
82
85
  )
83
- word_count = self._safe_xpath(
84
- vol_tree, self._WORD_COUNT_XPATH, replace=("字数:", "")
86
+ vol_word_count = self._first_str(
87
+ vol_tree.xpath(self._WORD_COUNT_XPATH), replaces=[("字数:", "")]
85
88
  )
86
89
  volume_intro = self._extract_intro(vol_tree, self._SUMMARY_XPATH)
87
90
 
88
- chapters = []
91
+ chapters: list[ChapterInfoDict] = []
89
92
  chapter_elements = vol_tree.xpath(self._CHAPTERS_XPATH)
90
93
  for a in chapter_elements:
91
94
  title = a.text.strip()
92
95
  url = a.attrib.get("href", "").strip()
93
- chap_path = PurePosixPath(url.rstrip("/"))
94
- chapters.append(
95
- {"title": title, "url": url, "chapterId": chap_path.stem}
96
- )
96
+ # '/novel/4668/276082.html' -> '276082'
97
+ cid = url.split("/")[-1].split(".")[0]
98
+ chapters.append({"title": title, "url": url, "chapterId": cid})
97
99
 
98
100
  volumes.append(
99
101
  {
100
102
  "volume_name": volume_name,
101
103
  "volume_cover": volume_cover,
102
- "update_time": update_time,
103
- "word_count": word_count,
104
+ "update_time": vol_update_time,
105
+ "word_count": vol_word_count,
104
106
  "volume_intro": volume_intro,
105
107
  "chapters": chapters,
106
108
  }
107
109
  )
108
- result["volumes"] = volumes
109
110
 
110
- return result
111
+ return {
112
+ "book_name": book_name,
113
+ "author": author,
114
+ "cover_url": cover_url,
115
+ "serial_status": serial_status,
116
+ "word_count": word_count,
117
+ "summary": summary,
118
+ "update_time": update_time,
119
+ "volumes": volumes,
120
+ "extra": {},
121
+ }
111
122
 
112
123
  def parse_chapter(
113
124
  self,
@@ -115,13 +126,6 @@ class LinovelibParser(BaseParser):
115
126
  chapter_id: str,
116
127
  **kwargs: Any,
117
128
  ) -> ChapterDict | None:
118
- """
119
- Parse chapter pages and extract clean text or simplified HTML.
120
-
121
- :param html_list: Raw HTML of the chapter page.
122
- :param chapter_id: Identifier of the chapter being parsed.
123
- :return: Cleaned chapter content as plain text or minimal HTML.
124
- """
125
129
  if not html_list:
126
130
  return None
127
131
  title_text: str = ""
@@ -165,25 +169,10 @@ class LinovelibParser(BaseParser):
165
169
  return {
166
170
  "id": chapter_id,
167
171
  "title": title_text,
168
- "content": "\n\n".join(contents),
172
+ "content": "\n".join(contents),
169
173
  "extra": {"site": "linovelib"},
170
174
  }
171
175
 
172
- def _safe_xpath(
173
- self,
174
- tree: html.HtmlElement,
175
- path: str,
176
- replace: tuple[str, str] | None = None,
177
- ) -> str:
178
- result = tree.xpath(path)
179
- if not result:
180
- return ""
181
- value: str = result[0].strip()
182
- if replace:
183
- old, new = replace
184
- value = value.replace(old, new)
185
- return value
186
-
187
176
  @staticmethod
188
177
  def _extract_intro(tree: html.HtmlElement, xpath: str) -> str:
189
178
  paragraphs = tree.xpath(xpath.replace("//text()", ""))
@@ -192,7 +181,7 @@ class LinovelibParser(BaseParser):
192
181
  text_segments = p.xpath(".//text()")
193
182
  cleaned = [seg.strip() for seg in text_segments if seg.strip()]
194
183
  lines.append("\n".join(cleaned))
195
- return "\n\n".join(lines)
184
+ return "\n".join(lines)
196
185
 
197
186
  @staticmethod
198
187
  def _is_encrypted(html: str) -> bool: