novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.biquyuedu
4
+ ---------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import etree, html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
@register_parser(
    site_keys=["biquyuedu"],
)
class BiquyueduParser(BaseParser):
    """
    Parser for 精彩小说 book pages.
    """

    # Substrings used to recognize advertisement lines inside chapter text.
    ADS: set[str] = {
        "笔趣阁",
        "请记住本书首发域名",
        "www.biquyuedu.com",
    }

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Build a BookInfoDict from the book's landing page.

        :param html_list: [book info page html]
        :return: parsed metadata, or ``None`` when no html was given
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        # --- Metadata ---
        book_name = self._first_str(doc.xpath("//div[@class='info']/h1/text()"))
        author = self._first_str(
            doc.xpath(
                "//div[@class='info']//div[@class='small'][1]//span[1]//a/text()"
            )
        )
        cover_url = self._first_str(
            doc.xpath("//div[@class='info']//div[@class='cover']//img/@src")
        )
        update_time = self._first_str(
            doc.xpath("//div[@class='info']//div[@class='small'][2]//span[1]/text()"),
            replaces=[("更新时间:", "")],
        )

        # Second breadcrumb entry (if present) doubles as the category tag.
        breadcrumbs = doc.xpath("//div[@class='path']//div[@class='p']/a/text()")
        category = self._first_str(breadcrumbs[1:2])
        tags = [category] if category else []

        intro = doc.xpath(
            "string(//div[@class='info']//div[@class='intro'])"
        ).strip()
        summary = intro.replace("简介:", "", 1).split("作者:", 1)[0].strip()

        # --- Chapters: <dd> entries directly following the '全文' <dt> ---
        chapters: list[ChapterInfoDict] = []
        for anchor in doc.xpath(
            "//div[@class='listmain']//dl/dd[preceding-sibling::dt[1][contains(text(),'全文')]]/a"
        ):
            href = anchor.get("href") or ""
            chapters.append(
                {
                    "title": (anchor.get("title") or anchor.text_content() or "").strip(),
                    "url": href.strip(),
                    # ".../12345.html" -> "12345"
                    "chapterId": href.rsplit("/", 1)[-1].split(".", 1)[0],
                }
            )

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "tags": tags,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Build a ChapterDict from one chapter page.

        :param html_list: [chapter page html]
        :param chapter_id: identifier echoed back into the result
        :return: parsed chapter, or ``None`` when content is missing/empty
        """
        if not html_list:
            return None
        doc = html.fromstring(html_list[0])

        title = self._first_str(doc.xpath("//div[@class='content']/h1/text()"))

        containers = doc.xpath("//div[@id='content']")
        if not containers:
            return None
        container = containers[0]

        # Drop embedded <script> tags before harvesting text nodes.
        etree.strip_elements(container, "script", with_tail=False)

        lines: list[str] = []
        for piece in container.xpath(".//text()[normalize-space()]"):
            if self._is_ad_line(piece):
                continue
            lines.append(piece.replace("\xa0", "").strip())

        content = "\n".join(lines)
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "biquyuedu"},
        }
@@ -0,0 +1,162 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.dxmwx
4
+ -----------------------------------
5
+
6
+ """
7
+
8
+ import re
9
+ from datetime import datetime
10
+ from typing import Any
11
+
12
+ from lxml import html
13
+
14
+ from novel_downloader.core.parsers.base import BaseParser
15
+ from novel_downloader.core.parsers.registry import register_parser
16
+ from novel_downloader.models import (
17
+ BookInfoDict,
18
+ ChapterDict,
19
+ ChapterInfoDict,
20
+ VolumeInfoDict,
21
+ )
22
+
23
+
24
@register_parser(
    site_keys=["dxmwx"],
)
class DxmwxParser(BaseParser):
    """
    Parser for 大熊猫文学网 book pages.
    """

    # Pre-compiled patterns shared by all instances.
    _RE_DATE = re.compile(r"\d{4}-\d{2}-\d{2}")
    _RE_SPACES = re.compile(r"[ \t\u3000]+")
    _RE_NEWLINES = re.compile(r"\n{2,}")
    _RE_TITLE_WS = re.compile(r"\s+")

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Build a BookInfoDict from the info page and the catalog page.

        :param html_list: [info page html, catalog page html]
        :return: parsed metadata, or ``None`` when fewer than two pages given
        """
        if len(html_list) < 2:
            return None

        info_doc = html.fromstring(html_list[0])
        catalog_doc = html.fromstring(html_list[1])

        book_name = self._first_str(
            info_doc.xpath("//span[contains(@style,'font-size: 24px')]/text()")
        )
        author = self._first_str(
            info_doc.xpath(
                "//div[contains(@style,'height: 28px') and contains(., '著')]//a/text()"
            )
        )

        tags: list[str] = []
        for raw_tag in info_doc.xpath("//span[@class='typebut']//a/text()"):
            cleaned = raw_tag.strip()
            if cleaned:
                tags.append(cleaned)

        cover_url = "https://www.dxmwx.org" + self._first_str(
            info_doc.xpath("//img[@class='imgwidth']/@src")
        )

        # NOTE(review): normalize-space() makes this xpath return a plain str,
        # not a node list -- confirm _first_str also accepts a bare string.
        raw_update = self._first_str(
            info_doc.xpath(
                "normalize-space(string(//span[starts-with(normalize-space(.), '更新时间:')]))"  # noqa: E501
            )
        )
        raw_update = raw_update.replace("更新时间:", "").strip()
        update_time = self._normalize_update_date(raw_update)

        intro_nodes = info_doc.xpath(
            "//div[contains(@style,'min-height') and "
            "contains(@style,'padding-left') and contains(@style,'padding-right')][1]"
        )
        summary = ""
        if intro_nodes:
            pieces = [
                t.replace("\xa0", " ").strip() for t in intro_nodes[0].xpath(".//text()")
            ]
            summary = "\n".join(p for p in pieces if p)
            # Strip a leading ":"/":" left over from the "简介" label.
            summary = re.sub(r"^\s*[::]\s*", "", summary)
            summary = self._clean_spaces(summary)

        chapters: list[ChapterInfoDict] = []
        for anchor in catalog_doc.xpath(
            "//div[contains(@style,'height:40px') and contains(@style,'border-bottom')]//a"  # noqa: E501
        ):
            href = anchor.get("href") or ""
            chap_title = (anchor.text_content() or "").strip()
            if not href or not chap_title:
                continue
            # "/read/57215_50197663.html" -> "50197663"
            chap_id = href.split("read/", 1)[-1].split(".html", 1)[0].split("_")[-1]
            chapters.append({"title": chap_title, "url": href, "chapterId": chap_id})
        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "tags": tags,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Build a ChapterDict from one chapter page.

        :param html_list: [chapter page html]
        :param chapter_id: identifier echoed back into the result
        :return: parsed chapter, or ``None`` when content is missing/empty
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        raw_title = self._first_str(doc.xpath("//h1[@id='ChapterTitle']/text()"))
        # Fall back to a synthetic title when the page supplies none.
        title = self._RE_TITLE_WS.sub(" ", raw_title).strip() or f"第 {chapter_id} 章"

        paragraphs: list[str] = []
        for node in doc.xpath("//div[@id='Lab_Contents']//p"):
            text = self._clean_spaces(node.text_content())
            # Skip empty paragraphs and the site's promo lines.
            if not text or "点这里听书" in text or "大熊猫文学" in text:
                continue
            paragraphs.append(text)

        content = "\n".join(paragraphs).strip()
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "dxmwx"},
        }

    @classmethod
    def _clean_spaces(cls, s: str) -> str:
        """Collapse blank runs, squeeze repeated newlines, and trim."""
        collapsed = cls._RE_SPACES.sub(" ", s.replace("\xa0", " "))
        return cls._RE_NEWLINES.sub("\n", collapsed).strip()

    @classmethod
    def _normalize_update_date(cls, raw: str) -> str:
        """Return a YYYY-MM-DD string."""
        if raw:
            m = cls._RE_DATE.search(raw)
            if m:
                return m.group(0)
        # No parseable date -> today's date.
        return datetime.now().strftime("%Y-%m-%d")
@@ -0,0 +1,224 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.eightnovel
4
+ ----------------------------------------
5
+
6
+ """
7
+
8
+ import re
9
+ from typing import Any
10
+
11
+ from lxml import html
12
+
13
+ from novel_downloader.core.parsers.base import BaseParser
14
+ from novel_downloader.core.parsers.registry import register_parser
15
+ from novel_downloader.models import (
16
+ BookInfoDict,
17
+ ChapterDict,
18
+ ChapterInfoDict,
19
+ VolumeInfoDict,
20
+ )
21
+
22
+
23
@register_parser(
    site_keys=["eightnovel", "8novel"],
)
class EightnovelParser(BaseParser):
    """
    Parser for 无限轻小说 book pages.
    """

    BASE_URL = "https://www.8novel.com"
    # Matches '...'.split(',') string blobs embedded in the page's scripts.
    _SPLIT_STR_PATTERN = re.compile(
        r'["\']([^"\']+)["\']\s*\.split\s*\(\s*["\']\s*,\s*["\']\s*\)', re.DOTALL
    )
    _RE_AUTHOR = re.compile(r"作者[::]?\s*")
    _RE_UPDATE = re.compile(r"更新[::]?\s*")

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Build a BookInfoDict from the book's landing page.

        :param html_list: [book info page html]
        :return: parsed metadata, or ``None`` when no html was given
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        # --- Basic metadata ---
        book_name = self._first_str(tree.xpath("//li[contains(@class,'h2')]/text()"))

        author_raw = self._first_str(
            tree.xpath("//span[contains(@class,'item-info-author')]/text()")
        )
        author = self._RE_AUTHOR.sub("", author_raw)

        cover_url = self.BASE_URL + self._first_str(
            tree.xpath("//div[contains(@class,'item-cover')]//img/@src")
        )

        update_raw = self._first_str(
            tree.xpath("//span[contains(@class,'item-info-date')]/text()")
        )
        update_time = self._RE_UPDATE.sub("", update_raw)

        # Second numeric counter on the page is the word count (in 万字).
        counts = tree.xpath(
            "//li[@class='small text-gray']//span[contains(@class,'item-info-num')]/text()"  # noqa: E501
        )
        word_count = counts[1].strip() + "萬字" if len(counts) >= 2 else ""

        tags = tree.xpath("//meta[@property='og:novel:category']/@content")

        # --- Summary ---
        summary_nodes = tree.xpath(
            "//li[contains(@class,'full_text') and contains(@class,'mt-2')]"
        )
        if summary_nodes:
            texts = [t.strip() for t in summary_nodes[0].itertext()]
            summary = "\n".join(line for line in texts if line)
        else:
            summary = ""

        # --- Chapters / Volumes ---
        volumes: list[VolumeInfoDict] = []
        for vol_div in tree.xpath("//div[contains(@class,'folder') and @pid]"):
            # Volume title (text before the "/" separator)
            h3 = vol_div.xpath(".//div[contains(@class,'vol-title')]//h3")
            vol_name = (
                h3[0].text_content().split("/")[0].strip() if h3 else "Unnamed Volume"
            )

            # Chapters
            chapters: list[ChapterInfoDict] = []
            for a in vol_div.xpath(
                ".//a[contains(@class,'episode_li') and contains(@class,'d-block')]"
            ):
                title = (a.text_content() or "").strip()
                href = a.get("href") or ""
                if not href or not title:
                    continue
                url = href if href.startswith("http") else self.BASE_URL + href
                chapter_id = href.split("?")[-1]  # "/read/3355/?270015" -> "270015"
                chapters.append({"title": title, "url": url, "chapterId": chapter_id})

            volumes.append({"volume_name": vol_name, "chapters": chapters})

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "word_count": word_count,
            "tags": tags,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Build a ChapterDict from the catalog page plus a chapter fragment.

        :param html_list: [catalog page html, chapter fragment html]
        :param chapter_id: identifier echoed back into the result
        :return: parsed chapter, or ``None`` when content is missing/empty
        """
        if len(html_list) < 2:
            return None

        # Title comes from the id->title map embedded in the catalog scripts;
        # fall back to "" if the map cannot be reconstructed.
        try:
            id_title_map = self._build_id_title_map(html_list[0])
            title = id_title_map.get(chapter_id) or ""
        except Exception:
            title = ""

        # Fragment is not a full document; wrap it so lxml gets one root.
        wrapper = html.fromstring(f"<div>{html_list[1]}</div>")

        segments: list[str] = []

        self._append_segment(segments, wrapper.text)

        for node in wrapper:
            tag = node.tag.lower() if isinstance(node.tag, str) else ""

            # A picture-gallery block
            if tag == "div" and "content-pics" in (node.get("class") or ""):
                for img in node.xpath(".//img"):
                    src = img.get("src")
                    if not src:
                        # BUGFIX: skip <img> without src (previously raised
                        # AttributeError on None.startswith below).
                        continue
                    full = src if not src.startswith("/") else self.BASE_URL + src
                    segments.append(f'<img src="{full}" />')
                self._append_segment(segments, node.tail)

            # Standalone <img>
            elif tag == "img":
                src = node.get("src")
                if not src:
                    continue
                full = src if not src.startswith("/") else self.BASE_URL + src
                segments.append(f'<img src="{full}" />')
                self._append_segment(segments, node.tail)

            # Line break -> text in .tail is next paragraph
            elif tag == "br":
                self._append_segment(segments, node.tail)

            # Any other element -> get its text content
            else:
                self._append_segment(segments, node.text_content())
                self._append_segment(segments, node.tail)

        # Remove final ad line if present (site signs off with an '8' variant)
        if segments and segments[-1] and segments[-1][0] in ("8", "⑧", "⒏"):
            segments.pop()

        content = "\n".join(segments).strip()
        if not content:  # already stripped above; no second strip() needed
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "eightnovel"},
        }

    @staticmethod
    def _append_segment(segments: list[str], text: str | None) -> None:
        """
        Strip *text* and append it to *segments* when non-empty.

        (Docstring fixed: this helper does NOT filter ad lines; the trailing
        ad is removed separately in parse_chapter.)
        """
        if not text:
            return
        cleaned = text.strip()
        if cleaned:
            segments.append(cleaned)

    @classmethod
    def _build_id_title_map(cls, html_str: str) -> dict[str, str]:
        """
        Extract two comma-split script lists from *html_str* and pair them:

        - a numeric list of chapter IDs (one element longer than the titles)
        - a list of chapter titles

        :raises ValueError: if either list is missing or lengths don't match.
        """
        id_list = None
        title_list = None

        for content in cls._SPLIT_STR_PATTERN.findall(html_str):
            items = [s.strip() for s in content.split(",")]
            if items == [""]:
                # skip bids=""
                continue
            if all(item.isdigit() for item in items):
                id_list = items
            else:
                title_list = items

            if id_list and title_list:
                break

        if not id_list or not title_list:
            raise ValueError("Could not locate both ID and title lists")
        if len(id_list) != len(title_list) + 1:
            raise ValueError(
                "ID list must be exactly one element longer than title list"
            )

        # Last ID is a sentinel; pair the rest with titles positionally.
        return dict(zip(id_list[:-1], title_list, strict=False))