novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +79 -66
  6. novel_downloader/cli/export.py +17 -21
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +206 -209
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +5 -5
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +17 -12
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +20 -14
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +6 -19
  79. novel_downloader/core/interfaces/parser.py +7 -8
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +64 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +64 -69
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/main_parser.py +756 -48
  100. novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
  101. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  102. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  103. novel_downloader/core/parsers/quanben5.py +103 -0
  104. novel_downloader/core/parsers/registry.py +5 -16
  105. novel_downloader/core/parsers/sfacg.py +38 -45
  106. novel_downloader/core/parsers/shencou.py +215 -0
  107. novel_downloader/core/parsers/shuhaige.py +111 -0
  108. novel_downloader/core/parsers/tongrenquan.py +116 -0
  109. novel_downloader/core/parsers/ttkan.py +132 -0
  110. novel_downloader/core/parsers/wanbengo.py +191 -0
  111. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  112. novel_downloader/core/parsers/xiguashuwu.py +429 -0
  113. novel_downloader/core/parsers/xs63b.py +161 -0
  114. novel_downloader/core/parsers/xshbook.py +134 -0
  115. novel_downloader/core/parsers/yamibo.py +87 -131
  116. novel_downloader/core/parsers/yibige.py +166 -0
  117. novel_downloader/core/searchers/__init__.py +34 -3
  118. novel_downloader/core/searchers/aaatxt.py +107 -0
  119. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  120. novel_downloader/core/searchers/base.py +112 -36
  121. novel_downloader/core/searchers/dxmwx.py +105 -0
  122. novel_downloader/core/searchers/eightnovel.py +84 -0
  123. novel_downloader/core/searchers/esjzone.py +43 -25
  124. novel_downloader/core/searchers/hetushu.py +92 -0
  125. novel_downloader/core/searchers/i25zw.py +93 -0
  126. novel_downloader/core/searchers/ixdzs8.py +107 -0
  127. novel_downloader/core/searchers/jpxs123.py +107 -0
  128. novel_downloader/core/searchers/piaotia.py +100 -0
  129. novel_downloader/core/searchers/qbtr.py +106 -0
  130. novel_downloader/core/searchers/qianbi.py +74 -40
  131. novel_downloader/core/searchers/quanben5.py +144 -0
  132. novel_downloader/core/searchers/registry.py +24 -8
  133. novel_downloader/core/searchers/shuhaige.py +124 -0
  134. novel_downloader/core/searchers/tongrenquan.py +110 -0
  135. novel_downloader/core/searchers/ttkan.py +92 -0
  136. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  137. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  138. novel_downloader/core/searchers/xs63b.py +104 -0
  139. novel_downloader/locales/en.json +34 -85
  140. novel_downloader/locales/zh.json +35 -86
  141. novel_downloader/models/__init__.py +21 -22
  142. novel_downloader/models/book.py +44 -0
  143. novel_downloader/models/config.py +4 -37
  144. novel_downloader/models/login.py +1 -1
  145. novel_downloader/models/search.py +5 -0
  146. novel_downloader/resources/config/settings.toml +8 -70
  147. novel_downloader/resources/json/xiguashuwu.json +718 -0
  148. novel_downloader/utils/__init__.py +13 -24
  149. novel_downloader/utils/chapter_storage.py +5 -5
  150. novel_downloader/utils/constants.py +4 -31
  151. novel_downloader/utils/cookies.py +38 -35
  152. novel_downloader/utils/crypto_utils/__init__.py +7 -0
  153. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  154. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  155. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  156. novel_downloader/utils/crypto_utils/rc4.py +54 -0
  157. novel_downloader/utils/epub/__init__.py +3 -4
  158. novel_downloader/utils/epub/builder.py +6 -6
  159. novel_downloader/utils/epub/constants.py +62 -21
  160. novel_downloader/utils/epub/documents.py +95 -201
  161. novel_downloader/utils/epub/models.py +8 -22
  162. novel_downloader/utils/epub/utils.py +73 -106
  163. novel_downloader/utils/file_utils/__init__.py +2 -23
  164. novel_downloader/utils/file_utils/io.py +53 -188
  165. novel_downloader/utils/file_utils/normalize.py +1 -7
  166. novel_downloader/utils/file_utils/sanitize.py +4 -15
  167. novel_downloader/utils/fontocr/__init__.py +5 -14
  168. novel_downloader/utils/fontocr/core.py +216 -0
  169. novel_downloader/utils/fontocr/loader.py +50 -0
  170. novel_downloader/utils/logger.py +81 -65
  171. novel_downloader/utils/network.py +17 -41
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  176. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  177. novel_downloader/utils/time_utils/__init__.py +5 -11
  178. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  179. novel_downloader/utils/time_utils/sleep_utils.py +55 -49
  180. novel_downloader/web/__init__.py +13 -0
  181. novel_downloader/web/components/__init__.py +11 -0
  182. novel_downloader/web/components/navigation.py +35 -0
  183. novel_downloader/web/main.py +66 -0
  184. novel_downloader/web/pages/__init__.py +17 -0
  185. novel_downloader/web/pages/download.py +78 -0
  186. novel_downloader/web/pages/progress.py +147 -0
  187. novel_downloader/web/pages/search.py +329 -0
  188. novel_downloader/web/services/__init__.py +17 -0
  189. novel_downloader/web/services/client_dialog.py +164 -0
  190. novel_downloader/web/services/cred_broker.py +113 -0
  191. novel_downloader/web/services/cred_models.py +35 -0
  192. novel_downloader/web/services/task_manager.py +264 -0
  193. novel_downloader-2.0.1.dist-info/METADATA +172 -0
  194. novel_downloader-2.0.1.dist-info/RECORD +206 -0
  195. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
  196. novel_downloader/core/downloaders/biquge.py +0 -29
  197. novel_downloader/core/downloaders/esjzone.py +0 -29
  198. novel_downloader/core/downloaders/linovelib.py +0 -29
  199. novel_downloader/core/downloaders/sfacg.py +0 -29
  200. novel_downloader/core/downloaders/yamibo.py +0 -29
  201. novel_downloader/core/exporters/biquge.py +0 -22
  202. novel_downloader/core/exporters/esjzone.py +0 -22
  203. novel_downloader/core/exporters/qianbi.py +0 -22
  204. novel_downloader/core/exporters/sfacg.py +0 -22
  205. novel_downloader/core/exporters/yamibo.py +0 -22
  206. novel_downloader/core/fetchers/base/__init__.py +0 -14
  207. novel_downloader/core/fetchers/base/browser.py +0 -422
  208. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  209. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  210. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  211. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  212. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  213. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  214. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  215. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  216. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  217. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  218. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  219. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  220. novel_downloader/core/parsers/biquge.py +0 -139
  221. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
  222. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
  223. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
  224. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  225. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
  226. novel_downloader/models/chapter.py +0 -25
  227. novel_downloader/models/types.py +0 -13
  228. novel_downloader/tui/__init__.py +0 -7
  229. novel_downloader/tui/app.py +0 -32
  230. novel_downloader/tui/main.py +0 -17
  231. novel_downloader/tui/screens/__init__.py +0 -14
  232. novel_downloader/tui/screens/home.py +0 -198
  233. novel_downloader/tui/screens/login.py +0 -74
  234. novel_downloader/tui/styles/home_layout.tcss +0 -79
  235. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  236. novel_downloader/utils/cache.py +0 -24
  237. novel_downloader/utils/crypto_utils.py +0 -71
  238. novel_downloader/utils/fontocr/hash_store.py +0 -280
  239. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  240. novel_downloader/utils/fontocr/model_loader.py +0 -69
  241. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  242. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  243. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  244. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  245. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  246. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  247. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  248. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,133 @@
1
#!/usr/bin/env python3
"""
novel_downloader.core.parsers.biquyuedu
---------------------------------------

"""

from typing import Any

from lxml import etree, html

from novel_downloader.core.parsers.base import BaseParser
from novel_downloader.core.parsers.registry import register_parser
from novel_downloader.models import (
    BookInfoDict,
    ChapterDict,
    ChapterInfoDict,
    VolumeInfoDict,
)


@register_parser(
    site_keys=["biquyuedu"],
)
class BiquyueduParser(BaseParser):
    """
    Parser for 精彩小说 book pages.
    """

    # Ad fragments; presumably consulted by BaseParser._is_ad_line -- confirm in base class.
    ADS: set[str] = {
        "笔趣阁",
        "请记住本书首发域名",
        "www.biquyuedu.com",
    }

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse a book-info page into structured metadata.

        :param html_list: raw HTML documents; only the first entry is read.
        :return: a BookInfoDict, or None when no HTML was supplied.
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        # --- Metadata ---
        book_name = self._first_str(doc.xpath("//div[@class='info']/h1/text()"))
        author = self._first_str(
            doc.xpath(
                "//div[@class='info']//div[@class='small'][1]//span[1]//a/text()"
            )
        )
        cover_url = self._first_str(
            doc.xpath("//div[@class='info']//div[@class='cover']//img/@src")
        )
        update_time = self._first_str(
            doc.xpath("//div[@class='info']//div[@class='small'][2]//span[1]/text()"),
            replaces=[("更新时间:", "")],
        )

        # Second breadcrumb (if present) is used as the sole tag.
        crumbs = doc.xpath("//div[@class='path']//div[@class='p']/a/text()")
        category = self._first_str(crumbs[1:2])
        tags = [category] if category else []

        intro_text = doc.xpath(
            "string(//div[@class='info']//div[@class='intro'])"
        ).strip()
        # Strip the leading label and anything from the trailing author label onward.
        summary = intro_text.replace("简介:", "", 1).split("作者:", 1)[0].strip()

        # --- Chapters ---
        chapters: list[ChapterInfoDict] = []
        for link in doc.xpath(
            "//div[@class='listmain']//dl/dd[preceding-sibling::dt[1][contains(text(),'全文')]]/a"
        ):
            raw_href = link.get("href") or ""
            chapters.append(
                {
                    "title": (link.get("title") or link.text_content() or "").strip(),
                    "url": raw_href.strip(),
                    # e.g. ".../12345.html" -> "12345"
                    "chapterId": raw_href.rsplit("/", 1)[-1].split(".", 1)[0],
                }
            )

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "tags": tags,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a chapter page into a ChapterDict.

        :param html_list: raw HTML documents; only the first entry is read.
        :param chapter_id: identifier recorded in the returned dict.
        :return: chapter data, or None when HTML is missing or content is empty.
        """
        if not html_list:
            return None
        doc = html.fromstring(html_list[0])

        # Chapter title sits in the page header.
        title = self._first_str(doc.xpath("//div[@class='content']/h1/text()"))

        # Locate the main content container; bail out if the page layout differs.
        containers = doc.xpath("//div[@id='content']")
        if not containers:
            return None
        body = containers[0]

        # Remove inline scripts before harvesting text nodes.
        etree.strip_elements(body, "script", with_tail=False)

        paragraphs: list[str] = []
        for raw in body.xpath(".//text()[normalize-space()]"):
            if self._is_ad_line(raw):
                continue
            paragraphs.append(raw.replace("\xa0", "").strip())

        content = "\n".join(paragraphs)
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "biquyuedu"},
        }
@@ -0,0 +1,162 @@
1
#!/usr/bin/env python3
"""
novel_downloader.core.parsers.dxmwx
-----------------------------------

"""

import re
from datetime import datetime
from typing import Any

from lxml import html

from novel_downloader.core.parsers.base import BaseParser
from novel_downloader.core.parsers.registry import register_parser
from novel_downloader.models import (
    BookInfoDict,
    ChapterDict,
    ChapterInfoDict,
    VolumeInfoDict,
)


@register_parser(
    site_keys=["dxmwx"],
)
class DxmwxParser(BaseParser):
    """
    Parser for 大熊猫文学网 book pages.
    """

    # Pre-compiled patterns used across parsing helpers.
    _RE_DATE = re.compile(r"\d{4}-\d{2}-\d{2}")
    _RE_SPACES = re.compile(r"[ \t\u3000]+")
    _RE_NEWLINES = re.compile(r"\n{2,}")
    _RE_TITLE_WS = re.compile(r"\s+")

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Build book metadata from an info page plus a catalog page.

        :param html_list: [info page HTML, catalog page HTML].
        :return: a BookInfoDict, or None when fewer than two pages were given.
        """
        if len(html_list) < 2:
            return None

        info_doc = html.fromstring(html_list[0])
        catalog_doc = html.fromstring(html_list[1])

        book_name = self._first_str(
            info_doc.xpath("//span[contains(@style,'font-size: 24px')]/text()")
        )
        author = self._first_str(
            info_doc.xpath(
                "//div[contains(@style,'height: 28px') and contains(., '著')]//a/text()"
            )
        )
        tags = [
            tag.strip()
            for tag in info_doc.xpath("//span[@class='typebut']//a/text()")
            if tag.strip()
        ]
        # Site serves relative cover paths; prefix with the site origin.
        cover_url = "https://www.dxmwx.org" + self._first_str(
            info_doc.xpath("//img[@class='imgwidth']/@src")
        )

        raw_update = self._first_str(
            info_doc.xpath(
                "normalize-space(string(//span[starts-with(normalize-space(.), '更新时间:')]))"  # noqa: E501
            )
        )
        raw_update = raw_update.replace("更新时间:", "").strip()
        update_time = self._normalize_update_date(raw_update)

        intro_nodes = info_doc.xpath(
            "//div[contains(@style,'min-height') and "
            "contains(@style,'padding-left') and contains(@style,'padding-right')][1]"
        )
        summary = ""
        if intro_nodes:
            pieces = [
                piece.replace("\xa0", " ").strip()
                for piece in intro_nodes[0].xpath(".//text()")
            ]
            summary = "\n".join(p for p in pieces if p)
            # Drop any leading colon left over from the intro label.
            summary = re.sub(r"^\s*[::]\s*", "", summary)
            summary = self._clean_spaces(summary)

        chapters: list[ChapterInfoDict] = []
        anchors = catalog_doc.xpath(
            "//div[contains(@style,'height:40px') and contains(@style,'border-bottom')]//a"  # noqa: E501
        )
        for anchor in anchors:
            href = anchor.get("href") or ""
            chap_title = (anchor.text_content() or "").strip()
            if not href or not chap_title:
                continue
            # "/read/57215_50197663.html" -> "50197663"
            chap_id = href.split("read/", 1)[-1].split(".html", 1)[0].split("_")[-1]
            chapters.append({"title": chap_title, "url": href, "chapterId": chap_id})
        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "tags": tags,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a chapter page into a ChapterDict.

        :param html_list: raw HTML documents; only the first entry is read.
        :param chapter_id: identifier recorded in the returned dict; also used
            for the fallback title when the page has none.
        :return: chapter data, or None when HTML or content is missing.
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        title = self._first_str(doc.xpath("//h1[@id='ChapterTitle']/text()"))
        title = self._RE_TITLE_WS.sub(" ", title).strip()
        if not title:
            title = f"第 {chapter_id} 章"

        lines: list[str] = []
        for para in doc.xpath("//div[@id='Lab_Contents']//p"):
            cleaned = self._clean_spaces(para.text_content())
            # Skip empty paragraphs and the site's self-promotion lines.
            if not cleaned or "点这里听书" in cleaned or "大熊猫文学" in cleaned:
                continue
            lines.append(cleaned)

        content = "\n".join(lines).strip()
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "dxmwx"},
        }

    @classmethod
    def _clean_spaces(cls, s: str) -> str:
        """Collapse runs of horizontal whitespace and blank lines, then strip."""
        collapsed = cls._RE_SPACES.sub(" ", s.replace("\xa0", " "))
        return cls._RE_NEWLINES.sub("\n", collapsed).strip()

    @classmethod
    def _normalize_update_date(cls, raw: str) -> str:
        """Return a YYYY-MM-DD string (today's date when none can be found)."""
        found = cls._RE_DATE.search(raw) if raw else None
        if found:
            return found.group(0)
        return datetime.now().strftime("%Y-%m-%d")
@@ -0,0 +1,224 @@
1
#!/usr/bin/env python3
"""
novel_downloader.core.parsers.eightnovel
----------------------------------------

"""

import re
from typing import Any

from lxml import html

from novel_downloader.core.parsers.base import BaseParser
from novel_downloader.core.parsers.registry import register_parser
from novel_downloader.models import (
    BookInfoDict,
    ChapterDict,
    ChapterInfoDict,
    VolumeInfoDict,
)


@register_parser(
    site_keys=["eightnovel", "8novel"],
)
class EightnovelParser(BaseParser):
    """
    Parser for 无限轻小说 book pages.
    """

    BASE_URL = "https://www.8novel.com"
    # Captures the quoted body of JS expressions like "a,b,c".split(",").
    _SPLIT_STR_PATTERN = re.compile(
        r'["\']([^"\']+)["\']\s*\.split\s*\(\s*["\']\s*,\s*["\']\s*\)', re.DOTALL
    )
    _RE_AUTHOR = re.compile(r"作者[::]?\s*")
    _RE_UPDATE = re.compile(r"更新[::]?\s*")

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse the book page into structured metadata with per-volume chapters.

        :param html_list: raw HTML documents; only the first entry is read.
        :return: a BookInfoDict, or None when no HTML was supplied.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        # --- Basic metadata ---
        book_name = self._first_str(tree.xpath("//li[contains(@class,'h2')]/text()"))

        author_raw = self._first_str(
            tree.xpath("//span[contains(@class,'item-info-author')]/text()")
        )
        author = self._RE_AUTHOR.sub("", author_raw)

        cover_url = self.BASE_URL + self._first_str(
            tree.xpath("//div[contains(@class,'item-cover')]//img/@src")
        )

        update_raw = self._first_str(
            tree.xpath("//span[contains(@class,'item-info-date')]/text()")
        )
        update_time = self._RE_UPDATE.sub("", update_raw)

        # Second counter span holds the word count (unit: 萬字).
        counts = tree.xpath(
            "//li[@class='small text-gray']//span[contains(@class,'item-info-num')]/text()"  # noqa: E501
        )
        word_count = counts[1].strip() + "萬字" if len(counts) >= 2 else ""

        tags = tree.xpath("//meta[@property='og:novel:category']/@content")

        # --- Summary ---
        summary_nodes = tree.xpath(
            "//li[contains(@class,'full_text') and contains(@class,'mt-2')]"
        )
        if summary_nodes:
            texts = [t.strip() for t in summary_nodes[0].itertext()]
            summary = "\n".join(line for line in texts if line)
        else:
            summary = ""

        # --- Chapters / Volumes ---
        volumes: list[VolumeInfoDict] = []
        for vol_div in tree.xpath("//div[contains(@class,'folder') and @pid]"):
            # Volume title (text before the "/" separator).
            h3 = vol_div.xpath(".//div[contains(@class,'vol-title')]//h3")
            vol_name = (
                h3[0].text_content().split("/")[0].strip() if h3 else "Unnamed Volume"
            )

            # Chapters
            chapters: list[ChapterInfoDict] = []
            for a in vol_div.xpath(
                ".//a[contains(@class,'episode_li') and contains(@class,'d-block')]"
            ):
                title = (a.text_content() or "").strip()
                href = a.get("href") or ""
                if not href or not title:
                    continue
                url = href if href.startswith("http") else self.BASE_URL + href
                chapter_id = href.split("?")[-1]  # "/read/3355/?270015" -> "270015"
                chapters.append({"title": title, "url": url, "chapterId": chapter_id})

            volumes.append({"volume_name": vol_name, "chapters": chapters})

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "word_count": word_count,
            "tags": tags,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a chapter into text segments (paragraphs and inline <img> tags).

        :param html_list: [catalog page HTML (for the id->title map),
            chapter-content HTML fragment].
        :param chapter_id: identifier used to look up the chapter title.
        :return: chapter data, or None when input or content is missing.
        """
        if len(html_list) < 2:
            return None

        # Title lookup is best-effort: a malformed catalog page should not
        # prevent the chapter content from being saved.
        try:
            id_title_map = self._build_id_title_map(html_list[0])
            title = id_title_map.get(chapter_id) or ""
        except Exception:
            title = ""

        wrapper = html.fromstring(f"<div>{html_list[1]}</div>")

        segments: list[str] = []

        self._append_segment(segments, wrapper.text)

        for node in wrapper:
            tag = node.tag.lower() if isinstance(node.tag, str) else ""

            # A picture‑gallery block
            if tag == "div" and "content-pics" in (node.get("class") or ""):
                for img in node.xpath(".//img"):
                    src = img.get("src")
                    if not src:
                        # Guard against <img> without src (mirrors the
                        # standalone-<img> branch); previously raised
                        # AttributeError on None.
                        continue
                    full = src if not src.startswith("/") else self.BASE_URL + src
                    segments.append(f'<img src="{full}" />')
                self._append_segment(segments, node.tail)

            # Standalone <img>
            elif tag == "img":
                src = node.get("src")
                if not src:
                    continue
                full = src if not src.startswith("/") else self.BASE_URL + src
                segments.append(f'<img src="{full}" />')
                self._append_segment(segments, node.tail)

            # Line break -> text in .tail is next paragraph
            elif tag == "br":
                self._append_segment(segments, node.tail)

            # Any other element -> get its text content
            else:
                self._append_segment(segments, node.text_content())
                self._append_segment(segments, node.tail)

        # Remove final ad line if present (site ads start with an "8" variant).
        if segments and segments[-1] and segments[-1][0] in ("8", "⑧", "⒏"):
            segments.pop()

        content = "\n".join(segments).strip()
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "eightnovel"},
        }

    @staticmethod
    def _append_segment(segments: list[str], text: str | None) -> None:
        """
        Strip *text* and append it to *segments* when non-empty.
        """
        if not text:
            return
        cleaned = text.strip()
        if cleaned:
            segments.append(cleaned)

    @classmethod
    def _build_id_title_map(cls, html_str: str) -> dict[str, str]:
        """
        Extracts two comma-split lists from html_str:
        * A numeric list of IDs (one element longer)
        * A list of titles

        :raises ValueError: when either list is missing or the lengths
            do not satisfy ``len(ids) == len(titles) + 1``.
        """
        id_list = None
        title_list = None

        for content in cls._SPLIT_STR_PATTERN.findall(html_str):
            items = [s.strip() for s in content.split(",")]
            if items == [""]:
                # skip bids=""
                continue
            if all(item.isdigit() for item in items):
                id_list = items
            else:
                title_list = items

            if id_list and title_list:
                break

        if not id_list or not title_list:
            raise ValueError("Could not locate both ID and title lists")
        if len(id_list) != len(title_list) + 1:
            raise ValueError(
                "ID list must be exactly one element longer than title list"
            )

        return dict(zip(id_list[:-1], title_list, strict=False))