novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.xshbook
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
+ @register_parser(
23
+ site_keys=["xshbook"],
24
+ )
25
+ class XshbookParser(BaseParser):
26
+ """Parser for 小说虎 book pages."""
27
+
28
+ BASE = "http://www.xshbook.com"
29
+
30
+ def parse_book_info(
31
+ self,
32
+ html_list: list[str],
33
+ **kwargs: Any,
34
+ ) -> BookInfoDict | None:
35
+ if not html_list:
36
+ return None
37
+
38
+ tree = html.fromstring(html_list[0])
39
+
40
+ book_name = self._first_str(tree.xpath("//div[@id='info']/h1/text()"))
41
+
42
+ author = self._first_str(
43
+ tree.xpath("//div[@id='info']/p[1]/text()"),
44
+ replaces=[("\xa0", ""), ("作者:", "")],
45
+ )
46
+
47
+ update_time = self._first_str(
48
+ tree.xpath("//meta[@property='og:novel:update_time']/@content")
49
+ )
50
+
51
+ summary = "\n".join(
52
+ self._first_str(p.xpath("string()").splitlines())
53
+ for p in tree.xpath("//div[@id='intro']//p")
54
+ ).strip()
55
+ summary = summary.split("本站提示", 1)[0].strip()
56
+
57
+ cover_url = self._first_str(tree.xpath("//div[@id='fmimg']//img/@src"))
58
+
59
+ book_type = self._first_str(tree.xpath("//div[@class='con_top']/a[2]/text()"))
60
+ tags: list[str] = [book_type] if book_type else []
61
+
62
+ chapters: list[ChapterInfoDict] = []
63
+ for a in tree.xpath("//div[@id='list']//dd/a"):
64
+ href = a.get("href", "")
65
+ title = self._norm_space(a.text_content())
66
+ # /95071/95071941/389027455.html -> "389027455"
67
+ chapter_id = href.rsplit("/", 1)[-1].split(".", 1)[0]
68
+ chapters.append({"title": title, "url": href, "chapterId": chapter_id})
69
+
70
+ volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
71
+
72
+ return {
73
+ "book_name": book_name,
74
+ "author": author,
75
+ "cover_url": cover_url,
76
+ "update_time": update_time,
77
+ "summary": summary,
78
+ "tags": tags,
79
+ "volumes": volumes,
80
+ "extra": {},
81
+ }
82
+
83
+ def parse_chapter(
84
+ self,
85
+ html_list: list[str],
86
+ chapter_id: str,
87
+ **kwargs: Any,
88
+ ) -> ChapterDict | None:
89
+ if not html_list:
90
+ return None
91
+ tree = html.fromstring(html_list[0])
92
+
93
+ title = self._first_str(tree.xpath("//div[@class='bookname']/h1/text()"))
94
+ if not title:
95
+ title = self._first_str(
96
+ tree.xpath("//div[@class='con_top']/text()[last()]")
97
+ )
98
+
99
+ cont_nodes = tree.xpath("//div[@id='content']")
100
+ if not cont_nodes:
101
+ return None
102
+ cont = cont_nodes[0]
103
+
104
+ # remove scripts under content
105
+ for s in cont.xpath(".//script"):
106
+ s.getparent().remove(s)
107
+
108
+ paragraphs: list[str] = []
109
+ for p in cont.xpath(".//p"):
110
+ text = html.tostring(p, method="text", encoding="unicode")
111
+ text = text.replace("\xa0", " ")
112
+ # filter boilerplate lines
113
+ bad = (
114
+ "谨记我们的网址" in text
115
+ or "温馨提示" in text
116
+ or "提示" in text
117
+ and "本文" not in text
118
+ and len(text) < 60
119
+ or "分享" in text
120
+ and len(text) < 40
121
+ )
122
+ if not bad:
123
+ paragraphs.append(text)
124
+
125
+ content = "\n".join(self._norm_space(p) for p in paragraphs if p.strip())
126
+ if not content.strip():
127
+ return None
128
+
129
+ return {
130
+ "id": chapter_id,
131
+ "title": title,
132
+ "content": content,
133
+ "extra": {"site": "xshbook"},
134
+ }
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.yamibo
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
+ @register_parser(
23
+ site_keys=["yamibo"],
24
+ )
25
+ class YamiboParser(BaseParser):
26
+ """
27
+ Parser for 百合会 book pages.
28
+ """
29
+
30
+ BASE_URL = "https://www.yamibo.com"
31
+
32
+ def parse_book_info(
33
+ self,
34
+ html_list: list[str],
35
+ **kwargs: Any,
36
+ ) -> BookInfoDict | None:
37
+ if not html_list:
38
+ return None
39
+
40
+ tree = html.fromstring(html_list[0])
41
+
42
+ book_name = self._first_str(
43
+ tree.xpath('//h3[contains(@class,"col-md-12")]/text()')
44
+ )
45
+ author = self._first_str(
46
+ tree.xpath('//h5[contains(@class,"text-warning")]/text()')
47
+ )
48
+ cover_url = self.BASE_URL + self._first_str(
49
+ tree.xpath('//img[contains(@class,"img-responsive")]/@src')
50
+ )
51
+
52
+ update_time = self._first_str(
53
+ tree.xpath('//p[contains(text(),"更新时间:")]/text()'),
54
+ replaces=[("更新时间:", "")],
55
+ )
56
+ serial_status = self._first_str(
57
+ tree.xpath('//p[contains(text(),"作品状态:")]/text()'),
58
+ replaces=[("作品状态:", "")],
59
+ )
60
+ book_type = self._first_str(
61
+ tree.xpath('//p[contains(text(),"作品分类:")]/text()'),
62
+ replaces=[("作品分类:", "")],
63
+ )
64
+ summary = self._first_str([tree.xpath('string(//div[@id="w0-collapse1"]/div)')])
65
+
66
+ # volumes & chapters
67
+ volumes: list[VolumeInfoDict] = []
68
+ for volume_node in tree.xpath(
69
+ '//div[contains(@class,"panel-info") and contains(@class,"panel-default")]'
70
+ ):
71
+ volume_name = (
72
+ self._first_str(
73
+ volume_node.xpath(
74
+ './/div[contains(@class,"panel-heading")]//a/text()'
75
+ )
76
+ )
77
+ or "未命名卷"
78
+ )
79
+ chapters: list[ChapterInfoDict] = []
80
+ for chap in volume_node.xpath(
81
+ './/div[contains(@class,"panel-body")]//a[contains(@href,"view-chapter")]'
82
+ ):
83
+ title = self._first_str([chap.xpath("string()")])
84
+ url = chap.get("href", "")
85
+ chapter_id = url.split("id=")[-1]
86
+ chapters.append({"title": title, "url": url, "chapterId": chapter_id})
87
+ volumes.append({"volume_name": volume_name, "chapters": chapters})
88
+
89
+ # fallback: flat chapter list
90
+ if not volumes:
91
+ chapters = []
92
+ for chap in tree.xpath(
93
+ '//div[@class="panel-body"]//a[contains(@href,"view-chapter")]'
94
+ ):
95
+ title = self._first_str([chap.xpath("string()")])
96
+ url = chap.get("href", "")
97
+ chapter_id = url.split("id=")[-1] if "id=" in url else ""
98
+ chapters.append({"title": title, "url": url, "chapterId": chapter_id})
99
+ volumes = [{"volume_name": "单卷", "chapters": chapters}]
100
+
101
+ return {
102
+ "book_name": book_name,
103
+ "author": author,
104
+ "cover_url": cover_url,
105
+ "update_time": update_time,
106
+ "serial_status": serial_status,
107
+ "tags": [book_type],
108
+ "summary": summary,
109
+ "volumes": volumes,
110
+ "extra": {},
111
+ }
112
+
113
+ def parse_chapter(
114
+ self,
115
+ html_list: list[str],
116
+ chapter_id: str,
117
+ **kwargs: Any,
118
+ ) -> ChapterDict | None:
119
+ if not html_list:
120
+ return None
121
+ tree = html.fromstring(html_list[0])
122
+
123
+ content_lines = tree.xpath("//div[@id='w0-collapse1']//p//text()")
124
+ content = "\n".join(line.strip() for line in content_lines if line.strip())
125
+ if not content:
126
+ return None
127
+
128
+ title = self._first_str(
129
+ [tree.xpath("string(//section[contains(@class,'col-md-9')]//h3)")]
130
+ )
131
+
132
+ updated_at = self._first_str(
133
+ tree.xpath(
134
+ "//div[contains(@class,'row')]//div[contains(text(),'更新时间')]/text()"
135
+ ),
136
+ replaces=[("更新时间:", "")],
137
+ )
138
+ word_str = self._first_str(
139
+ tree.xpath(
140
+ "//div[contains(@class,'row')]//div[contains(text(),'章节字数')]/text()"
141
+ ),
142
+ replaces=[("章节字数:", "")],
143
+ )
144
+ word_count = int(word_str) if word_str.isdigit() else 0
145
+
146
+ return {
147
+ "id": chapter_id,
148
+ "title": title,
149
+ "content": content,
150
+ "extra": {
151
+ "site": "yamibo",
152
+ "word_count": word_count,
153
+ "updated_at": updated_at,
154
+ },
155
+ }
@@ -0,0 +1,166 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.yibige
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
+ @register_parser(
23
+ site_keys=["yibige"],
24
+ )
25
+ class YibigeParser(BaseParser):
26
+ """
27
+ Parser for 一笔阁 book pages.
28
+ """
29
+
30
+ ADS = {
31
+ "首发无广告",
32
+ "请分享",
33
+ "读之阁",
34
+ "小说网",
35
+ "首发地址",
36
+ "手机阅读",
37
+ "一笔阁",
38
+ "site_con_ad(",
39
+ "chapter_content(",
40
+ }
41
+
42
+ def parse_book_info(
43
+ self,
44
+ html_list: list[str],
45
+ **kwargs: Any,
46
+ ) -> BookInfoDict | None:
47
+ if len(html_list) < 2:
48
+ return None
49
+
50
+ # Parse trees
51
+ info_tree = html.fromstring(html_list[0])
52
+ catalog_tree = html.fromstring(html_list[1])
53
+
54
+ # --- From <meta> data ---
55
+ book_name = self._meta(info_tree, "og:novel:book_name") or self._first_str(
56
+ info_tree.xpath("//div[@id='info']/h1/text()")
57
+ )
58
+
59
+ author = self._meta(info_tree, "og:novel:author") or self._first_str(
60
+ info_tree.xpath("//div[@id='info']/p[a]/a/text()")
61
+ )
62
+
63
+ cover_url = self._meta(info_tree, "og:image") or self._first_str(
64
+ info_tree.xpath("//div[@id='fmimg']//img/@src")
65
+ )
66
+
67
+ update_time = self._meta(info_tree, "og:novel:update_time").replace("T", " ")
68
+ serial_status = self._meta(info_tree, "og:novel:status") or "连载中"
69
+
70
+ word_count = self._first_str(
71
+ info_tree.xpath("//div[@id='info']/p[contains(., '字数:')]/text()[1]"),
72
+ replaces=[("字数:", "")],
73
+ )
74
+
75
+ # Summary: first paragraph under #intro
76
+ summary = self._first_str(info_tree.xpath("//div[@id='intro']//p[1]/text()"))
77
+
78
+ # Category and tags
79
+ book_type = self._meta(info_tree, "og:novel:category")
80
+ tags_set = set(self._meta_all(info_tree, "book:tag"))
81
+ if book_type:
82
+ tags_set.add(book_type)
83
+ tags = list(tags_set)
84
+
85
+ # --- Chapters from the catalog page ---
86
+ chapters: list[ChapterInfoDict] = []
87
+ for a in catalog_tree.xpath("//div[@id='list']/dl/dd/a"):
88
+ href = (a.get("href") or "").strip()
89
+ if not href:
90
+ continue
91
+ title = (a.text_content() or "").strip()
92
+ if not title:
93
+ continue
94
+ # /6238/2496.html -> 2496
95
+ chap_id = href.split("/")[-1].split(".")[0]
96
+ chapters.append({"title": title, "url": href, "chapterId": chap_id})
97
+
98
+ volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
99
+
100
+ return {
101
+ "book_name": book_name,
102
+ "author": author,
103
+ "cover_url": cover_url,
104
+ "update_time": update_time,
105
+ "serial_status": serial_status,
106
+ "word_count": word_count,
107
+ "summary": summary,
108
+ "tags": tags,
109
+ "volumes": volumes,
110
+ "extra": {},
111
+ }
112
+
113
+ def parse_chapter(
114
+ self,
115
+ html_list: list[str],
116
+ chapter_id: str,
117
+ **kwargs: Any,
118
+ ) -> ChapterDict | None:
119
+ if not html_list:
120
+ return None
121
+ tree = html.fromstring(html_list[0])
122
+
123
+ title = self._first_str(tree.xpath("//div[@class='bookname']/h1/text()"))
124
+
125
+ paragraphs: list[str] = []
126
+ for p in tree.xpath("//div[@id='content']//p"):
127
+ txt = self._norm_space(p.text_content())
128
+ if not txt or self._is_ad(txt):
129
+ continue
130
+ paragraphs.append(txt)
131
+
132
+ content = "\n".join(paragraphs).strip()
133
+ if not content:
134
+ return None
135
+
136
+ return {
137
+ "id": chapter_id,
138
+ "title": title,
139
+ "content": content,
140
+ "extra": {"site": "yibige"},
141
+ }
142
+
143
+ def _is_ad(self, s: str) -> bool:
144
+ """
145
+ Filter for footer junk inside #content.
146
+ """
147
+ if self._is_ad_line(s):
148
+ return True
149
+
150
+ ss = s.replace(" ", "")
151
+ # return any(b in s or b in ss for b in self.ADS)
152
+ return self._is_ad_line(ss)
153
+
154
+ @classmethod
155
+ def _meta(cls, tree: html.HtmlElement, prop: str) -> str:
156
+ """
157
+ Get a single meta property content
158
+ """
159
+ return cls._first_str(tree.xpath(f"//meta[@property='{prop}']/@content"))
160
+
161
+ @staticmethod
162
+ def _meta_all(tree: html.HtmlElement, prop: str) -> list[str]:
163
+ """
164
+ Get all meta property content values
165
+ """
166
+ return tree.xpath(f"//meta[@property='{prop}']/@content") or []
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers
4
+ -------------------------------
5
+
6
+ Site-specific searcher implementations for discovering novels across multiple sources
7
+ """
8
+
9
+ __all__ = [
10
+ "search",
11
+ "AaatxtSearcher",
12
+ "BiqugeSearcher",
13
+ "DxmwxSearcher",
14
+ "EightnovelSearcher",
15
+ "EsjzoneSearcher",
16
+ "HetushuSearcher",
17
+ "I25zwSearcher",
18
+ "Ixdzs8Searcher",
19
+ "Jpxs123Searcher",
20
+ "PiaotiaSearcher",
21
+ "QbtrSearcher",
22
+ "QianbiSearcher",
23
+ "Quanben5Searcher",
24
+ "ShuhaigeSearcher",
25
+ "TongrenquanSearcher",
26
+ "TtkanSearcher",
27
+ "XiaoshuowuSearcher",
28
+ "XiguashuwuSearcher",
29
+ "Xs63bSearcher",
30
+ ]
31
+
32
+ from .aaatxt import AaatxtSearcher
33
+ from .b520 import BiqugeSearcher
34
+ from .dxmwx import DxmwxSearcher
35
+ from .eightnovel import EightnovelSearcher
36
+ from .esjzone import EsjzoneSearcher
37
+ from .hetushu import HetushuSearcher
38
+ from .i25zw import I25zwSearcher
39
+ from .ixdzs8 import Ixdzs8Searcher
40
+ from .jpxs123 import Jpxs123Searcher
41
+ from .piaotia import PiaotiaSearcher
42
+ from .qbtr import QbtrSearcher
43
+ from .qianbi import QianbiSearcher
44
+ from .quanben5 import Quanben5Searcher
45
+ from .registry import search
46
+ from .shuhaige import ShuhaigeSearcher
47
+ from .tongrenquan import TongrenquanSearcher
48
+ from .ttkan import TtkanSearcher
49
+ from .xiaoshuowu import XiaoshuowuSearcher
50
+ from .xiguashuwu import XiguashuwuSearcher
51
+ from .xs63b import Xs63bSearcher
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.aaatxt
4
+ --------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.core.searchers.registry import register_searcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_searcher(
    site_keys=["aaatxt"],
)
class AaatxtSearcher(BaseSearcher):
    """
    Searcher for www.aaatxt.com (a gb2312/gbk-encoded site).
    """

    site_name = "aaatxt"
    priority = 500
    SEARCH_URL = "http://www.aaatxt.com/search.php"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the raw search-result HTML for *keyword*.

        Returns an empty string when the request fails.
        """
        # The site expects its query parameters in gb2312/gbk, so quote
        # them with that encoding up front.
        query = {
            "keyword": cls._quote(keyword, encoding="gb2312", errors="replace"),
            "submit": cls._quote("搜 索", encoding="gb2312", errors="replace"),
        }
        # Build the full URL by hand so the pre-quoted values are not
        # encoded a second time.
        url = cls._build_url(cls.SEARCH_URL, query)
        headers = {
            "Host": "www.aaatxt.com",
            "Referer": "http://www.aaatxt.com/",
        }
        try:
            async with (await cls._http_get(url, headers=headers)) as resp:
                return await cls._response_to_str(resp, "gb2312")
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse search-result HTML into a list of SearchResult entries.

        :param html_str: raw HTML of the search-result page.
        :param limit: stop once this many rows have been inspected (None = all).
        """
        tree = html.fromstring(html_str)
        tables = tree.xpath("//div[@class='sort']//div[@class='list']/table")
        found: list[SearchResult] = []

        for pos, table in enumerate(tables):
            href = cls._first_str(table.xpath(".//td[@class='name']/h3/a/@href"))
            if not href:
                # Not a real result row; skip it.
                continue

            if limit is not None and pos >= limit:
                break

            # e.g. ".../12345.html" -> "12345"
            book_id = href.split("/")[-1].split(".")[0]
            book_url = cls._abs_url(href)

            cover_rel = cls._first_str(table.xpath(".//td[@class='cover']/a/img/@src"))
            cover_url = cls._abs_url(cover_rel) if cover_rel else ""

            title = cls._first_str(table.xpath(".//td[@class='name']/h3/a//text()"))

            # The "size" cell packs several "label:value" tokens separated by
            # (non-breaking) spaces; normalize those first, then scan.
            raw_size = table.xpath("string(.//td[@class='size'])")
            pieces = (
                raw_size.replace("\u00a0", " ").replace("&nbsp;", " ").strip().split()
            )

            word_count = "-"
            author = "-"
            for piece in pieces:
                if piece.startswith("大小:"):
                    word_count = piece.split(":", 1)[1].strip()
                elif piece.startswith("上传:"):
                    author = piece.split(":", 1)[1].strip()

            # The intro cell may carry an update date after an "更新" marker
            # (either colon variant).
            raw_intro = table.xpath("string(.//td[@class='intro'])")
            intro = raw_intro.replace("\u00a0", " ").replace("&nbsp;", " ")
            update_date = "-"
            for marker in ("更新:", "更新:"):
                if marker in intro:
                    tail = intro.split(marker, 1)[1].strip()
                    if tail:
                        update_date = tail.split()[0]
                    break

            found.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter="-",
                    update_date=update_date,
                    word_count=word_count,
                    priority=cls.priority + pos,
                )
            )
        return found
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.b520
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.core.searchers.registry import register_searcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_searcher(
    site_keys=["biquge", "bqg", "b520"],
)
class BiqugeSearcher(BaseSearcher):
    """
    Searcher for the b520.cc biquge mirror.
    """

    site_name = "biquge"
    priority = 30
    BASE_URL = "http://www.b520.cc/"
    SEARCH_URL = "http://www.b520.cc/modules/article/search.php"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the raw search-result HTML for *keyword*.

        Returns an empty string when the request fails.
        """
        query = {"searchkey": keyword}
        try:
            async with (await cls._http_get(cls.SEARCH_URL, params=query)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse search-result HTML into a list of SearchResult entries.

        The XPath position() filter drops the table's header row; each
        remaining row yields one result.

        :param html_str: raw HTML of the search-result page.
        :param limit: stop once this many rows have been inspected (None = all).
        """
        tree = html.fromstring(html_str)
        rows = tree.xpath('//table[@class="grid"]//tr[position()>1]')
        found: list[SearchResult] = []

        for pos, row in enumerate(rows):
            href = cls._first_str(row.xpath(".//td[1]/a[1]/@href"))
            if not href:
                # Not a real result row; skip it.
                continue

            if limit is not None and pos >= limit:
                break

            found.append(
                SearchResult(
                    site=cls.site_name,
                    # Trailing-slash-safe last path segment is the book id.
                    book_id=href.strip("/").split("/")[-1],
                    book_url=cls._abs_url(href),
                    cover_url="",
                    title=cls._first_str(row.xpath(".//td[1]/a[1]/text()")),
                    author=cls._first_str(row.xpath(".//td[3]//text()")),
                    latest_chapter=cls._first_str(row.xpath(".//td[2]/a[1]/text()"))
                    or "-",
                    update_date=cls._first_str(row.xpath(".//td[5]//text()")),
                    word_count=cls._first_str(row.xpath(".//td[4]//text()")),
                    # Earlier rows rank higher (lower number = higher priority).
                    priority=cls.priority + pos,
                )
            )
        return found