novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +77 -64
  6. novel_downloader/cli/export.py +16 -20
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +65 -105
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +1 -0
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +14 -9
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +17 -11
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +61 -66
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  100. novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
  101. novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
  102. novel_downloader/core/parsers/qidian/main_parser.py +11 -38
  103. novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
  104. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  105. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  106. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  107. novel_downloader/core/parsers/quanben5.py +103 -0
  108. novel_downloader/core/parsers/registry.py +5 -16
  109. novel_downloader/core/parsers/sfacg.py +38 -45
  110. novel_downloader/core/parsers/shencou.py +215 -0
  111. novel_downloader/core/parsers/shuhaige.py +111 -0
  112. novel_downloader/core/parsers/tongrenquan.py +116 -0
  113. novel_downloader/core/parsers/ttkan.py +132 -0
  114. novel_downloader/core/parsers/wanbengo.py +191 -0
  115. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  116. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  117. novel_downloader/core/parsers/xs63b.py +161 -0
  118. novel_downloader/core/parsers/xshbook.py +134 -0
  119. novel_downloader/core/parsers/yamibo.py +87 -131
  120. novel_downloader/core/parsers/yibige.py +166 -0
  121. novel_downloader/core/searchers/__init__.py +34 -3
  122. novel_downloader/core/searchers/aaatxt.py +107 -0
  123. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  124. novel_downloader/core/searchers/base.py +112 -36
  125. novel_downloader/core/searchers/dxmwx.py +105 -0
  126. novel_downloader/core/searchers/eightnovel.py +84 -0
  127. novel_downloader/core/searchers/esjzone.py +43 -25
  128. novel_downloader/core/searchers/hetushu.py +92 -0
  129. novel_downloader/core/searchers/i25zw.py +93 -0
  130. novel_downloader/core/searchers/ixdzs8.py +107 -0
  131. novel_downloader/core/searchers/jpxs123.py +107 -0
  132. novel_downloader/core/searchers/piaotia.py +100 -0
  133. novel_downloader/core/searchers/qbtr.py +106 -0
  134. novel_downloader/core/searchers/qianbi.py +74 -40
  135. novel_downloader/core/searchers/quanben5.py +144 -0
  136. novel_downloader/core/searchers/registry.py +24 -8
  137. novel_downloader/core/searchers/shuhaige.py +124 -0
  138. novel_downloader/core/searchers/tongrenquan.py +110 -0
  139. novel_downloader/core/searchers/ttkan.py +92 -0
  140. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  141. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  142. novel_downloader/core/searchers/xs63b.py +104 -0
  143. novel_downloader/locales/en.json +31 -82
  144. novel_downloader/locales/zh.json +32 -83
  145. novel_downloader/models/__init__.py +21 -22
  146. novel_downloader/models/book.py +44 -0
  147. novel_downloader/models/config.py +4 -37
  148. novel_downloader/models/login.py +1 -1
  149. novel_downloader/models/search.py +5 -0
  150. novel_downloader/resources/config/settings.toml +8 -70
  151. novel_downloader/resources/json/xiguashuwu.json +718 -0
  152. novel_downloader/utils/__init__.py +13 -22
  153. novel_downloader/utils/chapter_storage.py +3 -2
  154. novel_downloader/utils/constants.py +4 -29
  155. novel_downloader/utils/cookies.py +6 -18
  156. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  157. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  158. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  159. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  160. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  161. novel_downloader/utils/epub/__init__.py +1 -1
  162. novel_downloader/utils/epub/constants.py +57 -16
  163. novel_downloader/utils/epub/documents.py +88 -194
  164. novel_downloader/utils/epub/models.py +0 -14
  165. novel_downloader/utils/epub/utils.py +63 -96
  166. novel_downloader/utils/file_utils/__init__.py +2 -23
  167. novel_downloader/utils/file_utils/io.py +3 -113
  168. novel_downloader/utils/file_utils/sanitize.py +0 -4
  169. novel_downloader/utils/fontocr.py +207 -0
  170. novel_downloader/utils/logger.py +8 -16
  171. novel_downloader/utils/network.py +2 -2
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/time_utils/__init__.py +5 -11
  176. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  177. novel_downloader/utils/time_utils/sleep_utils.py +4 -8
  178. novel_downloader/web/__init__.py +13 -0
  179. novel_downloader/web/components/__init__.py +11 -0
  180. novel_downloader/web/components/navigation.py +35 -0
  181. novel_downloader/web/main.py +66 -0
  182. novel_downloader/web/pages/__init__.py +17 -0
  183. novel_downloader/web/pages/download.py +78 -0
  184. novel_downloader/web/pages/progress.py +147 -0
  185. novel_downloader/web/pages/search.py +329 -0
  186. novel_downloader/web/services/__init__.py +17 -0
  187. novel_downloader/web/services/client_dialog.py +164 -0
  188. novel_downloader/web/services/cred_broker.py +113 -0
  189. novel_downloader/web/services/cred_models.py +35 -0
  190. novel_downloader/web/services/task_manager.py +264 -0
  191. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  192. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  193. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  194. novel_downloader/core/downloaders/biquge.py +0 -29
  195. novel_downloader/core/downloaders/esjzone.py +0 -29
  196. novel_downloader/core/downloaders/linovelib.py +0 -29
  197. novel_downloader/core/downloaders/sfacg.py +0 -29
  198. novel_downloader/core/downloaders/yamibo.py +0 -29
  199. novel_downloader/core/exporters/biquge.py +0 -22
  200. novel_downloader/core/exporters/esjzone.py +0 -22
  201. novel_downloader/core/exporters/qianbi.py +0 -22
  202. novel_downloader/core/exporters/sfacg.py +0 -22
  203. novel_downloader/core/exporters/yamibo.py +0 -22
  204. novel_downloader/core/fetchers/base/__init__.py +0 -14
  205. novel_downloader/core/fetchers/base/browser.py +0 -422
  206. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  207. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  208. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  209. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  210. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  211. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  212. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  213. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  214. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  215. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  216. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  217. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  218. novel_downloader/core/parsers/biquge.py +0 -139
  219. novel_downloader/models/chapter.py +0 -25
  220. novel_downloader/models/types.py +0 -13
  221. novel_downloader/tui/__init__.py +0 -7
  222. novel_downloader/tui/app.py +0 -32
  223. novel_downloader/tui/main.py +0 -17
  224. novel_downloader/tui/screens/__init__.py +0 -14
  225. novel_downloader/tui/screens/home.py +0 -198
  226. novel_downloader/tui/screens/login.py +0 -74
  227. novel_downloader/tui/styles/home_layout.tcss +0 -79
  228. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  229. novel_downloader/utils/cache.py +0 -24
  230. novel_downloader/utils/fontocr/__init__.py +0 -22
  231. novel_downloader/utils/fontocr/hash_store.py +0 -280
  232. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  233. novel_downloader/utils/fontocr/model_loader.py +0 -69
  234. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  235. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  236. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  237. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  238. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  239. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  240. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  241. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.xshbook
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
@register_parser(
    site_keys=["xshbook"],
)
class XshbookParser(BaseParser):
    """Parser for 小说虎 (xshbook.com) book-info and chapter pages."""

    BASE = "http://www.xshbook.com"

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Extract book metadata and the chapter list from a book-info page.

        :param html_list: raw HTML documents; only the first entry is used.
        :return: structured book info, or ``None`` when no HTML was given.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        book_name = self._first_str(tree.xpath("//div[@id='info']/h1/text()"))
        author = self._first_str(
            tree.xpath("//div[@id='info']/p[1]/text()"),
            replaces=[("\xa0", ""), ("作者:", "")],
        )
        update_time = self._first_str(
            tree.xpath("//meta[@property='og:novel:update_time']/@content")
        )
        cover_url = self._first_str(tree.xpath("//div[@id='fmimg']//img/@src"))

        # One line per <p> in the intro box; everything from the site's
        # "本站提示" boilerplate marker onward is discarded.
        intro_lines = [
            self._first_str(node.xpath("string()").splitlines())
            for node in tree.xpath("//div[@id='intro']//p")
        ]
        summary = "\n".join(intro_lines).strip()
        summary = summary.split("本站提示", 1)[0].strip()

        # The breadcrumb category (second link) doubles as the only tag.
        book_type = self._first_str(tree.xpath("//div[@class='con_top']/a[2]/text()"))
        tags: list[str] = [book_type] if book_type else []

        chapters: list[ChapterInfoDict] = []
        for link in tree.xpath("//div[@id='list']//dd/a"):
            href = link.get("href", "")
            # /95071/95071941/389027455.html -> "389027455"
            cid = href.rsplit("/", 1)[-1].split(".", 1)[0]
            chapters.append(
                {
                    "title": self._norm_space(link.text_content()),
                    "url": href,
                    "chapterId": cid,
                }
            )

        # Site has no volume structure; wrap everything in a single volume.
        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "summary": summary,
            "tags": tags,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Extract the title and cleaned text of a single chapter page.

        :param html_list: raw HTML documents; only the first entry is used.
        :param chapter_id: identifier echoed back in the result.
        :return: chapter dict, or ``None`` when no usable content is found.
        """
        if not html_list:
            return None
        tree = html.fromstring(html_list[0])

        title = self._first_str(tree.xpath("//div[@class='bookname']/h1/text()"))
        if not title:
            # Fall back to the trailing breadcrumb text.
            title = self._first_str(
                tree.xpath("//div[@class='con_top']/text()[last()]")
            )

        cont_nodes = tree.xpath("//div[@id='content']")
        if not cont_nodes:
            return None
        cont = cont_nodes[0]

        # Strip inline <script> tags before reading text content.
        for script in cont.xpath(".//script"):
            script.getparent().remove(script)

        kept: list[str] = []
        for para in cont.xpath(".//p"):
            text = html.tostring(para, method="text", encoding="unicode")
            text = text.replace("\xa0", " ")
            if not self._looks_like_boilerplate(text):
                kept.append(text)

        content = "\n".join(self._norm_space(t) for t in kept if t.strip())
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "xshbook"},
        }

    @staticmethod
    def _looks_like_boilerplate(text: str) -> bool:
        """True for site-ad / reminder lines that should be dropped."""
        # Parenthesised form of the original and/or chain (same precedence).
        return (
            "谨记我们的网址" in text
            or "温馨提示" in text
            or ("提示" in text and "本文" not in text and len(text) < 60)
            or ("分享" in text and len(text) < 40)
        )
@@ -11,148 +11,104 @@ from lxml import html
11
11
 
12
12
  from novel_downloader.core.parsers.base import BaseParser
13
13
  from novel_downloader.core.parsers.registry import register_parser
14
- from novel_downloader.models import ChapterDict
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
15
20
 
16
21
 
17
22
  @register_parser(
18
23
  site_keys=["yamibo"],
19
- backends=["session", "browser"],
20
24
  )
21
25
  class YamiboParser(BaseParser):
22
- """ """
26
+ """
27
+ Parser for 百合会 book pages.
28
+ """
23
29
 
24
30
  BASE_URL = "https://www.yamibo.com"
25
- # Book info XPaths
26
- _BOOK_NAME_XPATH = 'string(//h3[contains(@class, "col-md-12")])'
27
- _AUTHOR_XPATH = 'string(//h5[contains(@class, "text-warning")])'
28
- _COVER_URL_XPATH = '//img[contains(@class, "img-responsive")]/@src'
29
- _UPDATE_TIME_XPATH = '//p[contains(text(), "更新时间:")]'
30
- _SERIAL_STATUS_XPATH = '//p[contains(text(), "作品状态:")]'
31
- _TYPE_XPATH = '//p[contains(text(), "作品分类:")]'
32
- _SUMMARY_XPATH = 'string(//div[@id="w0-collapse1"]/div)'
33
-
34
- _VOLUME_NODE_XPATH = (
35
- '//div[contains(@class, "panel-info") and contains(@class, "panel-default")]'
36
- )
37
- _VOLUME_TITLE_XPATH = './/div[contains(@class, "panel-heading")]//a/text()'
38
- _CHAPTER_NODE_XPATH = (
39
- './/div[contains(@class, "panel-body")]//a[contains(@href, "view-chapter")]'
40
- )
41
- _CHAPTER_FLAT_XPATH = (
42
- '//div[@class="panel-body"]//a[contains(@href, "view-chapter")]'
43
- )
44
-
45
- # Chapter field XPaths
46
- _CHAPTER_TITLE_XPATH = "string(//section[contains(@class, 'col-md-9')]//h3)"
47
- _CHAPTER_TIME_XPATH = (
48
- "//div[contains(@class, 'row')]//div[contains(text(), '更新时间')]"
49
- )
50
- _CHAPTER_WORD_COUNT_XPATH = (
51
- "//div[contains(@class, 'row')]//div[contains(text(), '章节字数')]"
52
- )
53
- _CHAPTER_CONTENT_XPATH = "//div[@id='w0-collapse1']//p//text()"
54
31
 
55
32
  def parse_book_info(
56
33
  self,
57
34
  html_list: list[str],
58
35
  **kwargs: Any,
59
- ) -> dict[str, Any]:
60
- """
61
- Parse a book info page and extract metadata and chapter structure.
62
-
63
- :param html_list: Raw HTML of the book info page.
64
- :return: Parsed metadata and chapter structure as a dictionary.
65
- """
36
+ ) -> BookInfoDict | None:
66
37
  if not html_list:
67
- return {}
38
+ return None
68
39
 
69
40
  tree = html.fromstring(html_list[0])
70
- result: dict[str, Any] = {}
71
41
 
72
- result["book_name"] = tree.xpath(self._BOOK_NAME_XPATH).strip()
73
- result["author"] = tree.xpath(self._AUTHOR_XPATH).strip()
74
-
75
- cover = tree.xpath(self._COVER_URL_XPATH)
76
- result["cover_url"] = f"{self.BASE_URL}{cover[0]}" if cover else ""
77
-
78
- update_node = tree.xpath(self._UPDATE_TIME_XPATH)
79
- result["update_time"] = (
80
- update_node[0].xpath("string()").replace("更新时间:", "").strip()
81
- if update_node
82
- else ""
42
+ book_name = self._first_str(
43
+ tree.xpath('//h3[contains(@class,"col-md-12")]/text()')
83
44
  )
84
-
85
- serial_node = tree.xpath(self._SERIAL_STATUS_XPATH)
86
- result["serial_status"] = (
87
- serial_node[0].xpath("string()").replace("作品状态:", "").strip()
88
- if serial_node
89
- else ""
45
+ author = self._first_str(
46
+ tree.xpath('//h5[contains(@class,"text-warning")]/text()')
90
47
  )
91
-
92
- type_node = tree.xpath(self._TYPE_XPATH)
93
- result["type"] = (
94
- type_node[0].xpath("string()").replace("作品分类:", "").strip()
95
- if type_node
96
- else ""
48
+ cover_url = self.BASE_URL + self._first_str(
49
+ tree.xpath('//img[contains(@class,"img-responsive")]/@src')
97
50
  )
98
51
 
99
- result["summary"] = tree.xpath(self._SUMMARY_XPATH).strip()
100
-
101
- volumes = []
102
- volume_nodes = tree.xpath(self._VOLUME_NODE_XPATH)
103
-
104
- if volume_nodes:
105
- for volume_node in volume_nodes:
106
- title_node = volume_node.xpath(self._VOLUME_TITLE_XPATH)
107
- volume_name = title_node[0].strip() if title_node else "未命名卷"
108
-
109
- chapter_nodes = volume_node.xpath(self._CHAPTER_NODE_XPATH)
110
- chapters = []
111
- for chap in chapter_nodes:
112
- title = chap.xpath("string()").strip()
113
- url = chap.get("href", "")
114
- chapter_id = url.split("id=")[-1] if "id=" in url else ""
115
- chapters.append(
116
- {
117
- "title": title,
118
- "url": url,
119
- "chapterId": chapter_id,
120
- }
52
+ update_time = self._first_str(
53
+ tree.xpath('//p[contains(text(),"更新时间:")]/text()'),
54
+ replaces=[("更新时间:", "")],
55
+ )
56
+ serial_status = self._first_str(
57
+ tree.xpath('//p[contains(text(),"作品状态:")]/text()'),
58
+ replaces=[("作品状态:", "")],
59
+ )
60
+ book_type = self._first_str(
61
+ tree.xpath('//p[contains(text(),"作品分类:")]/text()'),
62
+ replaces=[("作品分类:", "")],
63
+ )
64
+ summary = self._first_str([tree.xpath('string(//div[@id="w0-collapse1"]/div)')])
65
+
66
+ # volumes & chapters
67
+ volumes: list[VolumeInfoDict] = []
68
+ for volume_node in tree.xpath(
69
+ '//div[contains(@class,"panel-info") and contains(@class,"panel-default")]'
70
+ ):
71
+ volume_name = (
72
+ self._first_str(
73
+ volume_node.xpath(
74
+ './/div[contains(@class,"panel-heading")]//a/text()'
121
75
  )
122
-
123
- volumes.append(
124
- {
125
- "volume_name": volume_name,
126
- "chapters": chapters,
127
- }
128
76
  )
77
+ or "未命名卷"
78
+ )
79
+ chapters: list[ChapterInfoDict] = []
80
+ for chap in volume_node.xpath(
81
+ './/div[contains(@class,"panel-body")]//a[contains(@href,"view-chapter")]'
82
+ ):
83
+ title = self._first_str([chap.xpath("string()")])
84
+ url = chap.get("href", "")
85
+ chapter_id = url.split("id=")[-1]
86
+ chapters.append({"title": title, "url": url, "chapterId": chapter_id})
87
+ volumes.append({"volume_name": volume_name, "chapters": chapters})
129
88
 
130
- else:
131
- # fallback: flat list
132
- chapter_nodes = tree.xpath(self._CHAPTER_FLAT_XPATH)
89
+ # fallback: flat chapter list
90
+ if not volumes:
133
91
  chapters = []
134
- for chap in chapter_nodes:
135
- title = chap.xpath("string()").strip()
92
+ for chap in tree.xpath(
93
+ '//div[@class="panel-body"]//a[contains(@href,"view-chapter")]'
94
+ ):
95
+ title = self._first_str([chap.xpath("string()")])
136
96
  url = chap.get("href", "")
137
97
  chapter_id = url.split("id=")[-1] if "id=" in url else ""
138
- chapters.append(
139
- {
140
- "title": title,
141
- "url": url,
142
- "chapterId": chapter_id,
143
- }
144
- )
145
-
146
- volumes = [
147
- {
148
- "volume_name": "单卷",
149
- "chapters": chapters,
150
- }
151
- ]
98
+ chapters.append({"title": title, "url": url, "chapterId": chapter_id})
99
+ volumes = [{"volume_name": "单卷", "chapters": chapters}]
152
100
 
153
- result["volumes"] = volumes
154
-
155
- return result
101
+ return {
102
+ "book_name": book_name,
103
+ "author": author,
104
+ "cover_url": cover_url,
105
+ "update_time": update_time,
106
+ "serial_status": serial_status,
107
+ "tags": [book_type],
108
+ "summary": summary,
109
+ "volumes": volumes,
110
+ "extra": {},
111
+ }
156
112
 
157
113
  def parse_chapter(
158
114
  self,
@@ -160,32 +116,32 @@ class YamiboParser(BaseParser):
160
116
  chapter_id: str,
161
117
  **kwargs: Any,
162
118
  ) -> ChapterDict | None:
163
- """
164
- Parse a single chapter page and extract clean text or simplified HTML.
165
-
166
- :param html_list: Raw HTML of the chapter page.
167
- :param chapter_id: Identifier of the chapter being parsed.
168
- :return: Cleaned chapter content as plain text or minimal HTML.
169
- """
170
119
  if not html_list:
171
120
  return None
172
121
  tree = html.fromstring(html_list[0])
173
122
 
174
- content_lines = tree.xpath(self._CHAPTER_CONTENT_XPATH)
175
- content = "\n\n".join(line.strip() for line in content_lines if line.strip())
123
+ content_lines = tree.xpath("//div[@id='w0-collapse1']//p//text()")
124
+ content = "\n".join(line.strip() for line in content_lines if line.strip())
176
125
  if not content:
177
126
  return None
178
127
 
179
- title = tree.xpath(self._CHAPTER_TITLE_XPATH).strip()
180
-
181
- update_node = tree.xpath(self._CHAPTER_TIME_XPATH)
182
- updated_at = (
183
- update_node[0].text.strip().replace("更新时间:", "") if update_node else ""
128
+ title = self._first_str(
129
+ [tree.xpath("string(//section[contains(@class,'col-md-9')]//h3)")]
184
130
  )
185
131
 
186
- word_node = tree.xpath(self._CHAPTER_WORD_COUNT_XPATH)
187
- word = word_node[0].text.strip().replace("章节字数:", "") if word_node else ""
188
- word_count = int(word) if word.isdigit() else 0
132
+ updated_at = self._first_str(
133
+ tree.xpath(
134
+ "//div[contains(@class,'row')]//div[contains(text(),'更新时间')]/text()"
135
+ ),
136
+ replaces=[("更新时间:", "")],
137
+ )
138
+ word_str = self._first_str(
139
+ tree.xpath(
140
+ "//div[contains(@class,'row')]//div[contains(text(),'章节字数')]/text()"
141
+ ),
142
+ replaces=[("章节字数:", "")],
143
+ )
144
+ word_count = int(word_str) if word_str.isdigit() else 0
189
145
 
190
146
  return {
191
147
  "id": chapter_id,
@@ -0,0 +1,166 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.yibige
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
@register_parser(
    site_keys=["yibige"],
)
class YibigeParser(BaseParser):
    """
    Parser for 一笔阁 (yibige) book pages.

    ``parse_book_info`` expects two documents: the info page followed by
    the catalog (chapter-list) page.
    """

    # Site-specific substrings marking footer/ad junk inside #content.
    ADS = {
        "首发无广告",
        "请分享",
        "读之阁",
        "小说网",
        "首发地址",
        "手机阅读",
        "一笔阁",
        "site_con_ad(",
        "chapter_content(",
    }

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Combine the info page and catalog page into structured book info.

        :param html_list: ``[info_html, catalog_html]``.
        :return: parsed book info, or ``None`` when fewer than two pages given.
        """
        if len(html_list) < 2:
            return None

        # Parse trees
        info_tree = html.fromstring(html_list[0])
        catalog_tree = html.fromstring(html_list[1])

        # Prefer <meta og:*> values, falling back to visible page elements.
        book_name = self._meta(info_tree, "og:novel:book_name") or self._first_str(
            info_tree.xpath("//div[@id='info']/h1/text()")
        )

        author = self._meta(info_tree, "og:novel:author") or self._first_str(
            info_tree.xpath("//div[@id='info']/p[a]/a/text()")
        )

        cover_url = self._meta(info_tree, "og:image") or self._first_str(
            info_tree.xpath("//div[@id='fmimg']//img/@src")
        )

        update_time = self._meta(info_tree, "og:novel:update_time").replace("T", " ")
        serial_status = self._meta(info_tree, "og:novel:status") or "连载中"

        word_count = self._first_str(
            info_tree.xpath("//div[@id='info']/p[contains(., '字数:')]/text()[1]"),
            replaces=[("字数:", "")],
        )

        # Summary: first paragraph under #intro
        summary = self._first_str(info_tree.xpath("//div[@id='intro']//p[1]/text()"))

        # Category plus any book:tag metas, de-duplicated.
        book_type = self._meta(info_tree, "og:novel:category")
        tags_set = set(self._meta_all(info_tree, "book:tag"))
        if book_type:
            tags_set.add(book_type)
        tags = list(tags_set)

        # --- Chapters from the catalog page ---
        chapters: list[ChapterInfoDict] = []
        for a in catalog_tree.xpath("//div[@id='list']/dl/dd/a"):
            href = (a.get("href") or "").strip()
            if not href:
                continue
            title = (a.text_content() or "").strip()
            if not title:
                continue
            # /6238/2496.html -> 2496
            chap_id = href.split("/")[-1].split(".")[0]
            chapters.append({"title": title, "url": href, "chapterId": chap_id})

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "serial_status": serial_status,
            "word_count": word_count,
            "summary": summary,
            "tags": tags,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Extract the title and ad-filtered text of a single chapter page.

        :param html_list: raw HTML documents; only the first entry is used.
        :param chapter_id: identifier echoed back in the result.
        :return: chapter dict, or ``None`` when no usable content is found.
        """
        if not html_list:
            return None
        tree = html.fromstring(html_list[0])

        title = self._first_str(tree.xpath("//div[@class='bookname']/h1/text()"))

        paragraphs: list[str] = []
        for p in tree.xpath("//div[@id='content']//p"):
            txt = self._norm_space(p.text_content())
            if not txt or self._is_ad(txt):
                continue
            paragraphs.append(txt)

        content = "\n".join(paragraphs).strip()
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "yibige"},
        }

    def _is_ad(self, s: str) -> bool:
        """
        Filter for footer junk inside #content.

        Checks both the raw line and a space-stripped copy against the
        generic ad heuristic, and additionally matches the class-level
        ``ADS`` substrings (which were defined but never consulted before).
        """
        if self._is_ad_line(s):
            return True

        ss = s.replace(" ", "")
        return self._is_ad_line(ss) or any(b in s or b in ss for b in self.ADS)

    @classmethod
    def _meta(cls, tree: html.HtmlElement, prop: str) -> str:
        """
        Get a single meta property content value ("" when absent).
        """
        return cls._first_str(tree.xpath(f"//meta[@property='{prop}']/@content"))

    @staticmethod
    def _meta_all(tree: html.HtmlElement, prop: str) -> list[str]:
        """
        Get all meta property content values.
        """
        return tree.xpath(f"//meta[@property='{prop}']/@content") or []
@@ -3,18 +3,49 @@
3
3
  novel_downloader.core.searchers
4
4
  -------------------------------
5
5
 
6
+ Site-specific searcher implementations for discovering novels across multiple sources
6
7
  """
7
8
 
8
9
# Public API of the searchers subpackage: the ``search`` entry point plus
# one Searcher class per supported site (kept alphabetical).
__all__ = [
    "search",
    "AaatxtSearcher",
    "BiqugeSearcher",
    "DxmwxSearcher",
    "EightnovelSearcher",
    "EsjzoneSearcher",
    "HetushuSearcher",
    "I25zwSearcher",
    "Ixdzs8Searcher",
    "Jpxs123Searcher",
    "PiaotiaSearcher",
    "QbtrSearcher",
    "QianbiSearcher",
    "Quanben5Searcher",
    "ShuhaigeSearcher",
    "TongrenquanSearcher",
    "TtkanSearcher",
    "XiaoshuowuSearcher",
    "XiguashuwuSearcher",
    "Xs63bSearcher",
]

from .aaatxt import AaatxtSearcher
from .b520 import BiqugeSearcher
from .dxmwx import DxmwxSearcher
from .eightnovel import EightnovelSearcher
from .esjzone import EsjzoneSearcher
from .hetushu import HetushuSearcher
from .i25zw import I25zwSearcher
from .ixdzs8 import Ixdzs8Searcher
from .jpxs123 import Jpxs123Searcher
from .piaotia import PiaotiaSearcher
from .qbtr import QbtrSearcher
from .qianbi import QianbiSearcher
from .quanben5 import Quanben5Searcher
from .registry import search
from .shuhaige import ShuhaigeSearcher
from .tongrenquan import TongrenquanSearcher
from .ttkan import TtkanSearcher
from .xiaoshuowu import XiaoshuowuSearcher
from .xiguashuwu import XiguashuwuSearcher
from .xs63b import Xs63bSearcher