novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.parsers.esjzone.main_parser
4
- -------------------------------------------------
3
+ novel_downloader.core.parsers.esjzone
4
+ -------------------------------------
5
5
 
6
6
  """
7
7
 
@@ -11,22 +11,21 @@ from typing import Any
11
11
  from lxml import html
12
12
 
13
13
  from novel_downloader.core.parsers.base import BaseParser
14
- from novel_downloader.models import ChapterDict
14
+ from novel_downloader.core.parsers.registry import register_parser
15
+ from novel_downloader.models import (
16
+ BookInfoDict,
17
+ ChapterDict,
18
+ VolumeInfoDict,
19
+ )
15
20
 
16
21
 
22
+ @register_parser(
23
+ site_keys=["esjzone"],
24
+ )
17
25
  class EsjzoneParser(BaseParser):
18
- """ """
19
-
20
- # Book info XPaths
21
- _BOOK_NAME_XPATH = '//h2[contains(@class, "text-normal")]/text()'
22
- _AUTHOR_XPATH = '//li[strong[text()="作者:"]]/a/text()'
23
- _COVER_URL_XPATH = '//div[contains(@class,"product-gallery")]//img/@src'
24
- _UPDATE_TIME_XPATH = '//li[strong[text()="更新日期:"]]/text()'
25
- _WORD_COUNT_XPATH = '//span[@id="txt"]/text()'
26
- _TYPE_XPATH = '//li[strong[text()="類型:"]]/text()'
27
- _ALT_NAME_XPATH = '//li[strong[text()="其他書名:"]]/text()'
28
- _WEB_URL_XPATH = '//li[strong[text()="Web生肉:"]]/a/@href'
29
- _SUMMARY_XPATH = '//div[@class="description"]/p//text()'
26
+ """
27
+ Parser for esjzone book pages.
28
+ """
30
29
 
31
30
  # Chapter XPaths
32
31
  _CHAPTER_TEXT_XPATH = 'string(//div[contains(@class, "forum-content")])'
@@ -35,14 +34,13 @@ class EsjzoneParser(BaseParser):
35
34
  '//i[contains(@class, "icon-clock")]/following-sibling::text()',
36
35
  '//i[contains(@class, "icon-pen-tool")]/following-sibling::text()',
37
36
  ]
38
-
39
37
  _CHECK_FORUM_XPATH = '//div[@class="page-title"]//ul[@class="breadcrumbs"]/li[not(@class="slash")]//text()' # noqa: E501
40
38
 
41
39
  def parse_book_info(
42
40
  self,
43
41
  html_list: list[str],
44
42
  **kwargs: Any,
45
- ) -> dict[str, Any]:
43
+ ) -> BookInfoDict | None:
46
44
  """
47
45
  Parse a book info page and extract metadata and chapter structure.
48
46
 
@@ -53,27 +51,40 @@ class EsjzoneParser(BaseParser):
53
51
  :return: Parsed metadata and chapter structure as a dictionary.
54
52
  """
55
53
  if not html_list or self._is_forum_page(html_list):
56
- return {}
54
+ return None
55
+
57
56
  tree = html.fromstring(html_list[0])
58
- result: dict[str, Any] = {}
59
-
60
- result["book_name"] = self._get_text(tree, self._BOOK_NAME_XPATH)
61
- result["author"] = self._get_text(tree, self._AUTHOR_XPATH)
62
- result["cover_url"] = self._get_text(tree, self._COVER_URL_XPATH)
63
- result["update_time"] = self._get_text(tree, self._UPDATE_TIME_XPATH)
64
- result["word_count"] = self._get_text(
65
- tree, self._WORD_COUNT_XPATH, clean_comma=True
57
+
58
+ # --- Basic metadata ---
59
+ book_name = self._first_str(
60
+ tree.xpath('//h2[contains(@class,"text-normal")]/text()')
61
+ )
62
+ author = self._first_str(tree.xpath('//li[strong[text()="作者:"]]/a/text()'))
63
+ cover_url = self._first_str(
64
+ tree.xpath('//div[contains(@class,"product-gallery")]//img/@src')
66
65
  )
67
- result["type"] = self._get_text(tree, self._TYPE_XPATH)
68
- result["alt_name"] = self._get_text(tree, self._ALT_NAME_XPATH)
69
- result["web_url"] = self._get_text(tree, self._WEB_URL_XPATH)
70
- # result["summary"] = self._get_text(tree, self._SUMMARY_XPATH, join=True)
66
+ update_time = self._first_str(
67
+ tree.xpath('//li[strong[text()="更新日期:"]]/text()')
68
+ ) # noqa: E501
69
+ word_count = self._first_str(
70
+ tree.xpath('//span[@id="txt"]/text()'), replaces=[(",", "")]
71
+ )
72
+ book_type = self._first_str(tree.xpath('//li[strong[text()="類型:"]]/text()'))
73
+ alt_name = self._first_str(
74
+ tree.xpath('//li[strong[text()="其他書名:"]]/text()')
75
+ ) # noqa: E501
76
+ web_url = self._first_str(tree.xpath('//li[strong[text()="Web生肉:"]]/a/@href'))
77
+
78
+ # Summary paragraphs
71
79
  paras = tree.xpath('//div[@class="description"]/p')
72
80
  texts = [p.xpath("string()").strip() for p in paras]
73
- result["summary"] = "\n".join(texts).strip()
81
+ summary = "\n".join(t for t in texts if t)
74
82
 
75
- volumes: list[dict[str, Any]] = []
76
- current_vol: dict[str, Any] = {}
83
+ current_vol: VolumeInfoDict = {
84
+ "volume_name": "單卷",
85
+ "chapters": [],
86
+ }
87
+ volumes: list[VolumeInfoDict] = [current_vol]
77
88
 
78
89
  def _is_garbage_title(name: str) -> bool:
79
90
  stripped = name.strip()
@@ -84,25 +95,18 @@ class EsjzoneParser(BaseParser):
84
95
  if _is_garbage_title(name):
85
96
  return
86
97
  name = name.strip() or "未命名卷"
87
- if name == "未命名卷" and current_vol is not None:
98
+ if current_vol and current_vol["volume_name"] == name:
88
99
  return
89
100
  current_vol = {"volume_name": name, "chapters": []}
90
101
  volumes.append(current_vol)
91
102
 
92
- _start_volume("單卷")
93
-
94
- # nodes = tree.xpath('//div[@id="chapterList"]/details') + tree.xpath(
95
- # '//div[@id="chapterList"]/*[not(self::details)]'
96
- # )
97
103
  nodes = tree.xpath('//div[@id="chapterList"]/*')
98
-
99
104
  for node in nodes:
100
105
  tag = node.tag.lower()
101
106
 
102
107
  if tag == "details":
103
108
  # ---- DETAILS-based layout ----
104
- summary = node.find("summary")
105
- vol_name = summary.text if summary is not None else "未命名卷"
109
+ vol_name = node.xpath("string(./summary)").strip() or "未命名卷"
106
110
  _start_volume(vol_name)
107
111
 
108
112
  # all chapters inside this details
@@ -111,7 +115,11 @@ class EsjzoneParser(BaseParser):
111
115
  href = a.get("href", "")
112
116
  chap_id = href.rstrip("/").split("/")[-1].split(".", 1)[0]
113
117
  current_vol["chapters"].append(
114
- {"title": title, "url": href, "chapterId": chap_id}
118
+ {
119
+ "title": title,
120
+ "url": href,
121
+ "chapterId": chap_id,
122
+ }
115
123
  )
116
124
 
117
125
  elif (
@@ -134,9 +142,21 @@ class EsjzoneParser(BaseParser):
134
142
  {"title": title, "url": href, "chapterId": chap_id}
135
143
  )
136
144
  volumes = [vol for vol in volumes if vol["chapters"]]
137
- result["volumes"] = volumes
138
145
 
139
- return result
146
+ return {
147
+ "book_name": book_name,
148
+ "author": author,
149
+ "cover_url": cover_url,
150
+ "update_time": update_time,
151
+ "summary": summary,
152
+ "tags": [book_type],
153
+ "word_count": word_count,
154
+ "volumes": volumes,
155
+ "extra": {
156
+ "alt_name": alt_name,
157
+ "web_url": web_url,
158
+ },
159
+ }
140
160
 
141
161
  def parse_chapter(
142
162
  self,
@@ -144,16 +164,9 @@ class EsjzoneParser(BaseParser):
144
164
  chapter_id: str,
145
165
  **kwargs: Any,
146
166
  ) -> ChapterDict | None:
147
- """
148
- Parse a single chapter page and extract clean text or simplified HTML.
149
-
150
- :param html_list: Raw HTML of the chapter page.
151
- :param chapter_id: Identifier of the chapter being parsed.
152
- :return: Cleaned chapter content as plain text or minimal HTML.
153
- """
154
167
  if not html_list or self._is_forum_page(html_list):
155
168
  return None
156
- tree = html.fromstring(html_list[0], parser=None)
169
+ tree = html.fromstring(html_list[0])
157
170
 
158
171
  content_lines: list[str] = []
159
172
  content_nodes = tree.xpath(self._CHAPTER_CONTENT_NODES_XPATH)
@@ -173,7 +186,7 @@ class EsjzoneParser(BaseParser):
173
186
  content_lines.append(f'<img src="{src}" />')
174
187
 
175
188
  content = (
176
- "\n\n".join(content_lines).strip()
189
+ "\n".join(content_lines).strip()
177
190
  if content_lines
178
191
  else tree.xpath(self._CHAPTER_TEXT_XPATH).strip()
179
192
  )
@@ -211,16 +224,3 @@ class EsjzoneParser(BaseParser):
211
224
  breadcrumb: list[str] = tree.xpath(self._CHECK_FORUM_XPATH)
212
225
  breadcrumb = [s.strip() for s in breadcrumb if s.strip()]
213
226
  return breadcrumb == ["Home", "論壇"]
214
-
215
- @staticmethod
216
- def _get_text(
217
- tree: html.HtmlElement,
218
- xpath: str,
219
- join: bool = False,
220
- clean_comma: bool = False,
221
- ) -> str:
222
- data = tree.xpath(xpath)
223
- if not data:
224
- return ""
225
- text = "\n".join(data) if join else data[0].strip()
226
- return text.replace(",", "") if clean_comma else text
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.guidaye
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ import re
9
+ from datetime import datetime
10
+ from typing import Any
11
+
12
+ from lxml import html
13
+
14
+ from novel_downloader.core.parsers.base import BaseParser
15
+ from novel_downloader.core.parsers.registry import register_parser
16
+ from novel_downloader.models import (
17
+ BookInfoDict,
18
+ ChapterDict,
19
+ VolumeInfoDict,
20
+ )
21
+
22
+
23
+ @register_parser(
24
+ site_keys=["guidaye"],
25
+ )
26
+ class GuidayeParser(BaseParser):
27
+ """
28
+ Parser for 名著阅读 book pages.
29
+ """
30
+
31
+ BASE_URL = "https://b.guidaye.com"
32
+
33
+ def parse_book_info(
34
+ self,
35
+ html_list: list[str],
36
+ **kwargs: Any,
37
+ ) -> BookInfoDict | None:
38
+ if not html_list:
39
+ return None
40
+
41
+ tree = html.fromstring(html_list[0])
42
+
43
+ # Book metadata
44
+ book_name = self._first_str(tree.xpath('//h1[@class="page-title"]/a/text()'))
45
+ author = self._first_str(
46
+ tree.xpath('//div[@id="category-description-author"]/a/text()')
47
+ )
48
+ cover_url = self.BASE_URL + self._first_str(
49
+ tree.xpath('//div[@id="category-description-image"]//img/@src')
50
+ )
51
+
52
+ # Summary paragraphs
53
+ summary = (
54
+ tree.xpath('string(//div[@id="category-description-text"])')
55
+ .replace("内容简介:", "", 1)
56
+ .strip()
57
+ )
58
+
59
+ # Chapter volumes & listings
60
+ volumes: list[VolumeInfoDict] = []
61
+ curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}
62
+
63
+ items = tree.xpath('//div[@class="entry-content"]/ul/*')
64
+ for elem in items:
65
+ if elem.tag.lower() == "h3":
66
+ # Flush previous volume
67
+ if curr_vol["chapters"]:
68
+ volumes.append(curr_vol)
69
+ curr_vol = {"volume_name": elem.text_content().strip(), "chapters": []}
70
+ elif elem.tag.lower() == "li":
71
+ link = elem.xpath(".//a")[0]
72
+ href = link.get("href", "").strip()
73
+ title = link.get("title", "").strip()
74
+ cid_match = re.search(r"/(\d+)\.html$", href)
75
+ chapter_id = cid_match.group(1) if cid_match else ""
76
+ curr_vol["chapters"].append(
77
+ {"title": title, "url": href, "chapterId": chapter_id}
78
+ )
79
+
80
+ # Append last volume
81
+ if curr_vol["chapters"]:
82
+ volumes.append(curr_vol)
83
+
84
+ # Timestamp of parsing
85
+ share_text = tree.xpath('string(//div[@id="category-description-share"])')
86
+ m = re.search(r"最近更新[::]\s*([\d-]+)", share_text)
87
+ update_time = m.group(1) if m else datetime.now().strftime("%Y-%m-%d")
88
+
89
+ return {
90
+ "book_name": book_name,
91
+ "author": author,
92
+ "cover_url": cover_url,
93
+ "update_time": update_time,
94
+ "summary": summary,
95
+ "volumes": volumes,
96
+ "extra": {},
97
+ }
98
+
99
+ def parse_chapter(
100
+ self,
101
+ html_list: list[str],
102
+ chapter_id: str,
103
+ **kwargs: Any,
104
+ ) -> ChapterDict | None:
105
+ if not html_list:
106
+ return None
107
+ tree = html.fromstring(html_list[0])
108
+
109
+ # Title from entry-title
110
+ title = self._first_str(tree.xpath('//h1[@class="entry-title"]/text()'))
111
+
112
+ # Extract paragraphs within entry-content
113
+ full_text = tree.xpath('string(//div[@class="entry-content"])')
114
+ full_text = full_text.replace("\u00A0", " ")
115
+
116
+ # 3. Split into lines and clean up
117
+ lines = [line.strip() for line in full_text.splitlines() if line.strip()]
118
+ if not lines:
119
+ return None
120
+
121
+ content = "\n".join(lines)
122
+
123
+ return {
124
+ "id": chapter_id,
125
+ "title": title,
126
+ "content": content,
127
+ "extra": {"site": "guidaye"},
128
+ }
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.hetushu
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ import re
9
+ from datetime import datetime
10
+ from typing import Any
11
+
12
+ from lxml import html
13
+
14
+ from novel_downloader.core.parsers.base import BaseParser
15
+ from novel_downloader.core.parsers.registry import register_parser
16
+ from novel_downloader.models import (
17
+ BookInfoDict,
18
+ ChapterDict,
19
+ VolumeInfoDict,
20
+ )
21
+
22
+
23
+ @register_parser(
24
+ site_keys=["hetushu"],
25
+ )
26
+ class HetushuParser(BaseParser):
27
+ """
28
+ Parser for 和图书 book pages.
29
+ """
30
+
31
+ BASE_URL = "https://www.hetushu.com"
32
+
33
+ def parse_book_info(
34
+ self,
35
+ html_list: list[str],
36
+ **kwargs: Any,
37
+ ) -> BookInfoDict | None:
38
+ if not html_list:
39
+ return None
40
+
41
+ tree = html.fromstring(html_list[0])
42
+
43
+ # --- Metadata ---
44
+ book_name = self._first_str(
45
+ tree.xpath('//div[contains(@class,"book_info")]/h2/text()')
46
+ )
47
+ author = self._first_str(
48
+ tree.xpath(
49
+ '//div[contains(@class,"book_info")]/div[contains(.,"作者")]/a/text()'
50
+ )
51
+ )
52
+ cover_url = self.BASE_URL + self._first_str(
53
+ tree.xpath('//div[contains(@class,"book_info")]//img/@src')
54
+ )
55
+
56
+ cls_attr = self._first_str(
57
+ tree.xpath('//div[contains(@class,"book_info")]/@class')
58
+ )
59
+ serial_status = "已完结" if "finish" in cls_attr else "连载中"
60
+
61
+ tags = [
62
+ a.strip()
63
+ for a in tree.xpath('//dl[@class="tag"]//dd/a/text()')
64
+ if a.strip()
65
+ ]
66
+
67
+ paras = tree.xpath('//div[@class="intro"]/p/text()')
68
+ summary = "\n".join(p.strip() for p in paras if p.strip())
69
+
70
+ # --- Chapter volumes & listings ---
71
+ volumes: list[VolumeInfoDict] = []
72
+ curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}
73
+
74
+ for elem in tree.xpath('//dl[@id="dir"]/*'):
75
+ if elem.tag == "dt":
76
+ # Start a new volume
77
+ if curr_vol["chapters"]:
78
+ volumes.append(curr_vol)
79
+ curr_vol = {
80
+ "volume_name": elem.text_content().strip(),
81
+ "chapters": [],
82
+ }
83
+ elif elem.tag == "dd":
84
+ link = elem.xpath(".//a")[0]
85
+ href = link.get("href", "").strip()
86
+ title = link.get("title", "").strip()
87
+ # Extract numeric chapterId from the URL
88
+ m = re.search(r"/book/\d+/(?P<id>\d+)\.html", href)
89
+ chapter_id = m.group("id") if m else ""
90
+ curr_vol["chapters"].append(
91
+ {"title": title, "url": href, "chapterId": chapter_id}
92
+ )
93
+
94
+ # Append the last volume if it has any chapters
95
+ if curr_vol["chapters"]:
96
+ volumes.append(curr_vol)
97
+
98
+ update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
99
+
100
+ return {
101
+ "book_name": book_name,
102
+ "author": author,
103
+ "cover_url": cover_url,
104
+ "update_time": update_time,
105
+ "serial_status": serial_status,
106
+ "tags": tags,
107
+ "summary": summary,
108
+ "volumes": volumes,
109
+ "extra": {},
110
+ }
111
+
112
+ def parse_chapter(
113
+ self,
114
+ html_list: list[str],
115
+ chapter_id: str,
116
+ **kwargs: Any,
117
+ ) -> ChapterDict | None:
118
+ if not html_list:
119
+ return None
120
+
121
+ tree = html.fromstring(html_list[0])
122
+
123
+ title = self._first_str(
124
+ tree.xpath('//div[@id="content"]//h2[@class="h2"]/text()')
125
+ )
126
+
127
+ paras = tree.xpath('//div[@id="content"]/div[not(@class)]/text()')
128
+ paragraph_texts = [p.strip() for p in paras if p.strip()]
129
+
130
+ content = "\n".join(paragraph_texts)
131
+ if not content.strip():
132
+ return None
133
+
134
+ return {
135
+ "id": chapter_id,
136
+ "title": title,
137
+ "content": content,
138
+ "extra": {"site": "hetushu"},
139
+ }
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.i25zw
4
+ -----------------------------------
5
+ Parser for 25中文网 (i25zw.com) book-info and chapter pages.
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
@register_parser(
    site_keys=["i25zw"],
)
class I25zwParser(BaseParser):
    """
    Parser for 25中文网 (i25zw) book-info and chapter pages.
    """

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse the book-info page and catalog page into a BookInfoDict.

        :param html_list: [info_page_html, catalog_page_html]
        :return: Parsed book info, or None if input is incomplete or the
            catalog structure is missing.
        """
        if len(html_list) < 2:
            return None

        info_tree = html.fromstring(html_list[0])
        catalog_tree = html.fromstring(html_list[1])

        # --- Metadata ---
        book_name = self._first_str(info_tree.xpath("//h1[@class='f21h']/text()"))
        author = self._first_str(info_tree.xpath("//h1[@class='f21h']/em/a/text()"))
        cover_url = self._first_str(info_tree.xpath("//div[@class='pic']/img/@src"))

        # Tag / status / word count / update time live in labelled <td> cells.
        tag = self._first_str(
            info_tree.xpath("//b[contains(text(),'小说分类')]/parent::td/text()")
        )
        serial_status = self._first_str(
            info_tree.xpath("//b[contains(text(),'小说状态')]/parent::td/text()")
        )
        word_count = self._first_str(
            info_tree.xpath("//b[contains(text(),'全文字数')]/parent::td/text()")
        )
        raw_update = self._first_str(
            info_tree.xpath("//b[contains(text(),'更新时间')]/parent::td/text()")
        )
        # The site wraps the timestamp in parentheses, e.g. "(2024-01-01 ...)".
        update_time = raw_update.strip("()")

        # Summary: strip the leading "关于<book_name>:" prefix when present.
        full_intro = info_tree.xpath("string(//div[@class='intro'][@style])").strip()
        summary = full_intro.replace(f"关于{book_name}:", "", 1).strip()

        # --- Chapter list ---
        # Guard instead of bare [0]: a missing catalog <dl> previously raised
        # IndexError; return None like every other unparseable-page path.
        dl_nodes = catalog_tree.xpath("//div[@id='list']/dl")
        if not dl_nodes:
            return None
        dl = dl_nodes[0]

        # Prefer the <dd> entries that belong to the "正文" (main text) section.
        dds = dl.xpath("./dd[preceding-sibling::dt[1][contains(., '正文')]]/a")
        if not dds:
            # Fallback: everything after the second <dt>.
            dds = dl.xpath("./dt[2]/following-sibling::dd/a")

        chapters: list[ChapterInfoDict] = []
        for a in dds:
            url = a.get("href", "").strip()
            title = a.text_content().strip()
            # '/311006/252845677.html' -> '252845677'
            chapter_id = url.split("/")[-1].split(".")[0]
            chapters.append(
                {
                    "title": title,
                    "url": url,
                    "chapterId": chapter_id,
                }
            )
        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "word_count": word_count,
            "serial_status": serial_status,
            "tags": [tag] if tag else [],
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a chapter page into a ChapterDict.

        :param html_list: [chapter_page_html]
        :param chapter_id: Chapter identifier (echoed back in the result).
        :return: Parsed chapter, or None if the page has no usable content.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        title_text = self._first_str(
            tree.xpath("//div[@class='zhangjieming']/h1/text()")
        )

        content_divs = tree.xpath("//div[@id='content']")
        if not content_divs:
            return None
        content_div = content_divs[0]

        # Only direct <p> children: skips nav links nested deeper in the div.
        paragraphs = [
            text
            for p in content_div.xpath("./p")
            if (text := p.text_content().strip())
        ]
        if not paragraphs:
            return None

        return {
            "id": chapter_id,
            "title": title_text,
            "content": "\n".join(paragraphs),
            "extra": {"site": "i25zw"},
        }
+ }