novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +77 -64
  6. novel_downloader/cli/export.py +16 -20
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +65 -105
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +1 -0
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +14 -9
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +17 -11
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +61 -66
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  100. novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
  101. novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
  102. novel_downloader/core/parsers/qidian/main_parser.py +11 -38
  103. novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
  104. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  105. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  106. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  107. novel_downloader/core/parsers/quanben5.py +103 -0
  108. novel_downloader/core/parsers/registry.py +5 -16
  109. novel_downloader/core/parsers/sfacg.py +38 -45
  110. novel_downloader/core/parsers/shencou.py +215 -0
  111. novel_downloader/core/parsers/shuhaige.py +111 -0
  112. novel_downloader/core/parsers/tongrenquan.py +116 -0
  113. novel_downloader/core/parsers/ttkan.py +132 -0
  114. novel_downloader/core/parsers/wanbengo.py +191 -0
  115. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  116. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  117. novel_downloader/core/parsers/xs63b.py +161 -0
  118. novel_downloader/core/parsers/xshbook.py +134 -0
  119. novel_downloader/core/parsers/yamibo.py +87 -131
  120. novel_downloader/core/parsers/yibige.py +166 -0
  121. novel_downloader/core/searchers/__init__.py +34 -3
  122. novel_downloader/core/searchers/aaatxt.py +107 -0
  123. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  124. novel_downloader/core/searchers/base.py +112 -36
  125. novel_downloader/core/searchers/dxmwx.py +105 -0
  126. novel_downloader/core/searchers/eightnovel.py +84 -0
  127. novel_downloader/core/searchers/esjzone.py +43 -25
  128. novel_downloader/core/searchers/hetushu.py +92 -0
  129. novel_downloader/core/searchers/i25zw.py +93 -0
  130. novel_downloader/core/searchers/ixdzs8.py +107 -0
  131. novel_downloader/core/searchers/jpxs123.py +107 -0
  132. novel_downloader/core/searchers/piaotia.py +100 -0
  133. novel_downloader/core/searchers/qbtr.py +106 -0
  134. novel_downloader/core/searchers/qianbi.py +74 -40
  135. novel_downloader/core/searchers/quanben5.py +144 -0
  136. novel_downloader/core/searchers/registry.py +24 -8
  137. novel_downloader/core/searchers/shuhaige.py +124 -0
  138. novel_downloader/core/searchers/tongrenquan.py +110 -0
  139. novel_downloader/core/searchers/ttkan.py +92 -0
  140. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  141. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  142. novel_downloader/core/searchers/xs63b.py +104 -0
  143. novel_downloader/locales/en.json +31 -82
  144. novel_downloader/locales/zh.json +32 -83
  145. novel_downloader/models/__init__.py +21 -22
  146. novel_downloader/models/book.py +44 -0
  147. novel_downloader/models/config.py +4 -37
  148. novel_downloader/models/login.py +1 -1
  149. novel_downloader/models/search.py +5 -0
  150. novel_downloader/resources/config/settings.toml +8 -70
  151. novel_downloader/resources/json/xiguashuwu.json +718 -0
  152. novel_downloader/utils/__init__.py +13 -22
  153. novel_downloader/utils/chapter_storage.py +3 -2
  154. novel_downloader/utils/constants.py +4 -29
  155. novel_downloader/utils/cookies.py +6 -18
  156. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  157. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  158. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  159. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  160. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  161. novel_downloader/utils/epub/__init__.py +1 -1
  162. novel_downloader/utils/epub/constants.py +57 -16
  163. novel_downloader/utils/epub/documents.py +88 -194
  164. novel_downloader/utils/epub/models.py +0 -14
  165. novel_downloader/utils/epub/utils.py +63 -96
  166. novel_downloader/utils/file_utils/__init__.py +2 -23
  167. novel_downloader/utils/file_utils/io.py +3 -113
  168. novel_downloader/utils/file_utils/sanitize.py +0 -4
  169. novel_downloader/utils/fontocr.py +207 -0
  170. novel_downloader/utils/logger.py +8 -16
  171. novel_downloader/utils/network.py +2 -2
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/time_utils/__init__.py +5 -11
  176. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  177. novel_downloader/utils/time_utils/sleep_utils.py +4 -8
  178. novel_downloader/web/__init__.py +13 -0
  179. novel_downloader/web/components/__init__.py +11 -0
  180. novel_downloader/web/components/navigation.py +35 -0
  181. novel_downloader/web/main.py +66 -0
  182. novel_downloader/web/pages/__init__.py +17 -0
  183. novel_downloader/web/pages/download.py +78 -0
  184. novel_downloader/web/pages/progress.py +147 -0
  185. novel_downloader/web/pages/search.py +329 -0
  186. novel_downloader/web/services/__init__.py +17 -0
  187. novel_downloader/web/services/client_dialog.py +164 -0
  188. novel_downloader/web/services/cred_broker.py +113 -0
  189. novel_downloader/web/services/cred_models.py +35 -0
  190. novel_downloader/web/services/task_manager.py +264 -0
  191. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  192. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  193. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  194. novel_downloader/core/downloaders/biquge.py +0 -29
  195. novel_downloader/core/downloaders/esjzone.py +0 -29
  196. novel_downloader/core/downloaders/linovelib.py +0 -29
  197. novel_downloader/core/downloaders/sfacg.py +0 -29
  198. novel_downloader/core/downloaders/yamibo.py +0 -29
  199. novel_downloader/core/exporters/biquge.py +0 -22
  200. novel_downloader/core/exporters/esjzone.py +0 -22
  201. novel_downloader/core/exporters/qianbi.py +0 -22
  202. novel_downloader/core/exporters/sfacg.py +0 -22
  203. novel_downloader/core/exporters/yamibo.py +0 -22
  204. novel_downloader/core/fetchers/base/__init__.py +0 -14
  205. novel_downloader/core/fetchers/base/browser.py +0 -422
  206. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  207. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  208. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  209. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  210. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  211. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  212. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  213. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  214. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  215. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  216. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  217. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  218. novel_downloader/core/parsers/biquge.py +0 -139
  219. novel_downloader/models/chapter.py +0 -25
  220. novel_downloader/models/types.py +0 -13
  221. novel_downloader/tui/__init__.py +0 -7
  222. novel_downloader/tui/app.py +0 -32
  223. novel_downloader/tui/main.py +0 -17
  224. novel_downloader/tui/screens/__init__.py +0 -14
  225. novel_downloader/tui/screens/home.py +0 -198
  226. novel_downloader/tui/screens/login.py +0 -74
  227. novel_downloader/tui/styles/home_layout.tcss +0 -79
  228. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  229. novel_downloader/utils/cache.py +0 -24
  230. novel_downloader/utils/fontocr/__init__.py +0 -22
  231. novel_downloader/utils/fontocr/hash_store.py +0 -280
  232. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  233. novel_downloader/utils/fontocr/model_loader.py +0 -69
  234. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  235. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  236. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  237. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  238. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  239. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  240. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  241. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.guidaye
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ import re
9
+ from datetime import datetime
10
+ from typing import Any
11
+
12
+ from lxml import html
13
+
14
+ from novel_downloader.core.parsers.base import BaseParser
15
+ from novel_downloader.core.parsers.registry import register_parser
16
+ from novel_downloader.models import (
17
+ BookInfoDict,
18
+ ChapterDict,
19
+ VolumeInfoDict,
20
+ )
21
+
22
+
23
@register_parser(
    site_keys=["guidaye"],
)
class GuidayeParser(BaseParser):
    """
    Parser for 名著阅读 (b.guidaye.com) book pages.
    """

    BASE_URL = "https://b.guidaye.com"

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse the book's landing page into structured metadata.

        :param html_list: [book-info page HTML]; only the first entry is used.
        :return: BookInfoDict, or None when no HTML is supplied.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        # --- Book metadata ---
        book_name = self._first_str(tree.xpath('//h1[@class="page-title"]/a/text()'))
        author = self._first_str(
            tree.xpath('//div[@id="category-description-author"]/a/text()')
        )
        # NOTE(review): if the cover <img> is absent this degrades to BASE_URL alone.
        cover_url = self.BASE_URL + self._first_str(
            tree.xpath('//div[@id="category-description-image"]//img/@src')
        )

        # Summary text; strip the leading "内容简介:" label once
        summary = (
            tree.xpath('string(//div[@id="category-description-text"])')
            .replace("内容简介:", "", 1)
            .strip()
        )

        # --- Chapter volumes & listings ---
        # <h3> elements open a new volume; <li> elements are chapters.
        volumes: list[VolumeInfoDict] = []
        curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}

        items = tree.xpath('//div[@class="entry-content"]/ul/*')
        for elem in items:
            tag = elem.tag.lower()
            if tag == "h3":
                # Flush the previous volume before starting a new one
                if curr_vol["chapters"]:
                    volumes.append(curr_vol)
                curr_vol = {"volume_name": elem.text_content().strip(), "chapters": []}
            elif tag == "li":
                links = elem.xpath(".//a")
                if not links:
                    # Robustness fix: skip malformed list items without a link
                    # (the original indexed [0] and crashed with IndexError).
                    continue
                link = links[0]
                href = link.get("href", "").strip()
                title = link.get("title", "").strip()
                cid_match = re.search(r"/(\d+)\.html$", href)
                chapter_id = cid_match.group(1) if cid_match else ""
                curr_vol["chapters"].append(
                    {"title": title, "url": href, "chapterId": chapter_id}
                )

        # Append the final volume
        if curr_vol["chapters"]:
            volumes.append(curr_vol)

        # Last-update date from the share blurb; fall back to today's date
        share_text = tree.xpath('string(//div[@id="category-description-share"])')
        m = re.search(r"最近更新[::]\s*([\d-]+)", share_text)
        update_time = m.group(1) if m else datetime.now().strftime("%Y-%m-%d")

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a chapter page into a ChapterDict.

        :param html_list: [chapter page HTML]; only the first entry is used.
        :param chapter_id: identifier echoed back in the result.
        :return: ChapterDict, or None when the page is empty.
        """
        if not html_list:
            return None
        tree = html.fromstring(html_list[0])

        # Chapter title from the entry header
        title = self._first_str(tree.xpath('//h1[@class="entry-title"]/text()'))

        # Full text of the reading area; normalize NBSP to plain spaces
        full_text = tree.xpath('string(//div[@class="entry-content"])')
        full_text = full_text.replace("\u00A0", " ")

        # Split into lines and drop blanks
        lines = [line.strip() for line in full_text.splitlines() if line.strip()]
        if not lines:
            return None

        content = "\n".join(lines)

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "guidaye"},
        }
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.hetushu
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ import re
9
+ from datetime import datetime
10
+ from typing import Any
11
+
12
+ from lxml import html
13
+
14
+ from novel_downloader.core.parsers.base import BaseParser
15
+ from novel_downloader.core.parsers.registry import register_parser
16
+ from novel_downloader.models import (
17
+ BookInfoDict,
18
+ ChapterDict,
19
+ VolumeInfoDict,
20
+ )
21
+
22
+
23
@register_parser(
    site_keys=["hetushu"],
)
class HetushuParser(BaseParser):
    """
    Parser for 和图书 (hetushu.com) book pages.
    """

    BASE_URL = "https://www.hetushu.com"

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse the book's info page into structured metadata.

        :param html_list: [book-info page HTML]; only the first entry is used.
        :return: BookInfoDict, or None when no HTML is supplied.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        # --- Metadata ---
        book_name = self._first_str(
            tree.xpath('//div[contains(@class,"book_info")]/h2/text()')
        )
        author = self._first_str(
            tree.xpath(
                '//div[contains(@class,"book_info")]/div[contains(.,"作者")]/a/text()'
            )
        )
        cover_url = self.BASE_URL + self._first_str(
            tree.xpath('//div[contains(@class,"book_info")]//img/@src')
        )

        # The info div carries a "finish" CSS class when the book is completed
        cls_attr = self._first_str(
            tree.xpath('//div[contains(@class,"book_info")]/@class')
        )
        serial_status = "已完结" if "finish" in cls_attr else "连载中"

        tags = [
            a.strip()
            for a in tree.xpath('//dl[@class="tag"]//dd/a/text()')
            if a.strip()
        ]

        paras = tree.xpath('//div[@class="intro"]/p/text()')
        summary = "\n".join(p.strip() for p in paras if p.strip())

        # --- Chapter volumes & listings ---
        # <dt> opens a new volume; <dd> entries are chapters.
        volumes: list[VolumeInfoDict] = []
        curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}

        for elem in tree.xpath('//dl[@id="dir"]/*'):
            if elem.tag == "dt":
                # Flush the previous volume before starting a new one
                if curr_vol["chapters"]:
                    volumes.append(curr_vol)
                curr_vol = {
                    "volume_name": elem.text_content().strip(),
                    "chapters": [],
                }
            elif elem.tag == "dd":
                links = elem.xpath(".//a")
                if not links:
                    # Robustness fix: skip malformed <dd> entries without a link
                    # (the original indexed [0] and crashed with IndexError).
                    continue
                link = links[0]
                href = link.get("href", "").strip()
                title = link.get("title", "").strip()
                # Extract the numeric chapter id from the URL
                m = re.search(r"/book/\d+/(?P<id>\d+)\.html", href)
                chapter_id = m.group("id") if m else ""
                curr_vol["chapters"].append(
                    {"title": title, "url": href, "chapterId": chapter_id}
                )

        # Append the last volume if it has any chapters
        if curr_vol["chapters"]:
            volumes.append(curr_vol)

        # The page exposes no update date; record the parse time instead
        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "serial_status": serial_status,
            "tags": tags,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a chapter page into a ChapterDict.

        :param html_list: [chapter page HTML]; only the first entry is used.
        :param chapter_id: identifier echoed back in the result.
        :return: ChapterDict, or None when the page yields no text.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        title = self._first_str(
            tree.xpath('//div[@id="content"]//h2[@class="h2"]/text()')
        )

        # Paragraph text lives in class-less child divs of #content
        paras = tree.xpath('//div[@id="content"]/div[not(@class)]/text()')
        paragraph_texts = [p.strip() for p in paras if p.strip()]

        content = "\n".join(paragraph_texts)
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "hetushu"},
        }
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.i25zw
4
+ -----------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
+ @register_parser(
23
+ site_keys=["i25zw"],
24
+ )
25
+ class I25zwParser(BaseParser):
26
+ """
27
+ Parser for 25中文网 book-info pages.
28
+ """
29
+
30
+ def parse_book_info(
31
+ self,
32
+ html_list: list[str],
33
+ **kwargs: Any,
34
+ ) -> BookInfoDict | None:
35
+ if len(html_list) < 2:
36
+ return None
37
+
38
+ info_tree = html.fromstring(html_list[0])
39
+ catalog_tree = html.fromstring(html_list[1])
40
+
41
+ # Metadata extraction
42
+ book_name = self._first_str(info_tree.xpath("//h1[@class='f21h']/text()"))
43
+ author = self._first_str(info_tree.xpath("//h1[@class='f21h']/em/a/text()"))
44
+ cover_url = self._first_str(info_tree.xpath("//div[@class='pic']/img/@src"))
45
+
46
+ # Tags, status, word count, update time
47
+ tag = self._first_str(
48
+ info_tree.xpath("//b[contains(text(),'小说分类')]/parent::td/text()")
49
+ )
50
+ serial_status = self._first_str(
51
+ info_tree.xpath("//b[contains(text(),'小说状态')]/parent::td/text()")
52
+ )
53
+ word_count = self._first_str(
54
+ info_tree.xpath("//b[contains(text(),'全文字数')]/parent::td/text()")
55
+ )
56
+ raw_update = self._first_str(
57
+ info_tree.xpath("//b[contains(text(),'更新时间')]/parent::td/text()")
58
+ )
59
+ update_time = raw_update.strip("()")
60
+
61
+ # Summary from styled intro div
62
+ full_intro = info_tree.xpath("string(//div[@class='intro'][@style])").strip()
63
+ summary = full_intro.replace(f"关于{book_name}:", "", 1).strip()
64
+
65
+ # Chapter list extraction
66
+ dl = catalog_tree.xpath("//div[@id='list']/dl")[0]
67
+ # Full-text section dd's
68
+ dds = dl.xpath("./dd[preceding-sibling::dt[1][contains(., '正文')]]/a")
69
+ if not dds:
70
+ # Fallback to second <dt>'s following <dd>
71
+ dds = dl.xpath("./dt[2]/following-sibling::dd/a")
72
+
73
+ chapters: list[ChapterInfoDict] = []
74
+ for a in dds:
75
+ url = a.get("href", "").strip()
76
+ title = a.text_content().strip()
77
+ # '/311006/252845677.html' -> '252845677'
78
+ chapter_id = url.split("/")[-1].split(".")[0]
79
+ chapters.append(
80
+ {
81
+ "title": title,
82
+ "url": url,
83
+ "chapterId": chapter_id,
84
+ }
85
+ )
86
+ volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
87
+
88
+ return {
89
+ "book_name": book_name,
90
+ "author": author,
91
+ "cover_url": cover_url,
92
+ "update_time": update_time,
93
+ "word_count": word_count,
94
+ "serial_status": serial_status,
95
+ "tags": [tag] if tag else [],
96
+ "summary": summary,
97
+ "volumes": volumes,
98
+ "extra": {},
99
+ }
100
+
101
+ def parse_chapter(
102
+ self,
103
+ html_list: list[str],
104
+ chapter_id: str,
105
+ **kwargs: Any,
106
+ ) -> ChapterDict | None:
107
+ if not html_list:
108
+ return None
109
+
110
+ tree = html.fromstring(html_list[0])
111
+
112
+ title_text = self._first_str(
113
+ tree.xpath("//div[@class='zhangjieming']/h1/text()")
114
+ )
115
+
116
+ content_divs = tree.xpath("//div[@id='content']")
117
+ if not content_divs:
118
+ return None
119
+ content_div = content_divs[0]
120
+
121
+ # Only select direct <p> children to avoid nav links
122
+ paragraphs = []
123
+ for p in content_div.xpath("./p"):
124
+ text = p.text_content().strip()
125
+ if text:
126
+ paragraphs.append(text)
127
+
128
+ content_text = "\n".join(paragraphs)
129
+ if not content_text.strip():
130
+ return None
131
+
132
+ return {
133
+ "id": chapter_id,
134
+ "title": title_text,
135
+ "content": content_text,
136
+ "extra": {"site": "i25zw"},
137
+ }
@@ -0,0 +1,186 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.ixdzs8
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ import contextlib
9
+ import json
10
+ from typing import Any
11
+
12
+ from lxml import html
13
+
14
+ from novel_downloader.core.parsers.base import BaseParser
15
+ from novel_downloader.core.parsers.registry import register_parser
16
+ from novel_downloader.models import (
17
+ BookInfoDict,
18
+ ChapterDict,
19
+ ChapterInfoDict,
20
+ VolumeInfoDict,
21
+ )
22
+
23
+
24
+ @register_parser(
25
+ site_keys=["ixdzs8"],
26
+ )
27
+ class Ixdzs8Parser(BaseParser):
28
+ """
29
+ Parser for 爱下电子书 book pages.
30
+ """
31
+
32
+ def parse_book_info(
33
+ self,
34
+ html_list: list[str],
35
+ **kwargs: Any,
36
+ ) -> BookInfoDict | None:
37
+ if len(html_list) < 2 or not html_list[0] or not html_list[1]:
38
+ return None
39
+
40
+ # Parse HTML
41
+ tree = html.fromstring(html_list[0])
42
+
43
+ book_name = self._meta(tree, "og:novel:book_name") or self._first_str(
44
+ tree.xpath("//div[@class='n-text']/h1/text()")
45
+ )
46
+
47
+ author = self._meta(tree, "og:novel:author") or self._first_str(
48
+ tree.xpath("//div[@class='n-text']//a[contains(@class,'bauthor')]/text()")
49
+ )
50
+
51
+ cover_url = self._meta(tree, "og:image")
52
+ if not cover_url:
53
+ cover_url = self._first_str(tree.xpath("//div[@class='n-img']//img/@src"))
54
+
55
+ serial_status = self._meta(tree, "og:novel:status")
56
+
57
+ # 2022-08-25T18:08:03+08:00 -> 2022-08-25 18:08:03
58
+ iso_time = self._meta(tree, "og:novel:update_time")
59
+ update_time = ""
60
+ if iso_time:
61
+ update_time = iso_time.replace("T", " ").split("+", 1)[0].strip()
62
+
63
+ word_count = self._first_str(
64
+ tree.xpath("//div[@class='n-text']//span[contains(@class,'nsize')]/text()")
65
+ )
66
+
67
+ raw_summary = self._meta(tree, "og:description")
68
+ summary = ""
69
+ if raw_summary:
70
+ s = raw_summary.replace("&nbsp;", "")
71
+ s = s.replace("<br />", "\n")
72
+ summary = "\n".join(
73
+ self._norm_space(line) for line in s.splitlines()
74
+ ).strip()
75
+
76
+ tags = [
77
+ self._norm_space(t)
78
+ for t in tree.xpath("//div[contains(@class,'tags')]//em/a/text()")
79
+ if t and t.strip()
80
+ ]
81
+ category = self._meta(tree, "og:novel:category") or self._first_str(
82
+ tree.xpath("//div[@class='n-text']/p[a[contains(@class,'nsort')]]/a/text()")
83
+ )
84
+ if category:
85
+ tags.append(category)
86
+
87
+ book_path = self._meta(tree, "og:novel:read_url") or self._meta(tree, "og:url")
88
+ book_id = ""
89
+ if book_path:
90
+ book_id = book_path.strip("/").split("/")[-1]
91
+
92
+ data = {}
93
+ with contextlib.suppress(Exception):
94
+ data = json.loads(html_list[1])
95
+ clist = data.get("data", []) if isinstance(data, dict) else []
96
+
97
+ chapters: list[ChapterInfoDict] = []
98
+ for chap in clist:
99
+ ordernum = str(chap.get("ordernum", "")).strip()
100
+ if not ordernum:
101
+ continue
102
+ title = self._norm_space(chap.get("title", "") or "") or "未命名章节"
103
+ url = f"/read/{book_id}/p{ordernum}.html" if book_id else ""
104
+ chapters.append(
105
+ {
106
+ "url": url,
107
+ "title": title,
108
+ "chapterId": f"p{ordernum}",
109
+ }
110
+ )
111
+
112
+ volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
113
+
114
+ return {
115
+ "book_name": book_name,
116
+ "author": author,
117
+ "cover_url": cover_url,
118
+ "serial_status": serial_status,
119
+ "update_time": update_time,
120
+ "word_count": word_count,
121
+ "summary": summary,
122
+ "tags": tags,
123
+ "volumes": volumes,
124
+ "extra": {},
125
+ }
126
+
127
+ def parse_chapter(
128
+ self,
129
+ html_list: list[str],
130
+ chapter_id: str,
131
+ **kwargs: Any,
132
+ ) -> ChapterDict | None:
133
+ if not html_list:
134
+ return None
135
+ tree = html.fromstring(html_list[0])
136
+
137
+ title = self._first_str(tree.xpath("//div[@class='page-d-top']/h1/text()"))
138
+ if not title:
139
+ title = self._first_str(
140
+ tree.xpath("//article[contains(@class,'page-content')]//h3/text()")
141
+ )
142
+ title = self._norm_space(title)
143
+
144
+ # paragraphs within the reading section; skip ad containers
145
+ ps = tree.xpath(
146
+ "//article[contains(@class,'page-content')]//section//p[not(contains(@class,'abg'))]"
147
+ )
148
+
149
+ paragraphs: list[str] = []
150
+ for p in ps:
151
+ raw = p.text_content()
152
+ txt = self._norm_space(raw)
153
+ if not txt or self._is_ad_line(txt):
154
+ continue
155
+ paragraphs.append(txt)
156
+
157
+ if not paragraphs:
158
+ return None
159
+
160
+ # Replace FIRST line with .replace(title, "")
161
+ first = paragraphs[0].replace(title, "")
162
+ first = first.replace(title.replace(" ", ""), "").strip()
163
+ if first:
164
+ paragraphs[0] = first
165
+ else:
166
+ paragraphs.pop(0)
167
+
168
+ if paragraphs:
169
+ last = paragraphs[-1]
170
+ if "本章完" in last:
171
+ paragraphs.pop()
172
+
173
+ content = "\n".join(paragraphs)
174
+ if not content.strip():
175
+ return None
176
+
177
+ return {
178
+ "id": chapter_id,
179
+ "title": title,
180
+ "content": content,
181
+ "extra": {"site": "ixdzs8"},
182
+ }
183
+
184
+ @classmethod
185
+ def _meta(cls, tree: html.HtmlElement, prop: str) -> str:
186
+ return cls._first_str(tree.xpath(f"//meta[@property='{prop}']/@content"))