novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +77 -64
  6. novel_downloader/cli/export.py +16 -20
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +65 -105
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +1 -0
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +14 -9
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +17 -11
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +61 -66
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  100. novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
  101. novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
  102. novel_downloader/core/parsers/qidian/main_parser.py +11 -38
  103. novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
  104. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  105. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  106. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  107. novel_downloader/core/parsers/quanben5.py +103 -0
  108. novel_downloader/core/parsers/registry.py +5 -16
  109. novel_downloader/core/parsers/sfacg.py +38 -45
  110. novel_downloader/core/parsers/shencou.py +215 -0
  111. novel_downloader/core/parsers/shuhaige.py +111 -0
  112. novel_downloader/core/parsers/tongrenquan.py +116 -0
  113. novel_downloader/core/parsers/ttkan.py +132 -0
  114. novel_downloader/core/parsers/wanbengo.py +191 -0
  115. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  116. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  117. novel_downloader/core/parsers/xs63b.py +161 -0
  118. novel_downloader/core/parsers/xshbook.py +134 -0
  119. novel_downloader/core/parsers/yamibo.py +87 -131
  120. novel_downloader/core/parsers/yibige.py +166 -0
  121. novel_downloader/core/searchers/__init__.py +34 -3
  122. novel_downloader/core/searchers/aaatxt.py +107 -0
  123. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  124. novel_downloader/core/searchers/base.py +112 -36
  125. novel_downloader/core/searchers/dxmwx.py +105 -0
  126. novel_downloader/core/searchers/eightnovel.py +84 -0
  127. novel_downloader/core/searchers/esjzone.py +43 -25
  128. novel_downloader/core/searchers/hetushu.py +92 -0
  129. novel_downloader/core/searchers/i25zw.py +93 -0
  130. novel_downloader/core/searchers/ixdzs8.py +107 -0
  131. novel_downloader/core/searchers/jpxs123.py +107 -0
  132. novel_downloader/core/searchers/piaotia.py +100 -0
  133. novel_downloader/core/searchers/qbtr.py +106 -0
  134. novel_downloader/core/searchers/qianbi.py +74 -40
  135. novel_downloader/core/searchers/quanben5.py +144 -0
  136. novel_downloader/core/searchers/registry.py +24 -8
  137. novel_downloader/core/searchers/shuhaige.py +124 -0
  138. novel_downloader/core/searchers/tongrenquan.py +110 -0
  139. novel_downloader/core/searchers/ttkan.py +92 -0
  140. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  141. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  142. novel_downloader/core/searchers/xs63b.py +104 -0
  143. novel_downloader/locales/en.json +31 -82
  144. novel_downloader/locales/zh.json +32 -83
  145. novel_downloader/models/__init__.py +21 -22
  146. novel_downloader/models/book.py +44 -0
  147. novel_downloader/models/config.py +4 -37
  148. novel_downloader/models/login.py +1 -1
  149. novel_downloader/models/search.py +5 -0
  150. novel_downloader/resources/config/settings.toml +8 -70
  151. novel_downloader/resources/json/xiguashuwu.json +718 -0
  152. novel_downloader/utils/__init__.py +13 -22
  153. novel_downloader/utils/chapter_storage.py +3 -2
  154. novel_downloader/utils/constants.py +4 -29
  155. novel_downloader/utils/cookies.py +6 -18
  156. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  157. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  158. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  159. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  160. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  161. novel_downloader/utils/epub/__init__.py +1 -1
  162. novel_downloader/utils/epub/constants.py +57 -16
  163. novel_downloader/utils/epub/documents.py +88 -194
  164. novel_downloader/utils/epub/models.py +0 -14
  165. novel_downloader/utils/epub/utils.py +63 -96
  166. novel_downloader/utils/file_utils/__init__.py +2 -23
  167. novel_downloader/utils/file_utils/io.py +3 -113
  168. novel_downloader/utils/file_utils/sanitize.py +0 -4
  169. novel_downloader/utils/fontocr.py +207 -0
  170. novel_downloader/utils/logger.py +8 -16
  171. novel_downloader/utils/network.py +2 -2
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/time_utils/__init__.py +5 -11
  176. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  177. novel_downloader/utils/time_utils/sleep_utils.py +4 -8
  178. novel_downloader/web/__init__.py +13 -0
  179. novel_downloader/web/components/__init__.py +11 -0
  180. novel_downloader/web/components/navigation.py +35 -0
  181. novel_downloader/web/main.py +66 -0
  182. novel_downloader/web/pages/__init__.py +17 -0
  183. novel_downloader/web/pages/download.py +78 -0
  184. novel_downloader/web/pages/progress.py +147 -0
  185. novel_downloader/web/pages/search.py +329 -0
  186. novel_downloader/web/services/__init__.py +17 -0
  187. novel_downloader/web/services/client_dialog.py +164 -0
  188. novel_downloader/web/services/cred_broker.py +113 -0
  189. novel_downloader/web/services/cred_models.py +35 -0
  190. novel_downloader/web/services/task_manager.py +264 -0
  191. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  192. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  193. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  194. novel_downloader/core/downloaders/biquge.py +0 -29
  195. novel_downloader/core/downloaders/esjzone.py +0 -29
  196. novel_downloader/core/downloaders/linovelib.py +0 -29
  197. novel_downloader/core/downloaders/sfacg.py +0 -29
  198. novel_downloader/core/downloaders/yamibo.py +0 -29
  199. novel_downloader/core/exporters/biquge.py +0 -22
  200. novel_downloader/core/exporters/esjzone.py +0 -22
  201. novel_downloader/core/exporters/qianbi.py +0 -22
  202. novel_downloader/core/exporters/sfacg.py +0 -22
  203. novel_downloader/core/exporters/yamibo.py +0 -22
  204. novel_downloader/core/fetchers/base/__init__.py +0 -14
  205. novel_downloader/core/fetchers/base/browser.py +0 -422
  206. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  207. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  208. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  209. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  210. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  211. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  212. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  213. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  214. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  215. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  216. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  217. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  218. novel_downloader/core/parsers/biquge.py +0 -139
  219. novel_downloader/models/chapter.py +0 -25
  220. novel_downloader/models/types.py +0 -13
  221. novel_downloader/tui/__init__.py +0 -7
  222. novel_downloader/tui/app.py +0 -32
  223. novel_downloader/tui/main.py +0 -17
  224. novel_downloader/tui/screens/__init__.py +0 -14
  225. novel_downloader/tui/screens/home.py +0 -198
  226. novel_downloader/tui/screens/login.py +0 -74
  227. novel_downloader/tui/styles/home_layout.tcss +0 -79
  228. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  229. novel_downloader/utils/cache.py +0 -24
  230. novel_downloader/utils/fontocr/__init__.py +0 -22
  231. novel_downloader/utils/fontocr/hash_store.py +0 -280
  232. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  233. novel_downloader/utils/fontocr/model_loader.py +0 -69
  234. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  235. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  236. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  237. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  238. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  239. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  240. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  241. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,215 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.shencou
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import etree, html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ VolumeInfoDict,
18
+ )
19
+
20
+
21
@register_parser(
    site_keys=["shencou"],
)
class ShencouParser(BaseParser):
    """
    Parser for 神凑轻小说 book pages.

    Two entry points: ``parse_book_info`` consumes the info page plus the
    catalog page, ``parse_chapter`` consumes a single chapter page.
    """

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse book metadata and the chapter catalog.

        :param html_list: two HTML documents — ``html_list[0]`` is the book
            info page, ``html_list[1]`` is the catalog page.
        :return: the assembled ``BookInfoDict``, or ``None`` when fewer than
            two documents were supplied.
        """
        if len(html_list) < 2:
            return None

        info_tree = html.fromstring(html_list[0])
        catalog_tree = html.fromstring(html_list[1])

        # --- Metadata ---
        # The page title anchor ends with the literal suffix "小说" ("novel");
        # drop those two characters to get the bare book name.
        raw_name = self._first_str(info_tree.xpath("//span//a/text()"))
        book_name = raw_name[:-2] if raw_name.endswith("小说") else raw_name

        author = self._first_str(
            info_tree.xpath('//td[contains(text(),"小说作者")]/text()'),
            replaces=[("小说作者:", "")],
        )

        cover_url = self._first_str(
            info_tree.xpath('//a[contains(@href,"/files/article/image")]/img/@src')
        )

        # word count
        word_count = self._first_str(
            info_tree.xpath('//td[contains(text(),"全文长度")]/text()'),
            replaces=[("全文长度:", "")],
        )

        # update time
        update_time = self._first_str(
            info_tree.xpath('//td[contains(text(),"最后更新")]/text()'),
            replaces=[("最后更新:", "")],
        )

        # serial status
        serial_status = self._first_str(
            info_tree.xpath('//td[contains(text(),"写作进度")]/text()'),
            replaces=[("写作进度:", "")],
        )

        # Summary: the detail cell mixes several labelled sections; keep only
        # the text between the "内容简介:" (synopsis) and "本书公告:"
        # (announcement) markers. If either marker is absent, summary stays "".
        raw_detail = self._norm_space(
            info_tree.xpath('string(//td[@width="80%" and @valign="top"])')
        )
        summary = ""
        if "内容简介:" in raw_detail and "本书公告:" in raw_detail:
            intro = raw_detail.split("内容简介:", 1)[1]
            summary = intro.split("本书公告:", 1)[0].strip()

        # --- Catalog / Chapters ---
        volumes: list[VolumeInfoDict] = []
        # Chapters seen before any volume header land in this default volume.
        curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}

        # Walk through volume headers (.zjbox) and lists (.zjlist4) in document order
        for elem in catalog_tree.xpath(
            '//div[@class="zjbox"] | //div[@class="zjlist4"]'
        ):
            cls_attr = elem.get("class", "")
            if "zjbox" in cls_attr:
                # before starting new volume, save the previous if it has chapters
                if curr_vol["chapters"]:
                    volumes.append(curr_vol)
                # start a new volume
                vol_name = elem.xpath(".//h2/text()")[0].strip()
                curr_vol = {"volume_name": vol_name, "chapters": []}
            elif "zjlist4" in cls_attr:
                # collect all <li><a> entries under this list
                for a in elem.xpath(".//ol/li/a"):
                    url = a.get("href").strip()
                    title = a.text_content().strip()
                    # '203740.html' -> '203740'
                    chap_id = url.split(".")[0]
                    curr_vol["chapters"].append(
                        {
                            "title": title,
                            "url": url,
                            "chapterId": chap_id,
                        }
                    )

        # append last volume if not empty
        if curr_vol["chapters"]:
            volumes.append(curr_vol)

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "summary": summary,
            "volumes": volumes,
            "word_count": word_count,
            "serial_status": serial_status,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a single chapter page.

        The chapter body is not wrapped in its own element: it is the run of
        sibling nodes that follows the ``#BookSee_Right`` marker div, ending
        at the ``<!--over-->`` HTML comment. This method walks those siblings
        and collects their text (and inline images) line by line.

        :param html_list: rendered HTML documents; only the first is used.
        :return: the ``ChapterDict``, or ``None`` when the page has no title,
            no marker div, or yields no content.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])
        title = self._first_str(tree.xpath("//h1/text()"))
        if not title:
            return None

        # strip book-name prefix (second breadcrumb link) if present
        bc = tree.xpath('//div[@id="breadCrumb"]//a/text()')
        if len(bc) >= 2:
            book_name = bc[1].strip()
            title = title.removeprefix(book_name).lstrip(" ::–—-").strip()

        anchors = tree.xpath('//div[@id="BookSee_Right"]')
        if not anchors:
            return None
        marker = anchors[0]

        lines: list[str] = []

        def _append_text(text: str) -> None:
            # Normalize NBSP to plain spaces, then keep each non-empty,
            # stripped line of the fragment.
            for ln in text.replace("\xa0", " ").splitlines():
                ln2 = ln.strip()
                if ln2:
                    lines.append(ln2)

        # Text that immediately follows the marker div (its .tail) is part of
        # the chapter body too.
        if marker.tail:
            _append_text(marker.tail)

        # Walk the marker's following siblings until the <!--over--> comment.
        node = marker
        while True:
            sib = node.getnext()
            if sib is None:
                break
            node = sib

            # Stop on the closing comment
            if isinstance(sib, etree._Comment) and "over" in (sib.text or ""):
                break

            # Process comment tails (e.g. <!--go--> tail)
            if isinstance(sib, etree._Comment):
                if sib.tail:
                    _append_text(sib.tail)
                continue

            if isinstance(sib, html.HtmlElement):
                # .tag may not be a plain str for exotic node types; coerce
                # defensively before comparing.
                tag = str(sib.tag).lower()
                cls = sib.get("class", "") or ""

                # Illustration wrapper: keep the image as an <img> line.
                if tag == "div" and "divimage" in cls:
                    srcs = sib.xpath(".//img/@src")
                    if srcs:
                        lines.append(f'<img src="{srcs[0]}" />')
                    # text after the div
                    if sib.tail:
                        _append_text(sib.tail)
                    continue

                # <br> carries no text of its own, only a tail.
                if tag == "br":
                    if sib.tail:
                        _append_text(sib.tail)
                    continue

                # Any other element: take its full text plus trailing tail.
                text = sib.text_content()
                _append_text(text)
                if sib.tail:
                    _append_text(sib.tail)
                continue

        content = "\n".join(lines)
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "shencou"},
        }
@@ -0,0 +1,111 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.shuhaige
4
+ --------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
@register_parser(
    site_keys=["shuhaige"],
)
class ShuhaigeParser(BaseParser):
    """
    Parser for 书海阁小说网 book pages.

    Extracts book metadata plus a single-volume chapter catalog from the
    info page, and plain-text chapter bodies from chapter pages.
    """

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Build a ``BookInfoDict`` from the book's info page.

        :param html_list: rendered HTML documents; only the first is used.
        :return: parsed metadata, or ``None`` when no HTML was supplied.
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        # Header metadata lives in the #info panel.
        book_name = self._first_str(doc.xpath('//div[@id="info"]/h1/text()'))
        author = self._first_str(doc.xpath('//div[@id="info"]/p[1]/a/text()'))
        cover_url = self._first_str(doc.xpath('//div[@id="fmimg"]/img/@src'))
        update_time = self._first_str(
            doc.xpath('//div[@id="info"]/p[3]/text()'),
            replaces=[("最后更新:", "")],
        )
        summary = self._first_str(doc.xpath('//div[@id="intro"]/p[1]/text()'))

        # The second breadcrumb link is the category; it becomes the only tag.
        book_type = self._first_str(doc.xpath('//div[@class="con_top"]/a[2]/text()'))
        tags: list[str] = [book_type] if book_type else []

        # Chapter anchors are the <dd>/<a> entries following the "正文" <dt>.
        chapters: list[ChapterInfoDict] = []
        for anchor in doc.xpath(
            '//div[@id="list"]/dl/dt[contains(., "正文")]/following-sibling::dd/a'
        ):
            href = anchor.get("href") or ""
            # '/book/123/456.html' -> '456'
            chapters.append(
                {
                    "title": (anchor.text or "").strip(),
                    "url": href.strip(),
                    "chapterId": href.rsplit("/", 1)[-1].split(".", 1)[0],
                }
            )

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "tags": tags,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Build a ``ChapterDict`` from a chapter page.

        :param html_list: rendered HTML documents; only the first is used.
        :param chapter_id: site chapter identifier, echoed back in the result.
        :return: the chapter, or ``None`` when the content container is
            missing or empty.
        """
        if not html_list:
            return None
        doc = html.fromstring(html_list[0])

        heading = self._first_str(doc.xpath('//div[@class="bookname"]/h1/text()'))
        # Fall back to a synthetic heading when the page carries none.
        title = heading if heading else f"第 {chapter_id} 章"

        holders = doc.xpath('//div[@id="content"]')
        if not holders:
            return None

        paragraphs: list[str] = []
        for para in holders[0].xpath(".//p"):
            paragraphs.append("".join(para.itertext()).strip())

        # Drop the trailing site-promo paragraph when present.
        if paragraphs and "www.shuhaige.net" in paragraphs[-1]:
            del paragraphs[-1]

        content = "\n".join(paragraphs)
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "shuhaige"},
        }
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.tongrenquan
4
+ -----------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
@register_parser(
    site_keys=["tongrenquan"],
)
class TongrenquanParser(BaseParser):
    """
    Parser for 同人圈 book pages.
    """

    # Site root; relative cover paths are resolved against it.
    BASE_URL = "https://www.tongrenquan.org"

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse the book info page into a ``BookInfoDict``.

        :param html_list: rendered HTML documents; only the first is used.
        :return: parsed metadata, or ``None`` when no HTML was supplied.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        # Metadata
        book_name = self._first_str(tree.xpath('//div[@class="infos"]/h1/text()'))
        author = self._first_str(
            tree.xpath('//div[@class="date"]/span/text()'),
            replaces=[("作者:", "")],
        )
        # Only prefix the site root when a cover path was actually found;
        # otherwise the result would be a bogus URL equal to BASE_URL.
        cover_path = self._first_str(tree.xpath('//div[@class="pic"]//img/@src'))
        cover_url = self.BASE_URL + cover_path if cover_path else ""
        update_time = self._first_str(
            tree.xpath('//div[@class="date"]/text()'),
            replaces=[("日期:", "")],
        )

        # Summary (collapse text within the <p> tag)
        paras = tree.xpath('//div[@class="infos"]/p//text()')
        summary = "\n".join(p.strip() for p in paras if p.strip())

        # Chapters extraction
        chapters: list[ChapterInfoDict] = []
        for a in tree.xpath('//div[contains(@class,"book_list")]//ul//li/a'):
            url = a.get("href", "").strip()
            title = a.text_content().strip()
            # General pattern: /category/bookId/chapterId.html
            # '/tongren/7562/462.html' -> '462'
            # NOTE: removesuffix, not rstrip — rstrip(".html") strips a
            # character *set* and would corrupt ids ending in h/t/m/l.
            chapter_id = url.removesuffix(".html").rsplit("/", 1)[-1]
            chapters.append({"title": title, "url": url, "chapterId": chapter_id})

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "tags": ["同人小说"],
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a chapter page into a ``ChapterDict``.

        :param html_list: rendered HTML documents; only the first is used.
        :param chapter_id: site chapter identifier, echoed back in the result.
        :return: the chapter, or ``None`` when no content is present.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        raw_title = self._first_str(
            tree.xpath('//div[contains(@class,"read_chapterName")]//h1/text()')
        )

        # The last breadcrumb link is the book name, which the heading repeats;
        # strip it only when it was actually found.
        book_name = self._first_str(
            tree.xpath('//div[contains(@class,"readTop")]//a[last()]/text()')
        )
        title = raw_title.replace(book_name, "").strip() if book_name else raw_title

        # Extract paragraphs of content
        paras = tree.xpath('//div[contains(@class,"read_chapterDetail")]/p')
        texts = [p.text_content().strip() for p in paras if p.text_content().strip()]
        content = "\n".join(texts)
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "tongrenquan"},
        }
@@ -0,0 +1,132 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.ttkan
4
+ -----------------------------------
5
+
6
+ """
7
+
8
+ from datetime import datetime
9
+ from typing import Any
10
+
11
+ from lxml import html
12
+
13
+ from novel_downloader.core.parsers.base import BaseParser
14
+ from novel_downloader.core.parsers.registry import register_parser
15
+ from novel_downloader.models import (
16
+ BookInfoDict,
17
+ ChapterDict,
18
+ ChapterInfoDict,
19
+ VolumeInfoDict,
20
+ )
21
+
22
+
23
@register_parser(
    site_keys=["ttkan"],
)
class TtkanParser(BaseParser):
    """
    Parser for 天天看小說 book pages.
    """

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse the book info page into a ``BookInfoDict``.

        :param html_list: rendered HTML documents; only the first is used.
        :return: parsed metadata, or ``None`` when no HTML was supplied.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        # Book metadata
        book_name = self._first_str(
            tree.xpath('//div[contains(@class,"novel_info")]//h1/text()')
        )

        author = self._first_str(
            tree.xpath(
                '//div[contains(@class,"novel_info")]//li[span/text()="作者:"]/a/text()'
            )
        )

        # Covers are served through AMP, hence the <amp-img> element.
        cover_url = self._first_str(
            tree.xpath('//div[contains(@class,"novel_info")]//amp-img/@src')
        )

        serial_status = self._first_str(
            tree.xpath(
                '//div[contains(@class,"novel_info")]//span[contains(@class,"state_serial")]/text()'
            )
        )

        # The page exposes no last-update timestamp, so record the parse time.
        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Summary
        summary_nodes = tree.xpath('//div[@class="description"]//p/text()')
        summary = "".join(summary_nodes).strip()

        # Single "正文" volume with all chapter links
        chapters: list[ChapterInfoDict] = []
        for a in tree.xpath('//div[@class="full_chapters"]/div[1]/a'):
            url = a.get("href", "").strip()
            title = a.text_content().strip()
            # '/novel/pagea/wushenzhuzai-anmoshi_6094.html' -> '6094'
            # NOTE: removesuffix, not rstrip — rstrip(".html") strips a
            # character *set* and would mangle ids ending in h/t/m/l.
            chap_id = url.removesuffix(".html").rsplit("_", 1)[-1]
            chapters.append(
                {
                    "chapterId": chap_id,
                    "title": title,
                    "url": url,
                }
            )

        volumes: list[VolumeInfoDict] = [
            {
                "volume_name": "正文",
                "chapters": chapters,
            }
        ]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "serial_status": serial_status,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a chapter page into a ``ChapterDict``.

        :param html_list: rendered HTML documents; only the first is used.
        :param chapter_id: site chapter identifier, echoed back in the result.
        :return: the chapter, or ``None`` when no content is present.
        """
        if not html_list:
            return None
        tree = html.fromstring(html_list[0])

        # Title (may legitimately be empty; content alone decides validity)
        title_nodes = tree.xpath('//div[@class="title"]/h1/text()')
        title = title_nodes[0].strip() if title_nodes else ""

        # Content paragraphs under <div class="content">
        lines = [
            text
            for p in tree.xpath('//div[@class="content"]/p')
            if (text := p.text_content().strip())
        ]

        content = "\n".join(lines).strip()
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "ttkan"},
        }