novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,115 @@
#!/usr/bin/env python3
"""
novel_downloader.core.archived.deqixs.fetcher
---------------------------------------------

Async fetcher for the 得奇小说网 (www.deqixs.com) novel website.
"""

from typing import Any

from novel_downloader.core.fetchers.base import BaseSession
from novel_downloader.models import FetcherConfig
from novel_downloader.utils import async_jitter_sleep

# from novel_downloader.core.fetchers.registry import register_fetcher


# @register_fetcher(
#     site_keys=["deqixs"],
# )
class DeqixsSession(BaseSession):
    """
    A session class for interacting with the 得奇小说网 (www.deqixs.com) novel website.
    """

    BASE_URL = "https://www.deqixs.com"
    BOOK_INFO_URL = "https://www.deqixs.com/xiaoshuo/{book_id}/"
    CHAPTER_URL = "https://www.deqixs.com/xiaoshuo/{book_id}/{chapter_id}.html"

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__("deqixs", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of the book info page asynchronously.

        :param book_id: The book identifier.
        :return: A single-element list containing the page HTML.
        """
        url = self.book_info_url(book_id=book_id)
        return [await self.fetch(url, **kwargs)]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of a single chapter asynchronously.

        A chapter may span several pages (``{chapter_id}.html``,
        ``{chapter_id}-2.html``, ...). Pages are fetched until the most
        recently fetched page no longer links to the next page's path.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :return: The HTML of every page of the chapter, in order.
        """
        html_pages: list[str] = []
        page_no = 1

        while True:
            chapter_suffix = chapter_id if page_no == 1 else f"{chapter_id}-{page_no}"
            relative_path = f"/xiaoshuo/{book_id}/{chapter_suffix}.html"

            # Stop when the previously fetched page does not reference the
            # next page's relative URL.
            if page_no > 1 and relative_path not in html_pages[-1]:
                break

            # Rate-limit only between real requests. (Previously the sleep
            # ran after every fetch, including the final page, wasting one
            # full request interval per chapter.)
            if page_no > 1:
                await async_jitter_sleep(
                    self.request_interval,
                    mul_spread=1.1,
                    max_sleep=self.request_interval + 2,
                )

            try:
                html = await self.fetch(self.BASE_URL + relative_path, **kwargs)
            except Exception as exc:
                self.logger.warning(
                    "[async] get_book_chapter(%s page %d) failed: %s",
                    chapter_id,
                    page_no,
                    exc,
                )
                break

            html_pages.append(html)
            page_no += 1

        return html_pages

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return cls.BOOK_INFO_URL.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
@@ -0,0 +1,132 @@
#!/usr/bin/env python3
"""
novel_downloader.core.archived.deqixs.parser
--------------------------------------------

"""

from typing import Any

from lxml import html

from novel_downloader.core.parsers.base import BaseParser
from novel_downloader.models import (
    BookInfoDict,
    ChapterDict,
    ChapterInfoDict,
    VolumeInfoDict,
)

# from novel_downloader.core.parsers.registry import register_parser


# @register_parser(
#     site_keys=["deqixs"],
# )
class DeqixsParser(BaseParser):
    """
    Parser for 得奇小说网 book pages.
    """

    # Boilerplate snippets filtered out of chapter paragraphs.
    ADS: set[str] = {
        "更新不易",
        "记得分享",
        "(本章完)",
    }

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """Parse the book-info page HTML into a structured book dict."""
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        # Title and word count share the <h1> header block.
        book_name = doc.xpath("//div[@class='itemtxt']/h1/a/text()")[0].strip()
        word_count = doc.xpath("//div[@class='itemtxt']/h1/i/text()")[0].strip()

        # First span: serialization status; middle spans: genre tags.
        spans = doc.xpath("//div[@class='itemtxt']/p[1]/span/text()")
        serial_status = spans[0].strip() if spans else ""
        tags = [s.strip() for s in spans[1:-1]] if len(spans) > 2 else []

        author = (
            doc.xpath("//div[@class='itemtxt']/p[2]/a/text()")[0]
            .replace("作者:", "")
            .strip()
        )

        # Normalize protocol-relative cover URLs to https.
        cover_src = doc.xpath("//div[@class='item']//a/img/@src")[0]
        cover_url = "https:" + cover_src if cover_src.startswith("//") else cover_src

        update_time = (
            doc.xpath("//h2[@id='dir']/span/text()")[0]
            .strip()
            .replace("更新时间:", "")
            .strip()
        )

        # Summary paragraphs come from the first description block.
        paras = doc.xpath("(//div[@class='des bb'])[1]/p/text()")
        summary = "\n".join(p.strip() for p in paras if p.strip())

        # Chapter list; the site has no volume structure, so everything
        # goes into a single default volume.
        chapters: list[ChapterInfoDict] = []
        for link in doc.xpath("//div[@id='list']//ul/li/a"):
            href = link.get("href")
            chapters.append(
                {
                    "title": link.text_content().strip(),
                    "url": href,
                    "chapterId": href.split("/")[-1].replace(".html", ""),
                }
            )

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "serial_status": serial_status,
            "word_count": word_count,
            "summary": summary,
            "tags": tags,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """Assemble one chapter from the HTML of its page(s)."""
        if not html_list:
            return None

        title_text = ""
        paragraphs: list[str] = []
        for page in html_list:
            doc = html.fromstring(page)
            # The header reads "book > chapter"; keep only the chapter part.
            if not title_text:
                full_title = doc.xpath("string(//div[@class='submenu']/h1)")
                title_text = (
                    full_title.split(">", 1)[1].strip()
                    if ">" in full_title
                    else full_title.strip()
                )
            # Collect paragraphs, dropping empties and ad boilerplate.
            for node in doc.xpath("//div[@class='con']/p"):
                text = node.text_content().strip()
                if text and not any(ad in text for ad in self.ADS):
                    paragraphs.append(text)

        if not paragraphs:
            return None

        return {
            "id": chapter_id,
            "title": title_text,
            "content": "\n".join(paragraphs),
            "extra": {"site": "deqixs"},
        }
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.archived.deqixs.searcher
4
+ ----------------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+ from novel_downloader.core.searchers.base import BaseSearcher
12
+ from novel_downloader.models import SearchResult
13
+
14
+ # from novel_downloader.core.searchers.registry import register_searcher
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ # @register_searcher(
20
+ # site_keys=["deqixs"],
21
+ # )
22
+ class DeqixsSearcher(BaseSearcher):
23
+ site_name = "deqixs"
24
+ priority = 20
25
+ BASE_URL = "https://www.deqixs.com"
26
+ SEARCH_URL = "https://www.deqixs.com/tag/"
27
+
28
+ @classmethod
29
+ async def _fetch_html(cls, keyword: str) -> str:
30
+ params = {"key": keyword}
31
+ try:
32
+ async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
33
+ return await cls._response_to_str(resp)
34
+ except Exception:
35
+ logger.error(
36
+ "Failed to fetch HTML for keyword '%s' from '%s'",
37
+ keyword,
38
+ cls.SEARCH_URL,
39
+ )
40
+ return ""
41
+
42
+ @classmethod
43
+ def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
44
+ doc = html.fromstring(html_str)
45
+ rows = doc.xpath("//div[@class='container']/div[@class='item']")
46
+ results: list[SearchResult] = []
47
+
48
+ for idx, row in enumerate(rows):
49
+ if limit is not None and idx >= limit:
50
+ break
51
+
52
+ href = row.xpath(".//h3/a/@href")[0]
53
+ book_id = href.strip("/ ").split("/")[-1]
54
+ if not book_id:
55
+ continue
56
+ book_url = cls.BASE_URL + href
57
+ img_src = row.xpath(".//a/img/@src")[0]
58
+ cover_url = "https:" + img_src if img_src.startswith("//") else img_src
59
+ title = row.xpath(".//h3/a/text()")[0].strip()
60
+
61
+ author_text = row.xpath(".//p[2]/a/text()")[0]
62
+ author = author_text.replace("作者:", "").strip()
63
+
64
+ spans = row.xpath(".//p[1]/span/text()")
65
+ word_count = spans[2].strip() if len(spans) > 2 else ""
66
+
67
+ # Extract latest chapter and update date
68
+ first_li = row.xpath(".//ul/li")[0]
69
+ update_date = first_li.xpath("./i/text()")[0].strip()
70
+ latest_chapter = first_li.xpath("./a/text()")[0].strip()
71
+
72
+ # Compute priority
73
+ prio = cls.priority + idx
74
+
75
+ results.append(
76
+ SearchResult(
77
+ site=cls.site_name,
78
+ book_id=book_id,
79
+ book_url=book_url,
80
+ cover_url=cover_url,
81
+ title=title,
82
+ author=author,
83
+ latest_chapter=latest_chapter,
84
+ update_date=update_date,
85
+ word_count=word_count,
86
+ priority=prio,
87
+ )
88
+ )
89
+ return results
@@ -0,0 +1,79 @@
#!/usr/bin/env python3
"""
novel_downloader.core.archived.qidian.searcher
----------------------------------------------

"""

import logging

from lxml import html

from novel_downloader.core.searchers.base import BaseSearcher
from novel_downloader.models import SearchResult

logger = logging.getLogger(__name__)


# @register_searcher(
#     site_keys=["qidian", "qd"],
# )
class QidianSearcher(BaseSearcher):
    """
    TODO: without a cookie the site currently redirects the search request
    """

    site_name = "qidian"
    priority = 0
    SEARCH_URL = "https://www.qidian.com/so/{query}.html"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """Fetch the raw search-result HTML; empty string on any failure."""
        url = cls.SEARCH_URL.format(query=cls._quote(keyword))
        try:
            async with (await cls._http_get(url)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                url,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """Extract book entries from the result list."""
        doc = html.fromstring(html_str)
        items = doc.xpath(
            '//div[@id="result-list"]//li[contains(@class, "res-book-item")]'
        )
        if limit is not None:
            items = items[:limit]

        base_prio = getattr(cls, "priority", 0)
        results: list[SearchResult] = []
        for offset, item in enumerate(items):
            # The book id is carried on the <li> itself.
            book_id = item.get("data-bid")
            if not book_id:
                continue

            title = (
                item.xpath('.//h3[@class="book-info-title"]/a')[0]
                .text_content()
                .strip()
            )
            # Author may be a link or a plain <i> element.
            author_nodes = item.xpath(
                './/p[@class="author"]/a[@class="name"] | .//p[@class="author"]/i'
            )
            author = author_nodes[0].text_content().strip() if author_nodes else ""

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url="",
                    cover_url="",
                    title=title,
                    author=author,
                    latest_chapter="-",
                    update_date="-",
                    word_count="-",
                    priority=base_prio + offset,
                )
            )
        return results
@@ -0,0 +1,98 @@
#!/usr/bin/env python3
"""
novel_downloader.core.archived.wanbengo.searcher
------------------------------------------------

"""

import logging

from lxml import html

from novel_downloader.core.searchers.base import BaseSearcher
from novel_downloader.models import SearchResult

# from novel_downloader.core.searchers.registry import register_searcher

logger = logging.getLogger(__name__)


# @register_searcher(
#     site_keys=["wanbengo"],
# )
class WanbengoSearcher(BaseSearcher):
    """Searches wanbengo.com via the sososhu.com aggregator."""

    site_name = "wanbengo"
    priority = 30
    BASE_URL = "https://www.wanbengo.com"
    SEARCH_URL = "https://www.sososhu.com/"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """Fetch the raw search-result HTML; empty string on any failure."""
        query = {
            "q": keyword,
            "site": "wbsz",
        }
        try:
            async with (await cls._http_get(cls.SEARCH_URL, params=query)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """Convert an aggregator result page into SearchResult entries."""
        doc = html.fromstring(html_str)
        rows = doc.xpath(
            "//div[contains(@class,'so_list')]//div[contains(@class,'hot')]//div[contains(@class,'item')]"
        )
        if limit is not None:
            rows = rows[:limit]

        results: list[SearchResult] = []
        for pos, row in enumerate(rows):
            anchors = row.xpath(".//dl/dt/a[1]")
            anchor = anchors[0] if anchors else None
            href = anchor.get("href") if anchor is not None else ""
            if not href:
                continue

            # Aggregator links point at the mirror domain; map them back.
            book_url = cls._restore_url(cls._abs_url(href))
            book_id = cls._book_id_from_url(book_url) if book_url else ""

            title = (anchor.text_content() if anchor is not None else "").strip()
            author = cls._first_str(row.xpath(".//dl/dt/span[1]/text()"))
            cover_url = cls._first_str(
                row.xpath(".//div[contains(@class,'image')]//img/@src")
            )

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter="-",
                    update_date="-",
                    word_count="-",
                    # Later rows rank lower than earlier ones.
                    priority=cls.priority + pos,
                )
            )
        return results

    @staticmethod
    def _restore_url(url: str) -> str:
        """Map the aggregator's mirror domain back to the real site."""
        return url.replace("www.wbsz.org", "www.wanbengo.com")

    @staticmethod
    def _book_id_from_url(url: str) -> str:
        """Derive a book id from the path component of ``url``."""
        path = url.split("wanbengo.com", 1)[-1].strip("/")
        return path.replace("/", "-")
@@ -0,0 +1,93 @@
#!/usr/bin/env python3
"""
novel_downloader.core.archived.xshbook.searcher
-----------------------------------------------

"""

import logging

from lxml import html

from novel_downloader.core.searchers.base import BaseSearcher
from novel_downloader.models import SearchResult

# from novel_downloader.core.searchers.registry import register_searcher

logger = logging.getLogger(__name__)


# @register_searcher(
#     site_keys=["xshbook"],
# )
class XshbookSearcher(BaseSearcher):
    """Searches xshbook.com via the sososhu.com aggregator."""

    site_name = "xshbook"
    priority = 30
    BASE_URL = "https://www.xshbook.com"
    SEARCH_URL = "https://www.sososhu.com/"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """Fetch the raw search-result HTML; empty string on any failure."""
        query = {
            "q": keyword,
            "site": "xshbook",
        }
        try:
            async with (await cls._http_get(cls.SEARCH_URL, params=query)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """Convert an aggregator result page into SearchResult entries."""
        doc = html.fromstring(html_str)
        rows = doc.xpath(
            "//div[contains(@class,'so_list')]//div[contains(@class,'hot')]//div[contains(@class,'item')]"
        )
        if limit is not None:
            rows = rows[:limit]

        results: list[SearchResult] = []
        for pos, row in enumerate(rows):
            anchors = row.xpath(".//dl/dt/a[1]")
            anchor = anchors[0] if anchors else None
            href = anchor.get("href") if anchor is not None else ""

            book_url = cls._abs_url(href)
            book_id = cls._book_id_from_url(book_url) if book_url else ""
            if not book_id:
                continue

            title = (anchor.text_content() if anchor is not None else "").strip()
            author = cls._first_str(row.xpath(".//dl/dt/span[1]/text()"))
            cover_url = cls._first_str(
                row.xpath(".//div[contains(@class,'image')]//img/@src")
            )

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter="-",
                    update_date="-",
                    word_count="-",
                    # Later rows rank lower than earlier ones.
                    priority=cls.priority + pos,
                )
            )
        return results

    @staticmethod
    def _book_id_from_url(url: str) -> str:
        """Derive a book id from the path component of ``url``."""
        path = url.split("xshbook.com", 1)[-1].strip("/")
        return path.replace("/", "-")
@@ -3,39 +3,17 @@
3
3
  novel_downloader.core.downloaders
4
4
  ---------------------------------
5
5
 
6
- This subpackage contains concrete downloader implementations for
7
- specific novel platforms.
8
-
9
- Each downloader is responsible for orchestrating the full lifecycle
10
- of retrieving, parsing, and saving novel content for a given source.
11
-
12
- Currently supported platforms:
13
- - biquge (笔趣阁)
14
- - esjzone (ESJ Zone)
15
- - linovelib (哔哩轻小说)
16
- - qianbi (铅笔小说)
17
- - qidian (起点中文网)
18
- - sfacg (SF轻小说)
19
- - yamibo (百合会)
20
- - common (通用架构)
6
+ Downloader implementations for retrieving novels from different sources
21
7
  """
22
8
 
23
- from .biquge import BiqugeDownloader
24
- from .common import CommonDownloader
25
- from .esjzone import EsjzoneDownloader
26
- from .linovelib import LinovelibDownloader
27
- from .qianbi import QianbiDownloader
28
- from .qidian import QidianDownloader
29
- from .sfacg import SfacgDownloader
30
- from .yamibo import YamiboDownloader
31
-
32
9
  __all__ = [
33
- "BiqugeDownloader",
34
- "EsjzoneDownloader",
35
- "LinovelibDownloader",
10
+ "get_downloader",
11
+ "CommonDownloader",
36
12
  "QianbiDownloader",
37
13
  "QidianDownloader",
38
- "SfacgDownloader",
39
- "YamiboDownloader",
40
- "CommonDownloader",
41
14
  ]
15
+
16
+ from .common import CommonDownloader
17
+ from .qianbi import QianbiDownloader
18
+ from .qidian import QidianDownloader
19
+ from .registry import get_downloader