novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.base
4
+ ------------------------------------
5
+
6
+ Abstract base class providing common utilities for site-specific searchers.
7
+ """
8
+
9
+ import abc
10
+ from typing import Any, ClassVar
11
+ from urllib.parse import quote_plus, urljoin
12
+
13
+ import aiohttp
14
+
15
+ from novel_downloader.core.interfaces import SearcherProtocol
16
+ from novel_downloader.models import SearchResult
17
+ from novel_downloader.utils.constants import DEFAULT_USER_HEADERS
18
+
19
+
20
class BaseSearcher(abc.ABC, SearcherProtocol):
    """
    Abstract base class providing common utilities for site-specific searchers.

    Subclasses implement :meth:`_fetch_html` and :meth:`_parse_html`; the
    shared :meth:`search` entry point combines them. A shared aiohttp session
    must be injected via :meth:`configure` before any HTTP helper is used.
    """

    site_name: str
    BASE_URL: str = ""
    # Class-level session shared by all requests; set by `configure()`.
    _session: ClassVar[aiohttp.ClientSession | None] = None

    @classmethod
    def configure(cls, session: aiohttp.ClientSession) -> None:
        """Attach the aiohttp session used by the HTTP helpers."""
        cls._session = session

    @classmethod
    async def search(cls, keyword: str, limit: int | None = None) -> list[SearchResult]:
        """
        Search the site for ``keyword``.

        :param keyword: The search term to query.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """
        html = await cls._fetch_html(keyword)
        return cls._parse_html(html, limit)

    @classmethod
    @abc.abstractmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML from search API or page

        :param keyword: The search term to query.
        :return: HTML text of the search results page, or an empty string on fail.
        """
        pass

    @classmethod
    @abc.abstractmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse raw HTML from search API or page into list of SearchResult.

        :param html_str: Raw HTML string from search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """
        pass

    @classmethod
    async def _http_get(
        cls,
        url: str,
        *,
        params: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> aiohttp.ClientResponse:
        """
        Helper for GET requests with default headers.

        :raises aiohttp.ClientResponseError: on a non-2xx response.
        """
        session = cls._ensure_session()
        hdrs = {**DEFAULT_USER_HEADERS, **(headers or {})}
        resp = await session.get(url, params=params, headers=hdrs, **kwargs)
        return await cls._raise_for_status(resp)

    @classmethod
    async def _http_post(
        cls,
        url: str,
        *,
        data: dict[str, str] | str | None = None,
        headers: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> aiohttp.ClientResponse:
        """
        Helper for POST requests with default headers.

        :raises aiohttp.ClientResponseError: on a non-2xx response.
        """
        session = cls._ensure_session()
        hdrs = {**DEFAULT_USER_HEADERS, **(headers or {})}
        resp = await session.post(url, data=data, headers=hdrs, **kwargs)
        return await cls._raise_for_status(resp)

    @staticmethod
    async def _raise_for_status(resp: aiohttp.ClientResponse) -> aiohttp.ClientResponse:
        """
        Shared error handling for `_http_get` / `_http_post`: raise on a bad
        status, but first drain and release the connection so the session's
        connector can reuse it.
        """
        try:
            resp.raise_for_status()
        except aiohttp.ClientResponseError:
            try:
                await resp.read()
            finally:
                resp.release()
            raise
        return resp

    @classmethod
    def _ensure_session(cls) -> aiohttp.ClientSession:
        """Return the configured session, or raise if `configure` was never called."""
        if cls._session is None:
            raise RuntimeError(
                f"{cls.__name__} has no aiohttp session. "
                "Call .configure(session) first."
            )
        return cls._session

    @staticmethod
    def _quote(q: str, encoding: str | None = None, errors: str | None = None) -> str:
        """URL-encode a query string safely."""
        return quote_plus(q, encoding=encoding, errors=errors)

    @staticmethod
    async def _response_to_str(
        resp: aiohttp.ClientResponse,
        encoding: str | None = None,
    ) -> str:
        """
        Read the full body of resp as text. First try the declared charset,
        then on UnicodeDecodeError fall back through common Chinese encodings,
        finally decoding leniently so this never raises.
        """
        data: bytes = await resp.read()
        candidates = [
            encoding,
            resp.charset,
            "gb2312",
            "gb18030",
            "gbk",
            "utf-8",
        ]
        for enc in (e for e in candidates if e):
            try:
                return data.decode(enc)
            except UnicodeDecodeError:
                continue
        # Last resort: lenient decode that cannot fail.
        return data.decode(encoding or "utf-8", errors="ignore")

    @staticmethod
    def _first_str(xs: list[str], replaces: list[tuple[str, str]] | None = None) -> str:
        """
        Return the first string in `xs`, stripped, after applying each
        (old, new) replacement pair; empty string when `xs` is empty.
        """
        value: str = xs[0].strip() if xs else ""
        for old, new in replaces or []:
            value = value.replace(old, new)
        return value

    @staticmethod
    def _build_url(base: str, params: dict[str, str]) -> str:
        """
        Join `base` and `params` into a query URL.

        NOTE: values are interpolated verbatim (no percent-encoding);
        callers must pre-encode via `_quote` when needed.
        """
        query_string = "&".join(f"{k}={v}" for k, v in params.items())
        return f"{base}?{query_string}"

    @classmethod
    def _abs_url(cls, url: str) -> str:
        """Resolve a possibly-relative `url` against `BASE_URL`."""
        return (
            url
            if url.startswith(("http://", "https://"))
            else urljoin(cls.BASE_URL, url)
        )
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.dxmwx
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.core.searchers.registry import register_searcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_searcher(
    site_keys=["dxmwx"],
)
class DxmwxSearcher(BaseSearcher):
    """Searcher for the dxmwx.org novel site."""

    site_name = "dxmwx"
    priority = 30
    BASE_URL = "https://www.dxmwx.org"
    SEARCH_URL = "https://www.dxmwx.org/list/{query}.html"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML of the search results for `keyword`.

        :param keyword: The search term to query.
        :return: HTML text of the search results page, or an empty string on fail.
        """
        url = cls.SEARCH_URL.format(query=cls._quote(keyword))
        try:
            async with (await cls._http_get(url)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            # Log the concrete request URL (query substituted), not the
            # unformatted SEARCH_URL template.
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                url,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the search results page into SearchResult entries.

        :param html_str: Raw HTML string from search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """
        doc = html.fromstring(html_str)
        rows = doc.xpath(
            "//div[@id='ListContents']/div[contains(@style,'position: relative')]"
        )
        results: list[SearchResult] = []

        for idx, row in enumerate(rows):
            # Count collected results rather than raw rows, so rows skipped
            # for a missing link do not reduce the number of results returned.
            if limit is not None and len(results) >= limit:
                break

            href = cls._first_str(
                row.xpath(".//div[contains(@class,'margin0h5')]//a[1]/@href")
            )
            if not href:
                continue

            book_url = cls._abs_url(href)
            # "/book/10409.html" -> "10409"
            book_id = href.split("/")[-1].split(".", 1)[0]

            title = cls._first_str(
                row.xpath(".//div[contains(@class,'margin0h5')]//a[1]/text()")
            )

            author = cls._first_str(
                row.xpath(".//div[contains(@class,'margin0h5')]//a[2]/text()")
            )

            cover_src = cls._first_str(
                row.xpath(".//div[contains(@class,'imgwidth')]//img/@src")
            )
            cover_url = cls._abs_url(cover_src) if cover_src else ""

            latest_chapter = cls._first_str(
                row.xpath(
                    ".//a[span and span[contains(normalize-space(.),'最新章节')]]"
                    "/span/following-sibling::text()[1]"
                )
            )

            update_date = cls._first_str(
                row.xpath(".//span[contains(@class,'lefth5')]/text()")
            )

            # Earlier rows on the page get a better (lower) priority.
            prio = cls.priority + idx

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter=latest_chapter,
                    update_date=update_date,
                    word_count="-",
                    priority=prio,
                )
            )
        return results
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.eightnovel
4
+ ------------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.core.searchers.registry import register_searcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_searcher(
    site_keys=["eightnovel", "8novel"],
)
class EightnovelSearcher(BaseSearcher):
    """Searcher for the 8novel.com site."""

    site_name = "8novel"
    priority = 20
    BASE_URL = "https://www.8novel.com"
    SEARCH_URL = "https://www.8novel.com/search/"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML of the search results for `keyword`.

        :return: HTML text of the search results page, or an empty string on fail.
        """
        try:
            resp = await cls._http_get(cls.SEARCH_URL, params={"key": keyword})
            async with resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the search results page into SearchResult entries.

        :param html_str: Raw HTML string from search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """
        tree = html.fromstring(html_str)
        found: list[SearchResult] = []

        for pos, anchor in enumerate(tree.xpath("//div[contains(@class,'picsize')]/a")):
            link = cls._first_str(anchor.xpath("./@href"))
            if not link:
                continue
            if limit is not None and pos >= limit:
                break

            thumb = cls._first_str(anchor.xpath(".//img/@src"))

            # '/novelbooks/6045' -> "6045"
            found.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=link.rstrip("/").split("/")[-1],
                    book_url=cls._abs_url(link),
                    cover_url=cls._abs_url(thumb) if thumb else "",
                    title=cls._first_str(anchor.xpath("./@title")),
                    author="-",
                    latest_chapter="-",
                    update_date="-",
                    word_count=cls._first_str(anchor.xpath(".//eps//text()")) or "-",
                    priority=cls.priority + pos,
                )
            )
        return found
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.esjzone
4
+ ---------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.core.searchers.registry import register_searcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_searcher(
    site_keys=["esjzone"],
)
class EsjzoneSearcher(BaseSearcher):
    """Searcher for the esjzone.cc site."""

    site_name = "esjzone"
    priority = 30
    BASE_URL = "https://www.esjzone.cc"
    SEARCH_URL = "https://www.esjzone.cc/tags/{query}/"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML of the tag-search results for `keyword`.

        :return: HTML text of the search results page, or an empty string on fail.
        """
        url = cls.SEARCH_URL.format(query=cls._quote(keyword))
        try:
            resp = await cls._http_get(url)
            async with resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                url,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the search results page into SearchResult entries.

        :param html_str: Raw HTML string from search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """
        tree = html.fromstring(html_str)
        collected: list[SearchResult] = []

        for pos, card in enumerate(tree.xpath('//div[contains(@class,"card-body")]')):
            link = cls._first_str(
                card.xpath(".//h5[contains(@class,'card-title')]/a[1]/@href")
            )
            if not link:
                continue
            if limit is not None and pos >= limit:
                break

            # href format: /detail/<book_id>.html
            ident = link.split("/")[-1].split(".")[0]

            name = cls._first_str(
                card.xpath(".//h5[contains(@class,'card-title')]/a[1]//text()")
            )

            newest = (
                cls._first_str(
                    card.xpath(".//div[contains(@class,'card-ep')]//a[1]//text()")
                )
                or "-"
            )

            # Prefer the author link text; fall back to any text in the div.
            writer = cls._first_str(
                card.xpath(".//div[contains(@class,'card-author')]//a[1]//text()")
            ) or cls._first_str(
                card.xpath(".//div[contains(@class,'card-author')]//text()")
            )

            # Cover lives on a sibling anchor preceding the card body.
            covers = card.xpath(
                './preceding-sibling::a[contains(@class,"card-img-tiles")]'
                '//div[contains(@class,"lazyload")]/@data-src'
            )

            collected.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=ident,
                    book_url=cls._abs_url(link),
                    cover_url=covers[0].strip() if covers else "",
                    title=name,
                    author=writer,
                    latest_chapter=newest,
                    update_date="-",
                    word_count="-",
                    priority=cls.priority + pos,
                )
            )
        return collected
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.hetushu
4
+ ---------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.core.searchers.registry import register_searcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_searcher(
    site_keys=["hetushu"],
)
class HetushuSearcher(BaseSearcher):
    """Searcher for the hetushu.com site."""

    site_name = "hetushu"
    priority = 5
    SEARCH_URL = "https://www.hetushu.com/search/"
    BASE_URL = "https://www.hetushu.com"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML of the search results for `keyword`.

        :param keyword: The search term to query.
        :return: HTML text of the search results page, or an empty string on fail.
        """
        params = {"keyword": keyword}
        headers = {
            "Referer": "https://www.hetushu.com/",
        }
        try:
            async with (
                await cls._http_get(cls.SEARCH_URL, params=params, headers=headers)
            ) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the search results page into SearchResult entries.

        :param html_str: Raw HTML string from search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """
        doc = html.fromstring(html_str)
        rows = doc.xpath('//dl[@class="list" and @id="body"]/dd')
        results: list[SearchResult] = []

        for idx, row in enumerate(rows):
            href = cls._first_str(row.xpath(".//h4/a/@href"))
            if not href:
                continue

            if limit is not None and idx >= limit:
                break

            # "/book/7631/index.html" -> "7631"
            # FIX: rstrip("/index.html") would strip a *character set*, not the
            # literal suffix, corrupting ids that end in any of those
            # characters; removesuffix removes the exact trailing string.
            book_id = href.removesuffix("/index.html").split("/")[-1]
            book_url = cls._abs_url(href)

            title = cls._first_str(row.xpath(".//h4/a/text()"))

            # Author from the adjacent <span>, strip "/" delimiters
            # e.g. " / 风行云亦行 / "
            author_raw = cls._first_str(row.xpath(".//h4/span/text()"))
            author = author_raw.strip("/").strip()

            cover_rel = cls._first_str(row.xpath(".//a/img/@src"))
            cover_url = cls._abs_url(cover_rel) if cover_rel else ""

            # Earlier rows on the page get a better (lower) priority.
            prio = cls.priority + idx

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter="-",
                    update_date="-",
                    word_count="-",
                    priority=prio,
                )
            )
        return results
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.i25zw
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.core.searchers.registry import register_searcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_searcher(
    site_keys=["i25zw"],
)
class I25zwSearcher(BaseSearcher):
    """Searcher for the i25zw.com site."""

    site_name = "i25zw"
    priority = 30
    SEARCH_URL = "https://www.i25zw.com/search.html"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        POST the search form for `keyword` and return the raw HTML.

        :return: HTML text of the search results page, or an empty string on fail.
        """
        form = {
            "searchkey": keyword,
            "searchtype": "all",
            "Submit": "",
        }
        try:
            resp = await cls._http_post(cls.SEARCH_URL, data=form)
            async with resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the search results page into SearchResult entries.

        :param html_str: Raw HTML string from search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """
        tree = html.fromstring(html_str)
        entries: list[SearchResult] = []

        for pos, box in enumerate(tree.xpath("//div[@id='alistbox']")):
            page_url = cls._first_str(box.xpath(".//div[@class='pic']/a/@href"))
            if not page_url:
                continue
            if limit is not None and pos >= limit:
                break

            # 'https://www.i25zw.com/book/309209.html' -> "309209"
            ident = page_url.split("/")[-1].split(".")[0]

            name = cls._first_str(box.xpath(".//div[@class='title']/h2/a/text()"))

            writer = cls._first_str(
                box.xpath(".//div[@class='title']/span/text()"),
                replaces=[("作者:", "")],
            )

            thumb = cls._first_str(box.xpath(".//div[@class='pic']//img/@src"))

            newest = (
                cls._first_str(box.xpath(".//div[@class='sys']//li[1]/a/text()")) or "-"
            )

            entries.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=ident,
                    book_url=page_url,
                    cover_url=cls._abs_url(thumb) if thumb else "",
                    title=name,
                    author=writer,
                    latest_chapter=newest,
                    update_date="-",
                    word_count="-",
                    priority=cls.priority + pos,
                )
            )
        return entries