novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +79 -66
  6. novel_downloader/cli/export.py +17 -21
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +206 -209
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +5 -5
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +17 -12
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +20 -14
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +6 -19
  79. novel_downloader/core/interfaces/parser.py +7 -8
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +64 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +64 -69
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/main_parser.py +756 -48
  100. novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
  101. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  102. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  103. novel_downloader/core/parsers/quanben5.py +103 -0
  104. novel_downloader/core/parsers/registry.py +5 -16
  105. novel_downloader/core/parsers/sfacg.py +38 -45
  106. novel_downloader/core/parsers/shencou.py +215 -0
  107. novel_downloader/core/parsers/shuhaige.py +111 -0
  108. novel_downloader/core/parsers/tongrenquan.py +116 -0
  109. novel_downloader/core/parsers/ttkan.py +132 -0
  110. novel_downloader/core/parsers/wanbengo.py +191 -0
  111. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  112. novel_downloader/core/parsers/xiguashuwu.py +429 -0
  113. novel_downloader/core/parsers/xs63b.py +161 -0
  114. novel_downloader/core/parsers/xshbook.py +134 -0
  115. novel_downloader/core/parsers/yamibo.py +87 -131
  116. novel_downloader/core/parsers/yibige.py +166 -0
  117. novel_downloader/core/searchers/__init__.py +34 -3
  118. novel_downloader/core/searchers/aaatxt.py +107 -0
  119. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  120. novel_downloader/core/searchers/base.py +112 -36
  121. novel_downloader/core/searchers/dxmwx.py +105 -0
  122. novel_downloader/core/searchers/eightnovel.py +84 -0
  123. novel_downloader/core/searchers/esjzone.py +43 -25
  124. novel_downloader/core/searchers/hetushu.py +92 -0
  125. novel_downloader/core/searchers/i25zw.py +93 -0
  126. novel_downloader/core/searchers/ixdzs8.py +107 -0
  127. novel_downloader/core/searchers/jpxs123.py +107 -0
  128. novel_downloader/core/searchers/piaotia.py +100 -0
  129. novel_downloader/core/searchers/qbtr.py +106 -0
  130. novel_downloader/core/searchers/qianbi.py +74 -40
  131. novel_downloader/core/searchers/quanben5.py +144 -0
  132. novel_downloader/core/searchers/registry.py +24 -8
  133. novel_downloader/core/searchers/shuhaige.py +124 -0
  134. novel_downloader/core/searchers/tongrenquan.py +110 -0
  135. novel_downloader/core/searchers/ttkan.py +92 -0
  136. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  137. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  138. novel_downloader/core/searchers/xs63b.py +104 -0
  139. novel_downloader/locales/en.json +34 -85
  140. novel_downloader/locales/zh.json +35 -86
  141. novel_downloader/models/__init__.py +21 -22
  142. novel_downloader/models/book.py +44 -0
  143. novel_downloader/models/config.py +4 -37
  144. novel_downloader/models/login.py +1 -1
  145. novel_downloader/models/search.py +5 -0
  146. novel_downloader/resources/config/settings.toml +8 -70
  147. novel_downloader/resources/json/xiguashuwu.json +718 -0
  148. novel_downloader/utils/__init__.py +13 -24
  149. novel_downloader/utils/chapter_storage.py +5 -5
  150. novel_downloader/utils/constants.py +4 -31
  151. novel_downloader/utils/cookies.py +38 -35
  152. novel_downloader/utils/crypto_utils/__init__.py +7 -0
  153. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  154. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  155. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  156. novel_downloader/utils/crypto_utils/rc4.py +54 -0
  157. novel_downloader/utils/epub/__init__.py +3 -4
  158. novel_downloader/utils/epub/builder.py +6 -6
  159. novel_downloader/utils/epub/constants.py +62 -21
  160. novel_downloader/utils/epub/documents.py +95 -201
  161. novel_downloader/utils/epub/models.py +8 -22
  162. novel_downloader/utils/epub/utils.py +73 -106
  163. novel_downloader/utils/file_utils/__init__.py +2 -23
  164. novel_downloader/utils/file_utils/io.py +53 -188
  165. novel_downloader/utils/file_utils/normalize.py +1 -7
  166. novel_downloader/utils/file_utils/sanitize.py +4 -15
  167. novel_downloader/utils/fontocr/__init__.py +5 -14
  168. novel_downloader/utils/fontocr/core.py +216 -0
  169. novel_downloader/utils/fontocr/loader.py +50 -0
  170. novel_downloader/utils/logger.py +81 -65
  171. novel_downloader/utils/network.py +17 -41
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  176. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  177. novel_downloader/utils/time_utils/__init__.py +5 -11
  178. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  179. novel_downloader/utils/time_utils/sleep_utils.py +55 -49
  180. novel_downloader/web/__init__.py +13 -0
  181. novel_downloader/web/components/__init__.py +11 -0
  182. novel_downloader/web/components/navigation.py +35 -0
  183. novel_downloader/web/main.py +66 -0
  184. novel_downloader/web/pages/__init__.py +17 -0
  185. novel_downloader/web/pages/download.py +78 -0
  186. novel_downloader/web/pages/progress.py +147 -0
  187. novel_downloader/web/pages/search.py +329 -0
  188. novel_downloader/web/services/__init__.py +17 -0
  189. novel_downloader/web/services/client_dialog.py +164 -0
  190. novel_downloader/web/services/cred_broker.py +113 -0
  191. novel_downloader/web/services/cred_models.py +35 -0
  192. novel_downloader/web/services/task_manager.py +264 -0
  193. novel_downloader-2.0.1.dist-info/METADATA +172 -0
  194. novel_downloader-2.0.1.dist-info/RECORD +206 -0
  195. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
  196. novel_downloader/core/downloaders/biquge.py +0 -29
  197. novel_downloader/core/downloaders/esjzone.py +0 -29
  198. novel_downloader/core/downloaders/linovelib.py +0 -29
  199. novel_downloader/core/downloaders/sfacg.py +0 -29
  200. novel_downloader/core/downloaders/yamibo.py +0 -29
  201. novel_downloader/core/exporters/biquge.py +0 -22
  202. novel_downloader/core/exporters/esjzone.py +0 -22
  203. novel_downloader/core/exporters/qianbi.py +0 -22
  204. novel_downloader/core/exporters/sfacg.py +0 -22
  205. novel_downloader/core/exporters/yamibo.py +0 -22
  206. novel_downloader/core/fetchers/base/__init__.py +0 -14
  207. novel_downloader/core/fetchers/base/browser.py +0 -422
  208. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  209. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  210. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  211. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  212. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  213. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  214. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  215. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  216. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  217. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  218. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  219. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  220. novel_downloader/core/parsers/biquge.py +0 -139
  221. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
  222. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
  223. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
  224. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  225. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
  226. novel_downloader/models/chapter.py +0 -25
  227. novel_downloader/models/types.py +0 -13
  228. novel_downloader/tui/__init__.py +0 -7
  229. novel_downloader/tui/app.py +0 -32
  230. novel_downloader/tui/main.py +0 -17
  231. novel_downloader/tui/screens/__init__.py +0 -14
  232. novel_downloader/tui/screens/home.py +0 -198
  233. novel_downloader/tui/screens/login.py +0 -74
  234. novel_downloader/tui/styles/home_layout.tcss +0 -79
  235. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  236. novel_downloader/utils/cache.py +0 -24
  237. novel_downloader/utils/crypto_utils.py +0 -71
  238. novel_downloader/utils/fontocr/hash_store.py +0 -280
  239. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  240. novel_downloader/utils/fontocr/model_loader.py +0 -69
  241. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  242. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  243. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  244. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  245. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  246. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  247. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  248. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
novel_downloader/core/searchers/aaatxt.py
@@ -0,0 +1,107 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.searchers.aaatxt
+ --------------------------------------
+
+ """
+
+ import logging
+
+ from lxml import html
+
+ from novel_downloader.core.searchers.base import BaseSearcher
+ from novel_downloader.core.searchers.registry import register_searcher
+ from novel_downloader.models import SearchResult
+
+ logger = logging.getLogger(__name__)
+
+
+ @register_searcher(
+     site_keys=["aaatxt"],
+ )
+ class AaatxtSearcher(BaseSearcher):
+     site_name = "aaatxt"
+     priority = 500
+     SEARCH_URL = "http://www.aaatxt.com/search.php"
+
+     @classmethod
+     async def _fetch_html(cls, keyword: str) -> str:
+         # gbk / gb2312
+         params = {
+             "keyword": cls._quote(keyword, encoding="gb2312", errors="replace"),
+             "submit": cls._quote("搜 索", encoding="gb2312", errors="replace"),
+         }
+         full_url = cls._build_url(cls.SEARCH_URL, params)  # need build manually
+         headers = {
+             "Host": "www.aaatxt.com",
+             "Referer": "http://www.aaatxt.com/",
+         }
+         try:
+             async with (await cls._http_get(full_url, headers=headers)) as resp:
+                 return await cls._response_to_str(resp, "gb2312")
+         except Exception:
+             logger.error(
+                 "Failed to fetch HTML for keyword '%s' from '%s'",
+                 keyword,
+                 cls.SEARCH_URL,
+             )
+             return ""
+
+     @classmethod
+     def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
+         doc = html.fromstring(html_str)
+         rows = doc.xpath("//div[@class='sort']//div[@class='list']/table")
+         results: list[SearchResult] = []
+
+         for idx, row in enumerate(rows):
+             href = cls._first_str(row.xpath(".//td[@class='name']/h3/a/@href"))
+             if not href:
+                 continue
+
+             if limit is not None and idx >= limit:
+                 break
+
+             book_id = href.split("/")[-1].split(".")[0]
+             book_url = cls._abs_url(href)
+
+             cover_rel = cls._first_str(row.xpath(".//td[@class='cover']/a/img/@src"))
+             cover_url = cls._abs_url(cover_rel) if cover_rel else ""
+
+             title = cls._first_str(row.xpath(".//td[@class='name']/h3/a//text()"))
+
+             size_text = row.xpath("string(.//td[@class='size'])")
+             size_norm = size_text.replace("\u00a0", " ").replace("\u3000", " ").strip()
+             tokens = [t for t in size_norm.split() if t]
+
+             word_count = "-"
+             author = "-"
+             for tok in tokens:
+                 if tok.startswith("大小:"):
+                     word_count = tok.split(":", 1)[1].strip()
+                 elif tok.startswith("上传:"):
+                     author = tok.split(":", 1)[1].strip()
+
+             intro_text = row.xpath("string(.//td[@class='intro'])")
+             intro_norm = intro_text.replace("\u00a0", " ").replace("\u3000", " ")
+             update_date = "-"
+             for marker in ("更新：", "更新:"):
+                 if marker in intro_norm:
+                     tail = intro_norm.split(marker, 1)[1].strip()
+                     update_date = tail.split()[0] if tail else "-"
+                     break
+
+             results.append(
+                 SearchResult(
+                     site=cls.site_name,
+                     book_id=book_id,
+                     book_url=book_url,
+                     cover_url=cover_url,
+                     title=title,
+                     author=author,
+                     latest_chapter="-",
+                     update_date=update_date,
+                     word_count=word_count,
+                     priority=cls.priority + idx,
+                 )
+             )
+         return results
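
Note on the GB2312 handling above: `_quote(..., encoding="gb2312", errors="replace")` percent-escapes the GB2312 bytes up front, and `_build_url` concatenates the query string verbatim; handing the dict to the HTTP client's `params=` instead would re-encode the `%` signs and corrupt the query. A minimal stdlib-only sketch of the same idea (the keyword is an arbitrary example, not from the package):

    from urllib.parse import quote_plus

    # Pre-encode the query the way AaatxtSearcher._fetch_html does:
    # each GB2312 byte becomes a %XX escape, spaces become '+'.
    keyword = quote_plus("三体", encoding="gb2312", errors="replace")
    submit = quote_plus("搜 索", encoding="gb2312", errors="replace")

    # Manual assembly (as in BaseSearcher._build_url) keeps the
    # escapes intact instead of letting the client re-quote them.
    url = f"http://www.aaatxt.com/search.php?keyword={keyword}&submit={submit}"
    print(url)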
novel_downloader/core/searchers/{biquge.py → b520.py}
@@ -1,7 +1,7 @@
  #!/usr/bin/env python3
  """
- novel_downloader.core.searchers.biquge
- --------------------------------------
+ novel_downloader.core.searchers.b520
+ ------------------------------------

  """

@@ -17,57 +17,53 @@ logger = logging.getLogger(__name__)


  @register_searcher(
-     site_keys=["biquge", "bqg"],
+     site_keys=["biquge", "bqg", "b520"],
  )
  class BiqugeSearcher(BaseSearcher):
      site_name = "biquge"
-     priority = 5
+     priority = 30
+     BASE_URL = "http://www.b520.cc/"
      SEARCH_URL = "http://www.b520.cc/modules/article/search.php"

      @classmethod
-     def _fetch_html(cls, keyword: str) -> str:
-         """
-         Fetch raw HTML from Biquge's search page.
-
-         :param keyword: The search term to query on Biquge.
-         :return: HTML text of the search results page, or an empty string on fail.
-         """
+     async def _fetch_html(cls, keyword: str) -> str:
          params = {"searchkey": keyword}
          try:
-             response = cls._http_get(cls.SEARCH_URL, params=params)
-             return response.text
+             async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
+                 return await cls._response_to_str(resp)
          except Exception:
              logger.error(
                  "Failed to fetch HTML for keyword '%s' from '%s'",
                  keyword,
                  cls.SEARCH_URL,
-                 exc_info=True,
              )
              return ""

      @classmethod
      def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
-         """
-         Parse raw HTML from Biquge search results into list of SearchResult.
-
-         :param html_str: Raw HTML string from Biquge search results page.
-         :param limit: Maximum number of results to return, or None for all.
-         :return: List of SearchResult dicts.
-         """
          doc = html.fromstring(html_str)
          rows = doc.xpath('//table[@class="grid"]//tr[position()>1]')
          results: list[SearchResult] = []

          for idx, row in enumerate(rows):
+             href = cls._first_str(row.xpath(".//td[1]/a[1]/@href"))
+             if not href:
+                 continue
+
              if limit is not None and idx >= limit:
                  break
-             # Title and book_id
-             title_elem = row.xpath(".//td[1]/a")[0]
-             title = title_elem.text_content().strip()
-             href = title_elem.get("href", "").strip("/")
-             book_id = href.split("/")[0] if href else ""
-             # Author
-             author = row.xpath(".//td[3]")[0].text_content().strip()
+
+             book_id = href.strip("/").split("/")[-1]
+             book_url = cls._abs_url(href)
+
+             title = cls._first_str(row.xpath(".//td[1]/a[1]/text()"))
+
+             latest_chapter = cls._first_str(row.xpath(".//td[2]/a[1]/text()")) or "-"
+
+             author = cls._first_str(row.xpath(".//td[3]//text()"))
+             word_count = cls._first_str(row.xpath(".//td[4]//text()"))
+             update_date = cls._first_str(row.xpath(".//td[5]//text()"))
+
              # Compute priority
              prio = cls.priority + idx

@@ -75,8 +71,13 @@ class BiqugeSearcher(BaseSearcher):
              SearchResult(
                  site=cls.site_name,
                  book_id=book_id,
+                 book_url=book_url,
+                 cover_url="",
                  title=title,
                  author=author,
+                 latest_chapter=latest_chapter,
+                 update_date=update_date,
+                 word_count=word_count,
                  priority=prio,
              )
          )
novel_downloader/core/searchers/base.py
@@ -3,13 +3,14 @@
  novel_downloader.core.searchers.base
  ------------------------------------

+ Abstract base class providing common utilities for site-specific searchers.
  """

  import abc
- from typing import Any
- from urllib.parse import quote_plus
+ from typing import Any, ClassVar
+ from urllib.parse import quote_plus, urljoin

- import requests
+ import aiohttp

  from novel_downloader.core.interfaces import SearcherProtocol
  from novel_downloader.models import SearchResult
@@ -18,75 +19,150 @@ from novel_downloader.utils.constants import DEFAULT_USER_HEADERS

  class BaseSearcher(abc.ABC, SearcherProtocol):
      site_name: str
-     _session = requests.Session()
-     _DEFAULT_TIMEOUT: tuple[int, int] = (5, 10)
+     BASE_URL: str = ""
+     _session: ClassVar[aiohttp.ClientSession | None] = None

      @classmethod
-     def search(cls, keyword: str, limit: int | None = None) -> list[SearchResult]:
-         html = cls._fetch_html(keyword)
+     def configure(cls, session: aiohttp.ClientSession) -> None:
+         cls._session = session
+
+     @classmethod
+     async def search(cls, keyword: str, limit: int | None = None) -> list[SearchResult]:
+         html = await cls._fetch_html(keyword)
          return cls._parse_html(html, limit)

      @classmethod
      @abc.abstractmethod
-     def _fetch_html(cls, keyword: str) -> str:
-         """Get raw HTML from search API or page"""
+     async def _fetch_html(cls, keyword: str) -> str:
+         """
+         Fetch raw HTML from search API or page
+
+         :param keyword: The search term to query.
+         :return: HTML text of the search results page, or an empty string on fail.
+         """
          pass

      @classmethod
      @abc.abstractmethod
      def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
-         """Parse HTML into standard search result list"""
+         """
+         Parse raw HTML from search API or page into list of SearchResult.
+
+         :param html_str: Raw HTML string from search results page.
+         :param limit: Maximum number of results to return, or None for all.
+         :return: List of SearchResult dicts.
+         """
          pass

      @classmethod
-     def _http_get(
+     async def _http_get(
          cls,
          url: str,
          *,
          params: dict[str, str] | None = None,
          headers: dict[str, str] | None = None,
-         timeout: tuple[int, int] | None = None,
          **kwargs: Any,
-     ) -> requests.Response:
+     ) -> aiohttp.ClientResponse:
          """
-         Helper for GET requests with default headers, timeout, and error-raising.
+         Helper for GET requests with default headers.
          """
+         session = cls._ensure_session()
          hdrs = {**DEFAULT_USER_HEADERS, **(headers or {})}
-         resp = cls._session.get(
-             url,
-             params=params,
-             headers=hdrs,
-             timeout=timeout or cls._DEFAULT_TIMEOUT,
-             **kwargs,
-         )
-         resp.raise_for_status()
+         resp = await session.get(url, params=params, headers=hdrs, **kwargs)
+         try:
+             resp.raise_for_status()
+         except aiohttp.ClientResponseError:
+             try:
+                 await resp.read()
+             finally:
+                 resp.release()
+             raise
          return resp

      @classmethod
-     def _http_post(
+     async def _http_post(
          cls,
          url: str,
          *,
          data: dict[str, str] | str | None = None,
          headers: dict[str, str] | None = None,
-         timeout: tuple[int, int] | None = None,
          **kwargs: Any,
-     ) -> requests.Response:
+     ) -> aiohttp.ClientResponse:
          """
-         Helper for POST requests with default headers, timeout, and error-raising.
+         Helper for POST requests with default headers.
          """
+         session = cls._ensure_session()
          hdrs = {**DEFAULT_USER_HEADERS, **(headers or {})}
-         resp = cls._session.post(
-             url,
-             data=data,
-             headers=hdrs,
-             timeout=timeout or cls._DEFAULT_TIMEOUT,
-             **kwargs,
-         )
-         resp.raise_for_status()
+         resp = await session.post(url, data=data, headers=hdrs, **kwargs)
+         try:
+             resp.raise_for_status()
+         except aiohttp.ClientResponseError:
+             try:
+                 await resp.read()
+             finally:
+                 resp.release()
+             raise
          return resp

+     @classmethod
+     def _ensure_session(cls) -> aiohttp.ClientSession:
+         if cls._session is None:
+             raise RuntimeError(
+                 f"{cls.__name__} has no aiohttp session. "
+                 "Call .configure(session) first."
+             )
+         return cls._session
+
      @staticmethod
-     def _quote(q: str) -> str:
+     def _quote(q: str, encoding: str | None = None, errors: str | None = None) -> str:
          """URL-encode a query string safely."""
-         return quote_plus(q)
+         return quote_plus(q, encoding=encoding, errors=errors)
+
+     @staticmethod
+     async def _response_to_str(
+         resp: aiohttp.ClientResponse,
+         encoding: str | None = None,
+     ) -> str:
+         """
+         Read the full body of resp as text. First try the declared charset,
+         then on UnicodeDecodeError fall back to a lenient utf-8 decode.
+         """
+         data: bytes = await resp.read()
+         encodings = [
+             encoding,
+             resp.charset,
+             "gb2312",
+             "gb18030",
+             "gbk",
+             "utf-8",
+         ]
+         encodings_list: list[str] = [e for e in encodings if e]
+         for enc in encodings_list:
+             try:
+                 return data.decode(enc)
+             except UnicodeDecodeError:
+                 continue
+         encoding = encoding or "utf-8"
+         return data.decode(encoding, errors="ignore")
+
+     @staticmethod
+     def _first_str(xs: list[str], replaces: list[tuple[str, str]] | None = None) -> str:
+         replaces = replaces or []
+         value: str = xs[0].strip() if xs else ""
+         for replace in replaces:
+             old, new = replace
+             value = value.replace(old, new)
+         return value
+
+     @staticmethod
+     def _build_url(base: str, params: dict[str, str]) -> str:
+         query_string = "&".join(f"{k}={v}" for k, v in params.items())
+         return f"{base}?{query_string}"
+
+     @classmethod
+     def _abs_url(cls, url: str) -> str:
+         return (
+             url
+             if url.startswith(("http://", "https://"))
+             else urljoin(cls.BASE_URL, url)
+         )
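
The base-class change above replaces the shared `requests.Session` with an injected `aiohttp.ClientSession`: callers must run `configure(session)` once before awaiting `search()`, otherwise `_ensure_session()` raises `RuntimeError`. A minimal usage sketch (calling a searcher class directly for illustration; inside the package the registry in `novel_downloader.core.searchers.registry` presumably handles this wiring, and the keyword is an arbitrary example):

    import asyncio

    import aiohttp

    from novel_downloader.core.searchers.dxmwx import DxmwxSearcher


    async def main() -> None:
        async with aiohttp.ClientSession() as session:
            # Inject the shared session before the first query.
            DxmwxSearcher.configure(session)
            results = await DxmwxSearcher.search("诡秘之主", limit=5)
            for r in results:
                # SearchResult is a dict-style record per the docstrings above.
                print(r["priority"], r["title"], r["book_url"])


    asyncio.run(main())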
novel_downloader/core/searchers/dxmwx.py
@@ -0,0 +1,105 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.searchers.dxmwx
+ -------------------------------------
+
+ """
+
+ import logging
+
+ from lxml import html
+
+ from novel_downloader.core.searchers.base import BaseSearcher
+ from novel_downloader.core.searchers.registry import register_searcher
+ from novel_downloader.models import SearchResult
+
+ logger = logging.getLogger(__name__)
+
+
+ @register_searcher(
+     site_keys=["dxmwx"],
+ )
+ class DxmwxSearcher(BaseSearcher):
+     site_name = "dxmwx"
+     priority = 30
+     BASE_URL = "https://www.dxmwx.org"
+     SEARCH_URL = "https://www.dxmwx.org/list/{query}.html"
+
+     @classmethod
+     async def _fetch_html(cls, keyword: str) -> str:
+         url = cls.SEARCH_URL.format(query=cls._quote(keyword))
+         try:
+             async with (await cls._http_get(url)) as resp:
+                 return await cls._response_to_str(resp)
+         except Exception:
+             logger.error(
+                 "Failed to fetch HTML for keyword '%s' from '%s'",
+                 keyword,
+                 cls.SEARCH_URL,
+             )
+             return ""
+
+     @classmethod
+     def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
+         doc = html.fromstring(html_str)
+         rows = doc.xpath(
+             "//div[@id='ListContents']/div[contains(@style,'position: relative')]"
+         )
+         results: list[SearchResult] = []
+
+         for idx, row in enumerate(rows):
+             href = cls._first_str(
+                 row.xpath(".//div[contains(@class,'margin0h5')]//a[1]/@href")
+             )
+             if not href:
+                 continue
+
+             if limit is not None and idx >= limit:
+                 break
+
+             book_url = cls._abs_url(href)
+             # "/book/10409.html" -> "10409"
+             book_id = href.split("/")[-1].split(".", 1)[0]
+
+             title = cls._first_str(
+                 row.xpath(".//div[contains(@class,'margin0h5')]//a[1]/text()")
+             )
+
+             author = cls._first_str(
+                 row.xpath(".//div[contains(@class,'margin0h5')]//a[2]/text()")
+             )
+
+             cover_src = cls._first_str(
+                 row.xpath(".//div[contains(@class,'imgwidth')]//img/@src")
+             )
+             cover_url = cls._abs_url(cover_src) if cover_src else ""
+
+             latest_chapter = cls._first_str(
+                 row.xpath(
+                     ".//a[span and span[contains(normalize-space(.),'最新章节')]]"
+                     "/span/following-sibling::text()[1]"
+                 )
+             )
+
+             update_date = cls._first_str(
+                 row.xpath(".//span[contains(@class,'lefth5')]/text()")
+             )
+
+             # Compute priority
+             prio = cls.priority + idx
+
+             results.append(
+                 SearchResult(
+                     site=cls.site_name,
+                     book_id=book_id,
+                     book_url=book_url,
+                     cover_url=cover_url,
+                     title=title,
+                     author=author,
+                     latest_chapter=latest_chapter,
+                     update_date=update_date,
+                     word_count="-",
+                     priority=prio,
+                 )
+             )
+         return results
novel_downloader/core/searchers/eightnovel.py
@@ -0,0 +1,84 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.searchers.eightnovel
+ ------------------------------------------
+
+ """
+
+ import logging
+
+ from lxml import html
+
+ from novel_downloader.core.searchers.base import BaseSearcher
+ from novel_downloader.core.searchers.registry import register_searcher
+ from novel_downloader.models import SearchResult
+
+ logger = logging.getLogger(__name__)
+
+
+ @register_searcher(
+     site_keys=["eightnovel", "8novel"],
+ )
+ class EightnovelSearcher(BaseSearcher):
+     site_name = "8novel"
+     priority = 20
+     BASE_URL = "https://www.8novel.com"
+     SEARCH_URL = "https://www.8novel.com/search/"
+
+     @classmethod
+     async def _fetch_html(cls, keyword: str) -> str:
+         params = {"key": keyword}
+         try:
+             async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
+                 return await cls._response_to_str(resp)
+         except Exception:
+             logger.error(
+                 "Failed to fetch HTML for keyword '%s' from '%s'",
+                 keyword,
+                 cls.SEARCH_URL,
+             )
+             return ""
+
+     @classmethod
+     def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
+         doc = html.fromstring(html_str)
+         anchors = doc.xpath("//div[contains(@class,'picsize')]/a")
+         results: list[SearchResult] = []
+
+         for idx, a in enumerate(anchors):
+             href = cls._first_str(a.xpath("./@href"))
+             if not href:
+                 continue
+
+             if limit is not None and idx >= limit:
+                 break
+
+             # '/novelbooks/6045' -> "6045"
+             book_id = href.rstrip("/").split("/")[-1]
+             book_url = cls._abs_url(href)
+
+             cover_rel = cls._first_str(a.xpath(".//img/@src"))
+             cover_url = cls._abs_url(cover_rel) if cover_rel else ""
+
+             title = cls._first_str(a.xpath("./@title"))
+
+             word_count = cls._first_str(a.xpath(".//eps//text()")) or "-"
+
+             # Compute priority
+             prio = cls.priority + idx
+
+             results.append(
+                 SearchResult(
+                     site=cls.site_name,
+                     book_id=book_id,
+                     book_url=book_url,
+                     cover_url=cover_url,
+                     title=title,
+                     author="-",
+                     latest_chapter="-",
+                     update_date="-",
+                     word_count=word_count,
+                     priority=prio,
+                 )
+             )
+         return results
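
Across these searchers each result's `priority` is `cls.priority + idx` (8novel at 20, b520 and dxmwx at 30, aaatxt at 500), so a lower value marks a preferred site and an earlier row. Assuming an ascending sort is the intended aggregation, results from several sites could be merged as sketched below (`merge_results` is a hypothetical helper, not part of the package):

    from novel_downloader.models import SearchResult


    def merge_results(
        *per_site: list[SearchResult], limit: int = 20
    ) -> list[SearchResult]:
        # Flatten the per-site result lists, then sort ascending so
        # preferred sites (low class priority) and earlier rows win.
        merged = [r for results in per_site for r in results]
        merged.sort(key=lambda r: r["priority"])
        return merged[:limit]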