novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +79 -66
  6. novel_downloader/cli/export.py +17 -21
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +206 -209
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +5 -5
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +17 -12
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +20 -14
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +6 -19
  79. novel_downloader/core/interfaces/parser.py +7 -8
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +64 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +64 -69
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/main_parser.py +756 -48
  100. novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
  101. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  102. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  103. novel_downloader/core/parsers/quanben5.py +103 -0
  104. novel_downloader/core/parsers/registry.py +5 -16
  105. novel_downloader/core/parsers/sfacg.py +38 -45
  106. novel_downloader/core/parsers/shencou.py +215 -0
  107. novel_downloader/core/parsers/shuhaige.py +111 -0
  108. novel_downloader/core/parsers/tongrenquan.py +116 -0
  109. novel_downloader/core/parsers/ttkan.py +132 -0
  110. novel_downloader/core/parsers/wanbengo.py +191 -0
  111. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  112. novel_downloader/core/parsers/xiguashuwu.py +429 -0
  113. novel_downloader/core/parsers/xs63b.py +161 -0
  114. novel_downloader/core/parsers/xshbook.py +134 -0
  115. novel_downloader/core/parsers/yamibo.py +87 -131
  116. novel_downloader/core/parsers/yibige.py +166 -0
  117. novel_downloader/core/searchers/__init__.py +34 -3
  118. novel_downloader/core/searchers/aaatxt.py +107 -0
  119. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  120. novel_downloader/core/searchers/base.py +112 -36
  121. novel_downloader/core/searchers/dxmwx.py +105 -0
  122. novel_downloader/core/searchers/eightnovel.py +84 -0
  123. novel_downloader/core/searchers/esjzone.py +43 -25
  124. novel_downloader/core/searchers/hetushu.py +92 -0
  125. novel_downloader/core/searchers/i25zw.py +93 -0
  126. novel_downloader/core/searchers/ixdzs8.py +107 -0
  127. novel_downloader/core/searchers/jpxs123.py +107 -0
  128. novel_downloader/core/searchers/piaotia.py +100 -0
  129. novel_downloader/core/searchers/qbtr.py +106 -0
  130. novel_downloader/core/searchers/qianbi.py +74 -40
  131. novel_downloader/core/searchers/quanben5.py +144 -0
  132. novel_downloader/core/searchers/registry.py +24 -8
  133. novel_downloader/core/searchers/shuhaige.py +124 -0
  134. novel_downloader/core/searchers/tongrenquan.py +110 -0
  135. novel_downloader/core/searchers/ttkan.py +92 -0
  136. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  137. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  138. novel_downloader/core/searchers/xs63b.py +104 -0
  139. novel_downloader/locales/en.json +34 -85
  140. novel_downloader/locales/zh.json +35 -86
  141. novel_downloader/models/__init__.py +21 -22
  142. novel_downloader/models/book.py +44 -0
  143. novel_downloader/models/config.py +4 -37
  144. novel_downloader/models/login.py +1 -1
  145. novel_downloader/models/search.py +5 -0
  146. novel_downloader/resources/config/settings.toml +8 -70
  147. novel_downloader/resources/json/xiguashuwu.json +718 -0
  148. novel_downloader/utils/__init__.py +13 -24
  149. novel_downloader/utils/chapter_storage.py +5 -5
  150. novel_downloader/utils/constants.py +4 -31
  151. novel_downloader/utils/cookies.py +38 -35
  152. novel_downloader/utils/crypto_utils/__init__.py +7 -0
  153. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  154. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  155. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  156. novel_downloader/utils/crypto_utils/rc4.py +54 -0
  157. novel_downloader/utils/epub/__init__.py +3 -4
  158. novel_downloader/utils/epub/builder.py +6 -6
  159. novel_downloader/utils/epub/constants.py +62 -21
  160. novel_downloader/utils/epub/documents.py +95 -201
  161. novel_downloader/utils/epub/models.py +8 -22
  162. novel_downloader/utils/epub/utils.py +73 -106
  163. novel_downloader/utils/file_utils/__init__.py +2 -23
  164. novel_downloader/utils/file_utils/io.py +53 -188
  165. novel_downloader/utils/file_utils/normalize.py +1 -7
  166. novel_downloader/utils/file_utils/sanitize.py +4 -15
  167. novel_downloader/utils/fontocr/__init__.py +5 -14
  168. novel_downloader/utils/fontocr/core.py +216 -0
  169. novel_downloader/utils/fontocr/loader.py +50 -0
  170. novel_downloader/utils/logger.py +81 -65
  171. novel_downloader/utils/network.py +17 -41
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  176. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  177. novel_downloader/utils/time_utils/__init__.py +5 -11
  178. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  179. novel_downloader/utils/time_utils/sleep_utils.py +55 -49
  180. novel_downloader/web/__init__.py +13 -0
  181. novel_downloader/web/components/__init__.py +11 -0
  182. novel_downloader/web/components/navigation.py +35 -0
  183. novel_downloader/web/main.py +66 -0
  184. novel_downloader/web/pages/__init__.py +17 -0
  185. novel_downloader/web/pages/download.py +78 -0
  186. novel_downloader/web/pages/progress.py +147 -0
  187. novel_downloader/web/pages/search.py +329 -0
  188. novel_downloader/web/services/__init__.py +17 -0
  189. novel_downloader/web/services/client_dialog.py +164 -0
  190. novel_downloader/web/services/cred_broker.py +113 -0
  191. novel_downloader/web/services/cred_models.py +35 -0
  192. novel_downloader/web/services/task_manager.py +264 -0
  193. novel_downloader-2.0.1.dist-info/METADATA +172 -0
  194. novel_downloader-2.0.1.dist-info/RECORD +206 -0
  195. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
  196. novel_downloader/core/downloaders/biquge.py +0 -29
  197. novel_downloader/core/downloaders/esjzone.py +0 -29
  198. novel_downloader/core/downloaders/linovelib.py +0 -29
  199. novel_downloader/core/downloaders/sfacg.py +0 -29
  200. novel_downloader/core/downloaders/yamibo.py +0 -29
  201. novel_downloader/core/exporters/biquge.py +0 -22
  202. novel_downloader/core/exporters/esjzone.py +0 -22
  203. novel_downloader/core/exporters/qianbi.py +0 -22
  204. novel_downloader/core/exporters/sfacg.py +0 -22
  205. novel_downloader/core/exporters/yamibo.py +0 -22
  206. novel_downloader/core/fetchers/base/__init__.py +0 -14
  207. novel_downloader/core/fetchers/base/browser.py +0 -422
  208. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  209. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  210. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  211. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  212. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  213. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  214. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  215. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  216. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  217. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  218. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  219. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  220. novel_downloader/core/parsers/biquge.py +0 -139
  221. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
  222. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
  223. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
  224. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  225. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
  226. novel_downloader/models/chapter.py +0 -25
  227. novel_downloader/models/types.py +0 -13
  228. novel_downloader/tui/__init__.py +0 -7
  229. novel_downloader/tui/app.py +0 -32
  230. novel_downloader/tui/main.py +0 -17
  231. novel_downloader/tui/screens/__init__.py +0 -14
  232. novel_downloader/tui/screens/home.py +0 -198
  233. novel_downloader/tui/screens/login.py +0 -74
  234. novel_downloader/tui/styles/home_layout.tcss +0 -79
  235. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  236. novel_downloader/utils/cache.py +0 -24
  237. novel_downloader/utils/crypto_utils.py +0 -71
  238. novel_downloader/utils/fontocr/hash_store.py +0 -280
  239. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  240. novel_downloader/utils/fontocr/model_loader.py +0 -69
  241. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  242. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  243. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  244. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  245. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  246. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  247. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  248. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,106 @@
1
#!/usr/bin/env python3
"""
novel_downloader.core.searchers.qbtr
------------------------------------

"""

import logging

from lxml import html

from novel_downloader.core.searchers.base import BaseSearcher
from novel_downloader.core.searchers.registry import register_searcher
from novel_downloader.models import SearchResult

logger = logging.getLogger(__name__)


@register_searcher(
    site_keys=["qbtr"],
)
class QbtrSearcher(BaseSearcher):
    """Search implementation for qbtr.cc (GBK form-POST search endpoint)."""

    site_name = "qbtr"
    priority = 30
    BASE_URL = "https://www.qbtr.cc"
    SEARCH_URL = "https://www.qbtr.cc/e/search/index.php"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """Fetch raw search-results HTML for *keyword*; return '' on failure."""
        # The site expects the form body to be GBK-encoded.
        encoded_kw = cls._quote(keyword, encoding="gbk", errors="replace")
        form_body = f"keyboard={encoded_kw}&show=title&classid=0"
        headers = {
            "Origin": "https://www.qbtr.cc",
            "Referer": "https://www.qbtr.cc/",
            "Content-Type": "application/x-www-form-urlencoded",
        }
        try:
            resp_ctx = await cls._http_post(
                cls.SEARCH_URL, data=form_body, headers=headers
            )
            async with resp_ctx as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """Parse a search-results page into SearchResult entries."""
        tree = html.fromstring(html_str)
        entries = tree.xpath('//div[@class="books m-cols"]/div[@class="bk"]')
        found: list[SearchResult] = []

        for pos, entry in enumerate(entries):
            link = cls._first_str(entry.xpath(".//h3/a[1]/@href"))
            if not link:
                continue

            if limit is not None and pos >= limit:
                break

            # '/tongren/8850.html' -> "tongren-8850"
            book_id = link.strip("/").split(".")[0].replace("/", "-")

            raw_author = cls._first_str(
                entry.xpath(".//div[contains(@class,'booknews')]/text()"),
                replaces=[("作者:", "")],
            )
            raw_date = cls._first_str(
                entry.xpath(
                    ".//div[contains(@class,'booknews')]/label[contains(@class,'date')]/text()"
                )
            )

            found.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=cls._abs_url(link),
                    cover_url="",
                    title=cls._first_str(entry.xpath(".//h3/a[1]//text()")),
                    author=raw_author or "-",
                    latest_chapter="-",
                    update_date=raw_date or "-",
                    word_count="-",
                    priority=cls.priority + pos,
                )
            )
        return found
@@ -1,12 +1,11 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
3
  novel_downloader.core.searchers.qianbi
4
- -----------------------------------------
4
+ --------------------------------------
5
5
 
6
6
  """
7
7
 
8
8
  import logging
9
- import re
10
9
 
11
10
  from lxml import html
12
11
 
@@ -22,39 +21,26 @@ logger = logging.getLogger(__name__)
22
21
  )
23
22
  class QianbiSearcher(BaseSearcher):
24
23
  site_name = "qianbi"
25
- priority = 3
24
+ priority = 10
25
+ BASE_URL = "https://www.23qb.com/"
26
26
  SEARCH_URL = "https://www.23qb.com/search.html"
27
27
 
28
28
  @classmethod
29
- def _fetch_html(cls, keyword: str) -> str:
30
- """
31
- Fetch raw HTML from Qianbi's search page.
32
-
33
- :param keyword: The search term to query on Qianbi.
34
- :return: HTML text of the search results page, or an empty string on fail.
35
- """
29
+ async def _fetch_html(cls, keyword: str) -> str:
36
30
  params = {"searchkey": keyword}
37
31
  try:
38
- response = cls._http_get(cls.SEARCH_URL, params=params)
39
- return response.text
32
+ async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
33
+ return await cls._response_to_str(resp)
40
34
  except Exception:
41
35
  logger.error(
42
36
  "Failed to fetch HTML for keyword '%s' from '%s'",
43
37
  keyword,
44
38
  cls.SEARCH_URL,
45
- exc_info=True,
46
39
  )
47
40
  return ""
48
41
 
49
42
  @classmethod
50
43
  def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
51
- """
52
- Parse raw HTML from Qianbi search results into list of SearchResult.
53
-
54
- :param html_str: Raw HTML string from Qianbi search results page.
55
- :param limit: Maximum number of results to return, or None for all.
56
- :return: List of SearchResult dicts.
57
- """
58
44
  if html_str.find('<meta property="og:url"') != -1:
59
45
  return cls._parse_detail_html(html_str)
60
46
  return cls._parse_search_list_html(html_str, limit)
@@ -68,25 +54,53 @@ class QianbiSearcher(BaseSearcher):
68
54
  :return: A single-element list with the book's SearchResult.
69
55
  """
70
56
  doc = html.fromstring(html_str)
71
- url = doc.xpath('//meta[@property="og:url"]/@content')
72
- if not url:
57
+
58
+ book_url = cls._first_str(doc.xpath("//meta[@property='og:url']/@content"))
59
+ if not book_url:
73
60
  return []
74
61
 
75
- # extract book_id via regex
76
- m = re.search(r"/book/(\d+)/", url[0])
77
- book_id = m.group(1) if m else ""
78
- # title from <h1 class="page-title">
79
- title = (doc.xpath('//h1[@class="page-title"]/text()') or [""])[0].strip()
80
- author = (doc.xpath('//a[contains(@href,"/author/")]/@title') or [""])[
81
- 0
82
- ].strip()
62
+ # 'https://www.23qb.com/book/9268/' -> "9268"
63
+ book_id = book_url.split("book/", 1)[-1].strip("/")
64
+
65
+ cover_rel = cls._first_str(
66
+ doc.xpath("//div[contains(@class,'novel-cover')]//img/@data-src")
67
+ ) or cls._first_str(
68
+ doc.xpath("//div[contains(@class,'novel-cover')]//img/@src")
69
+ )
70
+ cover_url = cls._abs_url(cover_rel) if cover_rel else ""
71
+
72
+ title = cls._first_str(doc.xpath("//h1[@class='page-title']/text()"))
73
+ author = cls._first_str(doc.xpath("//a[contains(@href, '/author/')]/@title"))
74
+
75
+ latest_chapter = (
76
+ cls._first_str(
77
+ doc.xpath(
78
+ "//div[@class='module-row-info']//a[@class='module-row-text']/@title"
79
+ )
80
+ )
81
+ or "-"
82
+ )
83
+ update_date = (
84
+ cls._first_str(
85
+ doc.xpath("//div[@class='module-heading newchapter']/time/text()"),
86
+ replaces=[("更新时间:", "")],
87
+ )
88
+ or "-"
89
+ )
90
+
91
+ word_count = cls._first_str(doc.xpath("//span[contains(text(), '字')]/text()"))
83
92
 
84
93
  return [
85
94
  SearchResult(
86
95
  site=cls.site_name,
87
96
  book_id=book_id,
97
+ book_url=book_url,
98
+ cover_url=cover_url,
88
99
  title=title,
89
100
  author=author,
101
+ latest_chapter=latest_chapter,
102
+ update_date=update_date,
103
+ word_count=word_count,
90
104
  priority=cls.priority,
91
105
  )
92
106
  ]
@@ -99,23 +113,38 @@ class QianbiSearcher(BaseSearcher):
99
113
  Parse a multi-item search result page.
100
114
 
101
115
  :param html_str: Raw HTML of the search-results page.
102
- :param limit: Maximum number of items to return, or None for all.
103
- :return: List of SearchResult.
116
+ :param limit: Maximum number of items to return, or None for all.
117
+ :return: List of SearchResult.
104
118
  """
105
119
  doc = html.fromstring(html_str)
106
120
  items = doc.xpath('//div[contains(@class,"module-search-item")]')
107
121
  results: list[SearchResult] = []
108
122
 
109
123
  for idx, item in enumerate(items):
124
+ href = cls._first_str(
125
+ item.xpath(".//div[@class='novel-info-header']/h3/a/@href")
126
+ )
127
+ if not href:
128
+ continue
129
+
110
130
  if limit is not None and idx >= limit:
111
131
  break
112
- # Title and book_id
113
- link = item.xpath('.//div[@class="novel-info-header"]/h3/a')[0]
114
- title = link.text_content().strip()
115
- href = link.get("href", "").strip("/")
116
- book_id = href.replace("book/", "").strip("/")
117
- # Author is not present on the page
118
- author = ""
132
+
133
+ # '/book/9138/' -> "9138"
134
+ book_id = href.rstrip("/").split("/")[-1]
135
+ book_url = cls._abs_url(href)
136
+
137
+ title = cls._first_str(
138
+ item.xpath(".//div[@class='novel-info-header']/h3/a//text()")
139
+ )
140
+
141
+ cover_rel = cls._first_str(
142
+ item.xpath(".//div[contains(@class,'module-item-pic')]//img/@data-src")
143
+ ) or cls._first_str(
144
+ item.xpath(".//div[contains(@class,'module-item-pic')]//img/@src")
145
+ )
146
+ cover_url = cls._abs_url(cover_rel) if cover_rel else ""
147
+
119
148
  # Compute priority
120
149
  prio = cls.priority + idx
121
150
 
@@ -123,8 +152,13 @@ class QianbiSearcher(BaseSearcher):
123
152
  SearchResult(
124
153
  site=cls.site_name,
125
154
  book_id=book_id,
155
+ book_url=book_url,
156
+ cover_url=cover_url,
126
157
  title=title,
127
- author=author,
158
+ author="-", # Author is not present on the page
159
+ latest_chapter="-",
160
+ update_date="-",
161
+ word_count="-",
128
162
  priority=prio,
129
163
  )
130
164
  )
@@ -0,0 +1,144 @@
1
#!/usr/bin/env python3
"""
novel_downloader.core.searchers.quanben5
----------------------------------------

"""

import json
import logging
import random
import time

from lxml import html

from novel_downloader.core.searchers.base import BaseSearcher
from novel_downloader.core.searchers.registry import register_searcher
from novel_downloader.models import SearchResult

logger = logging.getLogger(__name__)


@register_searcher(
    site_keys=["quanben5"],
)
class Quanben5Searcher(BaseSearcher):
    """Search implementation for quanben5.com (JSONP search endpoint)."""

    site_name = "quanben5"
    priority = 30
    BASE_URL = "https://quanben5.com"
    SEARCH_URL = "https://quanben5.com/"

    # Alphabet used by the site's homegrown keyword obfuscation.
    STATIC_CHARS = "PXhw7UT1B0a9kQDKZsjIASmOezxYG4CHo5Jyfg2b8FLpEvRr3WtVnlqMidu6cN"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """Fetch the JSONP search payload for *keyword*; return '' on failure."""
        timestamp = str(int(time.time() * 1000))
        encoded_kw = cls._quote(keyword)
        obfuscated = cls._quote(cls._base64(encoded_kw))

        query = {
            "c": "book",
            "a": "search.json",
            "callback": "search",
            "t": timestamp,
            "keywords": encoded_kw,
            "b": obfuscated,
        }
        url = cls._build_url(cls.SEARCH_URL, query)

        headers = {
            "Host": "quanben5.com",
            "Referer": "https://quanben5.com/search.html",
        }

        try:
            resp_ctx = await cls._http_get(url, headers=headers)
            async with resp_ctx as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """Unwrap the JSONP response and parse its embedded HTML fragment."""
        # Strip the JSONP wrapper: search({...});
        prefix, suffix = "search(", ");"
        if html_str.startswith(prefix) and html_str.endswith(suffix):
            payload = html_str[len(prefix) : -len(suffix)]
        else:
            payload = html_str

        try:
            data = json.loads(payload)
        except json.JSONDecodeError:
            return []

        content_html = data.get("content", "")
        if not content_html:
            return []

        fragment = html.fromstring(content_html)
        cards = fragment.xpath('//div[@class="pic_txt_list"]')
        found: list[SearchResult] = []

        for pos, card in enumerate(cards):
            link = cls._first_str(card.xpath(".//h3/a/@href"))
            if not link:
                continue

            if limit is not None and pos >= limit:
                break

            # '/n/douposanqian/' -> "douposanqian"
            book_id = link.rstrip("/").split("/")[-1]

            cover_src = cls._first_str(card.xpath(".//div[@class='pic']//img/@src"))

            name_parts = [
                t.strip()
                for t in card.xpath(".//h3/a/span[@class='name']//text()")
                if t and t.strip()
            ]

            found.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=cls._abs_url(link),
                    cover_url=cls._abs_url(cover_src) if cover_src else "",
                    title="".join(name_parts),
                    author=cls._first_str(
                        card.xpath(
                            ".//p[@class='info']//span[contains(@class,'author')]/text()"
                        )
                    ),
                    latest_chapter="-",
                    update_date="-",
                    word_count="-",
                    priority=cls.priority + pos,
                )
            )
        return found

    @classmethod
    def _base64(cls, s: str) -> str:
        """Site-specific obfuscation (not real base64): shift each known char
        by 3 within STATIC_CHARS and sandwich it between two random chars."""
        chars = cls.STATIC_CHARS
        pieces: list[str] = []
        for ch in s:
            pos = chars.find(ch)
            shifted = chars[(pos + 3) % 62] if pos != -1 else ch
            # Two random pad chars, drawn in the same order as before.
            left = chars[int(random.random() * 62)]
            right = chars[int(random.random() * 62)]
            pieces.append(left)
            pieces.append(shifted)
            pieces.append(right)
        return "".join(pieces)
@@ -7,9 +7,12 @@ novel_downloader.core.searchers.registry
7
7
 
8
8
  __all__ = ["register_searcher", "search"]
9
9
 
10
+ import asyncio
10
11
  from collections.abc import Callable, Sequence
11
12
  from typing import TypeVar
12
13
 
14
+ import aiohttp
15
+
13
16
  from novel_downloader.core.searchers.base import BaseSearcher
14
17
  from novel_downloader.models import SearchResult
15
18
 
@@ -33,31 +36,44 @@ def register_searcher(
33
36
  return decorator
34
37
 
35
38
 
36
async def search(
    keyword: str,
    sites: Sequence[str] | None = None,
    limit: int | None = None,
    per_site_limit: int = 5,
    timeout: float = 5.0,
) -> list[SearchResult]:
    """
    Perform a search for the given keyword across one or more registered sites,
    then aggregate and sort the results by their `priority` value.

    :param keyword: The search term or keyword to query.
    :param sites: An optional sequence of site keys to limit which searchers.
    :param limit: Maximum total number of results to return; if None, return all.
    :param per_site_limit: Maximum number of search results per site.
    :param timeout: Per-request time budget (seconds)
    :return: A flat list of `SearchResult` objects.
    """
    keys = list(sites or _SEARCHER_REGISTRY.keys())
    to_call = {_SEARCHER_REGISTRY[key] for key in keys if key in _SEARCHER_REGISTRY}

    site_timeout = aiohttp.ClientTimeout(total=timeout)

    results: list[SearchResult] = []
    async with aiohttp.ClientSession(timeout=site_timeout) as session:
        # Give all searchers the same session
        for cls in to_call:
            cls.configure(session)

        # Kick off all sites in parallel
        coros = [cls.search(keyword, limit=per_site_limit) for cls in to_call]
        site_lists = await asyncio.gather(*coros, return_exceptions=True)

        # Collect successful results; silently skip sites that failed.
        # `BaseException` alone suffices: the previous `Exception | BaseException`
        # union was redundant since Exception subclasses BaseException.
        for item in site_lists:
            if isinstance(item, BaseException):
                continue
            results.extend(item)

    results.sort(key=lambda res: res["priority"])
    return results[:limit] if limit is not None else results
@@ -0,0 +1,124 @@
1
#!/usr/bin/env python3
"""
novel_downloader.core.searchers.shuhaige
----------------------------------------

"""

import logging
import time

from lxml import html

from novel_downloader.core.searchers.base import BaseSearcher
from novel_downloader.core.searchers.registry import register_searcher
from novel_downloader.models import SearchResult

logger = logging.getLogger(__name__)


@register_searcher(
    site_keys=["shuhaige"],
)
class ShuhaigeSearcher(BaseSearcher):
    """Search implementation for shuhaige.net (form-POST search)."""

    site_name = "shuhaige"
    priority = 30
    BASE_URL = "https://www.shuhaige.net"
    SEARCH_URL = "https://www.shuhaige.net/search.html"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """Fetch raw search-results HTML for *keyword*; return '' on failure."""
        form = {
            "searchtype": "all",
            "searchkey": keyword,
        }
        now = int(time.time())
        # baidu cookie format: f"Hm_lpvt_{site_id}={timestamp}"
        cookie_str = (
            f"Hm_lpvt_3094b20ed277f38e8f9ac2b2b29d6263={now}; "
            f"Hm_lpvt_c3da01855456ad902664af23cc3254cb={now}"
        )
        headers = {
            "Origin": "https://www.shuhaige.net",
            "Referer": "https://www.shuhaige.net/",
            "Cookie": cookie_str,
        }
        try:
            resp_ctx = await cls._http_post(cls.SEARCH_URL, data=form, headers=headers)
            async with resp_ctx as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """Parse a search-results page into SearchResult entries."""
        tree = html.fromstring(html_str)
        entries = tree.xpath('//div[@id="sitembox"]/dl')
        found: list[SearchResult] = []

        for pos, entry in enumerate(entries):
            link = cls._first_str(entry.xpath("./dt/a[1]/@href")) or cls._first_str(
                entry.xpath("./dd/h3/a[1]/@href")
            )
            if not link:
                continue

            if limit is not None and pos >= limit:
                break

            book_id = link.strip("/").split("/")[0]

            title = cls._first_str(
                entry.xpath("./dd/h3/a[1]//text()")
            ) or cls._first_str(entry.xpath("./dt/a[1]/img[1]/@alt"))

            cover_src = cls._first_str(entry.xpath("./dt/a[1]/img[1]/@src"))

            raw_author = cls._first_str(
                entry.xpath("./dd[@class='book_other'][1]/span[1]/text()")
            )
            raw_words = cls._first_str(
                entry.xpath("./dd[@class='book_other'][1]/span[4]/text()")
            )
            raw_latest = cls._first_str(
                entry.xpath("./dd[@class='book_other'][last()]/a[1]//text()")
            )
            raw_date = cls._first_str(
                entry.xpath("./dd[@class='book_other'][last()]/span[1]//text()")
            )

            found.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=cls._abs_url(link),
                    cover_url=cls._abs_url(cover_src) if cover_src else "",
                    title=title,
                    author=raw_author or "-",
                    latest_chapter=raw_latest or "-",
                    update_date=raw_date or "-",
                    word_count=raw_words or "-",
                    priority=cls.priority + pos,
                )
            )
        return found