novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +77 -64
  6. novel_downloader/cli/export.py +16 -20
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +65 -105
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +1 -0
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +14 -9
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +17 -11
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +61 -66
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  100. novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
  101. novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
  102. novel_downloader/core/parsers/qidian/main_parser.py +11 -38
  103. novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
  104. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  105. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  106. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  107. novel_downloader/core/parsers/quanben5.py +103 -0
  108. novel_downloader/core/parsers/registry.py +5 -16
  109. novel_downloader/core/parsers/sfacg.py +38 -45
  110. novel_downloader/core/parsers/shencou.py +215 -0
  111. novel_downloader/core/parsers/shuhaige.py +111 -0
  112. novel_downloader/core/parsers/tongrenquan.py +116 -0
  113. novel_downloader/core/parsers/ttkan.py +132 -0
  114. novel_downloader/core/parsers/wanbengo.py +191 -0
  115. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  116. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  117. novel_downloader/core/parsers/xs63b.py +161 -0
  118. novel_downloader/core/parsers/xshbook.py +134 -0
  119. novel_downloader/core/parsers/yamibo.py +87 -131
  120. novel_downloader/core/parsers/yibige.py +166 -0
  121. novel_downloader/core/searchers/__init__.py +34 -3
  122. novel_downloader/core/searchers/aaatxt.py +107 -0
  123. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  124. novel_downloader/core/searchers/base.py +112 -36
  125. novel_downloader/core/searchers/dxmwx.py +105 -0
  126. novel_downloader/core/searchers/eightnovel.py +84 -0
  127. novel_downloader/core/searchers/esjzone.py +43 -25
  128. novel_downloader/core/searchers/hetushu.py +92 -0
  129. novel_downloader/core/searchers/i25zw.py +93 -0
  130. novel_downloader/core/searchers/ixdzs8.py +107 -0
  131. novel_downloader/core/searchers/jpxs123.py +107 -0
  132. novel_downloader/core/searchers/piaotia.py +100 -0
  133. novel_downloader/core/searchers/qbtr.py +106 -0
  134. novel_downloader/core/searchers/qianbi.py +74 -40
  135. novel_downloader/core/searchers/quanben5.py +144 -0
  136. novel_downloader/core/searchers/registry.py +24 -8
  137. novel_downloader/core/searchers/shuhaige.py +124 -0
  138. novel_downloader/core/searchers/tongrenquan.py +110 -0
  139. novel_downloader/core/searchers/ttkan.py +92 -0
  140. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  141. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  142. novel_downloader/core/searchers/xs63b.py +104 -0
  143. novel_downloader/locales/en.json +31 -82
  144. novel_downloader/locales/zh.json +32 -83
  145. novel_downloader/models/__init__.py +21 -22
  146. novel_downloader/models/book.py +44 -0
  147. novel_downloader/models/config.py +4 -37
  148. novel_downloader/models/login.py +1 -1
  149. novel_downloader/models/search.py +5 -0
  150. novel_downloader/resources/config/settings.toml +8 -70
  151. novel_downloader/resources/json/xiguashuwu.json +718 -0
  152. novel_downloader/utils/__init__.py +13 -22
  153. novel_downloader/utils/chapter_storage.py +3 -2
  154. novel_downloader/utils/constants.py +4 -29
  155. novel_downloader/utils/cookies.py +6 -18
  156. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  157. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  158. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  159. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  160. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  161. novel_downloader/utils/epub/__init__.py +1 -1
  162. novel_downloader/utils/epub/constants.py +57 -16
  163. novel_downloader/utils/epub/documents.py +88 -194
  164. novel_downloader/utils/epub/models.py +0 -14
  165. novel_downloader/utils/epub/utils.py +63 -96
  166. novel_downloader/utils/file_utils/__init__.py +2 -23
  167. novel_downloader/utils/file_utils/io.py +3 -113
  168. novel_downloader/utils/file_utils/sanitize.py +0 -4
  169. novel_downloader/utils/fontocr.py +207 -0
  170. novel_downloader/utils/logger.py +8 -16
  171. novel_downloader/utils/network.py +2 -2
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/time_utils/__init__.py +5 -11
  176. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  177. novel_downloader/utils/time_utils/sleep_utils.py +4 -8
  178. novel_downloader/web/__init__.py +13 -0
  179. novel_downloader/web/components/__init__.py +11 -0
  180. novel_downloader/web/components/navigation.py +35 -0
  181. novel_downloader/web/main.py +66 -0
  182. novel_downloader/web/pages/__init__.py +17 -0
  183. novel_downloader/web/pages/download.py +78 -0
  184. novel_downloader/web/pages/progress.py +147 -0
  185. novel_downloader/web/pages/search.py +329 -0
  186. novel_downloader/web/services/__init__.py +17 -0
  187. novel_downloader/web/services/client_dialog.py +164 -0
  188. novel_downloader/web/services/cred_broker.py +113 -0
  189. novel_downloader/web/services/cred_models.py +35 -0
  190. novel_downloader/web/services/task_manager.py +264 -0
  191. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  192. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  193. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  194. novel_downloader/core/downloaders/biquge.py +0 -29
  195. novel_downloader/core/downloaders/esjzone.py +0 -29
  196. novel_downloader/core/downloaders/linovelib.py +0 -29
  197. novel_downloader/core/downloaders/sfacg.py +0 -29
  198. novel_downloader/core/downloaders/yamibo.py +0 -29
  199. novel_downloader/core/exporters/biquge.py +0 -22
  200. novel_downloader/core/exporters/esjzone.py +0 -22
  201. novel_downloader/core/exporters/qianbi.py +0 -22
  202. novel_downloader/core/exporters/sfacg.py +0 -22
  203. novel_downloader/core/exporters/yamibo.py +0 -22
  204. novel_downloader/core/fetchers/base/__init__.py +0 -14
  205. novel_downloader/core/fetchers/base/browser.py +0 -422
  206. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  207. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  208. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  209. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  210. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  211. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  212. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  213. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  214. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  215. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  216. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  217. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  218. novel_downloader/core/parsers/biquge.py +0 -139
  219. novel_downloader/models/chapter.py +0 -25
  220. novel_downloader/models/types.py +0 -13
  221. novel_downloader/tui/__init__.py +0 -7
  222. novel_downloader/tui/app.py +0 -32
  223. novel_downloader/tui/main.py +0 -17
  224. novel_downloader/tui/screens/__init__.py +0 -14
  225. novel_downloader/tui/screens/home.py +0 -198
  226. novel_downloader/tui/screens/login.py +0 -74
  227. novel_downloader/tui/styles/home_layout.tcss +0 -79
  228. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  229. novel_downloader/utils/cache.py +0 -24
  230. novel_downloader/utils/fontocr/__init__.py +0 -22
  231. novel_downloader/utils/fontocr/hash_store.py +0 -280
  232. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  233. novel_downloader/utils/fontocr/model_loader.py +0 -69
  234. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  235. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  236. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  237. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  238. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  239. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  240. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  241. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,16 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.fetchers.base.session
4
- -------------------------------------------
3
+ novel_downloader.core.fetchers.base
4
+ -----------------------------------
5
5
 
6
- This module defines the BaseSession class, which provides asynchronous
7
- HTTP request capabilities using aiohttp. It maintains a persistent
8
- client session and supports retries, headers, timeout configurations,
9
- cookie handling, and defines abstract methods for subclasses.
6
+ Abstract base class providing common HTTP session handling for fetchers.
10
7
  """
11
8
 
12
-
13
9
  import abc
14
10
  import json
15
11
  import logging
16
12
  import types
13
+ from collections.abc import Mapping
17
14
  from typing import Any, Self
18
15
 
19
16
  import aiohttp
@@ -22,8 +19,7 @@ from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
22
19
  from novel_downloader.core.interfaces import FetcherProtocol
23
20
  from novel_downloader.models import FetcherConfig, LoginField
24
21
  from novel_downloader.utils import (
25
- async_sleep_with_random_delay,
26
- parse_cookie_expires,
22
+ async_jitter_sleep,
27
23
  )
28
24
  from novel_downloader.utils.constants import (
29
25
  DATA_DIR,
@@ -67,7 +63,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
67
63
  self._session: ClientSession | None = None
68
64
  self._rate_limiter: TokenBucketRateLimiter | None = None
69
65
 
70
- if config.max_rps is not None and config.max_rps > 0:
66
+ if config.max_rps > 0:
71
67
  self._rate_limiter = TokenBucketRateLimiter(config.max_rps)
72
68
 
73
69
  self.logger = logging.getLogger(f"{self.__class__.__name__}")
@@ -82,6 +78,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
82
78
  ) -> bool:
83
79
  """
84
80
  Attempt to log in asynchronously.
81
+
85
82
  :returns: True if login succeeded.
86
83
  """
87
84
  return False
@@ -96,7 +93,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
96
93
  Fetch the raw HTML (or JSON) of the book info page asynchronously.
97
94
 
98
95
  :param book_id: The book identifier.
99
- :return: The page content as a string.
96
+ :return: The page content as a list of strings.
100
97
  """
101
98
  ...
102
99
 
@@ -112,7 +109,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
112
109
 
113
110
  :param book_id: The book identifier.
114
111
  :param chapter_id: The chapter identifier.
115
- :return: The chapter content as a string.
112
+ :return: The page content as a list of strings.
116
113
  """
117
114
  ...
118
115
 
@@ -182,7 +179,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
182
179
  return await self._response_to_str(resp, encoding)
183
180
  except aiohttp.ClientError:
184
181
  if attempt < self.retry_times:
185
- await async_sleep_with_random_delay(
182
+ await async_jitter_sleep(
186
183
  self.backoff_factor,
187
184
  mul_spread=1.1,
188
185
  max_sleep=self.backoff_factor + 2,
@@ -247,8 +244,12 @@ class BaseSession(FetcherProtocol, abc.ABC):
247
244
  return False
248
245
  try:
249
246
  storage = json.loads(self._state_file.read_text(encoding="utf-8"))
250
- for c in storage.get("cookies", []):
251
- self._session.cookie_jar.update_cookies({c["name"]: c["value"]})
247
+ raw_cookies = storage.get("cookies", [])
248
+ cookie_dict = self._filter_cookies(raw_cookies)
249
+
250
+ if cookie_dict:
251
+ self._session.cookie_jar.update_cookies(cookie_dict)
252
+
252
253
  self._is_logged_in = await self._check_login_status()
253
254
  return self._is_logged_in
254
255
  except Exception as e:
@@ -278,12 +279,6 @@ class BaseSession(FetcherProtocol, abc.ABC):
278
279
  {
279
280
  "name": cookie.key,
280
281
  "value": cookie.value,
281
- "domain": cookie.get("domain", ""),
282
- "path": cookie.get("path", "/"),
283
- "expires": parse_cookie_expires(cookie.get("expires")),
284
- "httpOnly": bool(cookie.get("httponly", False)),
285
- "secure": bool(cookie.get("secure", False)),
286
- "sameSite": cookie.get("samesite") or "Lax",
287
282
  }
288
283
  )
289
284
  storage_state = {
@@ -300,21 +295,6 @@ class BaseSession(FetcherProtocol, abc.ABC):
300
295
  self.logger.warning("Failed to save state: %s", e)
301
296
  return False
302
297
 
303
- async def set_interactive_mode(self, enable: bool) -> bool:
304
- """
305
- Enable or disable interactive mode for manual login.
306
-
307
- :param enable: True to enable, False to disable interactive mode.
308
- :return: True if operation or login check succeeded, False otherwise.
309
- """
310
- return False
311
-
312
- def get_cookie_value(self, key: str) -> str | None:
313
- for cookie in self.session.cookie_jar:
314
- if cookie.key == key:
315
- return str(cookie.value)
316
- return None
317
-
318
298
  def update_cookies(
319
299
  self,
320
300
  cookies: dict[str, str],
@@ -346,18 +326,10 @@ class BaseSession(FetcherProtocol, abc.ABC):
346
326
  """
347
327
  return False
348
328
 
349
- @property
350
- def hostname(self) -> str:
351
- return ""
352
-
353
329
  @property
354
330
  def site(self) -> str:
355
331
  return self._site
356
332
 
357
- @property
358
- def requester_type(self) -> str:
359
- return "session"
360
-
361
333
  @property
362
334
  def is_logged_in(self) -> bool:
363
335
  """
@@ -411,6 +383,17 @@ class BaseSession(FetcherProtocol, abc.ABC):
411
383
  return dict(self._session.headers)
412
384
  return self._headers.copy()
413
385
 
386
+ @staticmethod
387
+ def _filter_cookies(
388
+ raw_cookies: list[Mapping[str, Any]],
389
+ ) -> dict[str, str]:
390
+ """
391
+ Hook:
392
+ take the raw list of cookie-dicts loaded from storage_state
393
+ and return a simple name -> value mapping.
394
+ """
395
+ return {c["name"]: c["value"] for c in raw_cookies}
396
+
414
397
  @staticmethod
415
398
  async def _response_to_str(
416
399
  resp: ClientResponse,
@@ -421,14 +404,22 @@ class BaseSession(FetcherProtocol, abc.ABC):
421
404
  then on UnicodeDecodeError fall back to a lenient decode (errors ignored) using `encoding`, or utf-8 if none was given.
422
405
  """
423
406
  data: bytes = await resp.read()
424
- encodings = [encoding, resp.charset, "utf-8", "gb18030", "gbk"]
407
+ encodings = [
408
+ encoding,
409
+ resp.charset,
410
+ "gb2312",
411
+ "gb18030",
412
+ "gbk",
413
+ "utf-8",
414
+ ]
425
415
  encodings_list: list[str] = [e for e in encodings if e]
426
416
  for enc in encodings_list:
427
417
  try:
428
418
  return data.decode(enc)
429
419
  except UnicodeDecodeError:
430
420
  continue
431
- return data.decode("utf-8", errors="ignore")
421
+ encoding = encoding or "utf-8"
422
+ return data.decode(encoding, errors="ignore")
432
423
 
433
424
  async def __aenter__(self) -> Self:
434
425
  if self._session is None or self._session.closed:
@@ -1,36 +1,35 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.fetchers.biquge.browser
4
- ---------------------------------------------
3
+ novel_downloader.core.fetchers.biquyuedu
4
+ ----------------------------------------
5
5
 
6
6
  """
7
7
 
8
8
  from typing import Any
9
9
 
10
- from novel_downloader.core.fetchers.base import BaseBrowser
10
+ from novel_downloader.core.fetchers.base import BaseSession
11
11
  from novel_downloader.core.fetchers.registry import register_fetcher
12
12
  from novel_downloader.models import FetcherConfig
13
13
 
14
14
 
15
15
  @register_fetcher(
16
- site_keys=["biquge", "bqg"],
17
- backends=["browser"],
16
+ site_keys=["biquyuedu"],
18
17
  )
19
- class BiqugeBrowser(BaseBrowser):
18
+ class BiquyueduSession(BaseSession):
20
19
  """
21
- A browser class for interacting with the Biquge (www.b520.cc) novel website.
20
+ A session class for interacting with the 精彩小说 (biquyuedu.com) novel website.
22
21
  """
23
22
 
24
- BOOK_INFO_URL = "http://www.b520.cc/{book_id}/"
25
- CHAPTER_URL = "http://www.b520.cc/{book_id}/{chapter_id}.html"
23
+ BOOK_INFO_URL = "https://biquyuedu.com/novel/{book_id}.html"
24
+ CHAPTER_URL = "https://biquyuedu.com/novel/{book_id}/{chapter_id}.html"
26
25
 
27
26
  def __init__(
28
27
  self,
29
28
  config: FetcherConfig,
30
- reuse_page: bool = False,
29
+ cookies: dict[str, str] | None = None,
31
30
  **kwargs: Any,
32
31
  ) -> None:
33
- super().__init__("biquge", config, reuse_page, **kwargs)
32
+ super().__init__("biquyuedu", config, cookies, **kwargs)
34
33
 
35
34
  async def get_book_info(
36
35
  self,
@@ -41,7 +40,7 @@ class BiqugeBrowser(BaseBrowser):
41
40
  Fetch the raw HTML of the book info page asynchronously.
42
41
 
43
42
  :param book_id: The book identifier.
44
- :return: The page content as a string.
43
+ :return: The page content as a list of strings.
45
44
  """
46
45
  url = self.book_info_url(book_id=book_id)
47
46
  return [await self.fetch(url, **kwargs)]
@@ -57,7 +56,7 @@ class BiqugeBrowser(BaseBrowser):
57
56
 
58
57
  :param book_id: The book identifier.
59
58
  :param chapter_id: The chapter identifier.
60
- :return: The chapter content as a string.
59
+ :return: The page content as a list of strings.
61
60
  """
62
61
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
63
62
  return [await self.fetch(url, **kwargs)]
@@ -82,7 +81,3 @@ class BiqugeBrowser(BaseBrowser):
82
81
  :return: Fully qualified chapter URL.
83
82
  """
84
83
  return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
85
-
86
- @property
87
- def hostname(self) -> str:
88
- return "www.b520.cc"
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.dxmwx
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ import asyncio
9
+ from typing import Any
10
+
11
+ from novel_downloader.core.fetchers.base import BaseSession
12
+ from novel_downloader.core.fetchers.registry import register_fetcher
13
+ from novel_downloader.models import FetcherConfig
14
+
15
+
16
+ @register_fetcher(
17
+ site_keys=["dxmwx"],
18
+ )
19
+ class DxmwxSession(BaseSession):
20
+ """
21
+ A session class for interacting with the 大熊猫文学网 (www.dxmwx.org) novel website.
22
+ """
23
+
24
+ BOOK_INFO_URL = "https://{base_url}/book/{book_id}.html"
25
+ BOOK_CATALOG_URL = "https://{base_url}/chapter/{book_id}.html"
26
+ CHAPTER_URL = "https://{base_url}/read/{book_id}_{chapter_id}.html"
27
+
28
+ def __init__(
29
+ self,
30
+ config: FetcherConfig,
31
+ cookies: dict[str, str] | None = None,
32
+ **kwargs: Any,
33
+ ) -> None:
34
+ super().__init__("dxmwx", config, cookies, **kwargs)
35
+ self.base_url = (
36
+ "www.dxmwx.org" if config.locale_style == "simplified" else "tw.dxmwx.org"
37
+ )
38
+
39
+ async def get_book_info(
40
+ self,
41
+ book_id: str,
42
+ **kwargs: Any,
43
+ ) -> list[str]:
44
+ """
45
+ Fetch the raw HTML of the book info page asynchronously.
46
+
47
+ Order: [info, catalog]
48
+
49
+ :param book_id: The book identifier.
50
+ :return: The page content as a list of strings.
51
+ """
52
+ info_url = self.book_info_url(base_url=self.base_url, book_id=book_id)
53
+ catalog_url = self.book_catalog_url(base_url=self.base_url, book_id=book_id)
54
+
55
+ info_html, catalog_html = await asyncio.gather(
56
+ self.fetch(info_url, **kwargs),
57
+ self.fetch(catalog_url, **kwargs),
58
+ )
59
+ return [info_html, catalog_html]
60
+
61
+ async def get_book_chapter(
62
+ self,
63
+ book_id: str,
64
+ chapter_id: str,
65
+ **kwargs: Any,
66
+ ) -> list[str]:
67
+ """
68
+ Fetch the raw HTML of a single chapter asynchronously.
69
+
70
+ :param book_id: The book identifier.
71
+ :param chapter_id: The chapter identifier.
72
+ :return: The page content as a list of strings.
73
+ """
74
+ url = self.chapter_url(
75
+ base_url=self.base_url, book_id=book_id, chapter_id=chapter_id
76
+ )
77
+ return [await self.fetch(url, **kwargs)]
78
+
79
+ @classmethod
80
+ def book_info_url(cls, base_url: str, book_id: str) -> str:
81
+ """
82
+ Construct the URL for fetching a book's info page.
83
+
84
+ :param book_id: The identifier of the book.
85
+ :return: Fully qualified URL for the book info page.
86
+ """
87
+ return cls.BOOK_INFO_URL.format(base_url=base_url, book_id=book_id)
88
+
89
+ @classmethod
90
+ def book_catalog_url(cls, base_url: str, book_id: str) -> str:
91
+ """
92
+ Construct the URL for fetching a book's catalog page.
93
+
94
+ :param book_id: The identifier of the book.
95
+ :return: Fully qualified catalog page URL.
96
+ """
97
+ return cls.BOOK_CATALOG_URL.format(base_url=base_url, book_id=book_id)
98
+
99
+ @classmethod
100
+ def chapter_url(cls, base_url: str, book_id: str, chapter_id: str) -> str:
101
+ """
102
+ Construct the URL for fetching a specific chapter.
103
+
104
+ :param book_id: The identifier of the book.
105
+ :param chapter_id: The identifier of the chapter.
106
+ :return: Fully qualified chapter URL.
107
+ """
108
+ return cls.CHAPTER_URL.format(
109
+ base_url=base_url, book_id=book_id, chapter_id=chapter_id
110
+ )
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.eightnovel
4
+ -----------------------------------------
5
+
6
+ """
7
+
8
+ import re
9
+ from re import Pattern
10
+ from typing import Any
11
+
12
+ from novel_downloader.core.fetchers.base import BaseSession
13
+ from novel_downloader.core.fetchers.registry import register_fetcher
14
+ from novel_downloader.models import FetcherConfig
15
+
16
+
17
+ @register_fetcher(
18
+ site_keys=["8novel", "eightnovel"],
19
+ )
20
+ class EightnovelSession(BaseSession):
21
+ """
22
+ A session class for interacting with the 无限轻小说 (www.8novel.com) novel website.
23
+ """
24
+
25
+ BOOK_INFO_URL = "https://www.8novel.com/novelbooks/{book_id}/"
26
+ CHAPTER_URL = "https://article.8novel.com/read/{book_id}/?{chapter_id}"
27
+ CHAPTER_CONTENT_URL = (
28
+ "https://article.8novel.com/txt/1/{book_id}/{chapter_id}{seed_segment}.html"
29
+ )
30
+
31
+ _SPLIT_STR_PATTERN = re.compile(
32
+ r'["\']([^"\']+)["\']\s*\.split\s*\(\s*["\']\s*,\s*["\']\s*\)', re.DOTALL
33
+ )
34
+ _DIGIT_LIST_PATTERN: Pattern[str] = re.compile(r"^\d+(?:,\d+)*$")
35
+
36
+ def __init__(
37
+ self,
38
+ config: FetcherConfig,
39
+ cookies: dict[str, str] | None = None,
40
+ **kwargs: Any,
41
+ ) -> None:
42
+ super().__init__("eightnovel", config, cookies, **kwargs)
43
+
44
+ async def get_book_info(
45
+ self,
46
+ book_id: str,
47
+ **kwargs: Any,
48
+ ) -> list[str]:
49
+ """
50
+ Fetch the raw HTML of the book info page asynchronously.
51
+
52
+ :param book_id: The book identifier.
53
+ :return: The page content as a list of strings.
54
+ """
55
+ url = self.book_info_url(book_id=book_id)
56
+ return [await self.fetch(url, **kwargs)]
57
+
58
+ async def get_book_chapter(
59
+ self,
60
+ book_id: str,
61
+ chapter_id: str,
62
+ **kwargs: Any,
63
+ ) -> list[str]:
64
+ """
65
+ Fetch the raw HTML of a single chapter asynchronously.
66
+
67
+ Order: [chap_info, content]
68
+
69
+ :param book_id: The book identifier.
70
+ :param chapter_id: The chapter identifier.
71
+ :return: The page content as a list of strings.
72
+ """
73
+ url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
74
+ chapter_html = await self.fetch(url, **kwargs)
75
+ url_seed = self._extract_url_seed(chapter_html)
76
+ content_url = self._build_chapter_content_url(
77
+ seed=url_seed,
78
+ book_id=book_id,
79
+ chapter_id=chapter_id,
80
+ )
81
+ content_html = await self.fetch(content_url, **kwargs)
82
+
83
+ return [chapter_html, content_html]
84
+
85
+ @classmethod
86
+ def book_info_url(cls, book_id: str) -> str:
87
+ """
88
+ Construct the URL for fetching a book's info page.
89
+
90
+ :param book_id: The identifier of the book.
91
+ :return: Fully qualified URL for the book info page.
92
+ """
93
+ return cls.BOOK_INFO_URL.format(book_id=book_id)
94
+
95
+ @classmethod
96
+ def chapter_url(cls, book_id: str, chapter_id: str) -> str:
97
+ """
98
+ Construct the URL for fetching a specific chapter.
99
+
100
+ :param book_id: The identifier of the book.
101
+ :param chapter_id: The identifier of the chapter.
102
+ :return: Fully qualified chapter URL.
103
+ """
104
+ return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
105
+
106
+ @classmethod
107
+ def _extract_url_seed(cls, html_str: str) -> str:
108
+ """
109
+ From the given HTML/JS source, find all string literals
110
+ of the form "...".split(","), keep those that are comma-separated digit lists,
111
+ and return the last element of the last such list ("" if none is found).
112
+ """
113
+ split_literals: list[str] = cls._SPLIT_STR_PATTERN.findall(html_str)
114
+
115
+ numeric_lists = [
116
+ lit for lit in split_literals if cls._DIGIT_LIST_PATTERN.fullmatch(lit)
117
+ ]
118
+
119
+ if not numeric_lists:
120
+ return ""
121
+
122
+ last_list = numeric_lists[-1]
123
+ return last_list.split(",")[-1]
124
+
125
+ @classmethod
126
+ def _build_chapter_content_url(
127
+ cls, seed: str, book_id: str, chapter_id: str
128
+ ) -> str:
129
+ """
130
+ Slice a segment of up to 5 characters out of `seed`, starting at offset
131
+ (int(chapter_id) * 3) % 100, and build the chapter content URL.
132
+ """
133
+ # Compute start index and slice out 5 chars
134
+ start = (int(chapter_id) * 3) % 100
135
+ seed_segment = seed[start : start + 5]
136
+
137
+ return cls.CHAPTER_CONTENT_URL.format(
138
+ book_id=book_id, chapter_id=chapter_id, seed_segment=seed_segment
139
+ )
@@ -1,26 +1,26 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.fetchers.esjzone.session
4
- ----------------------------------------------
3
+ novel_downloader.core.fetchers.esjzone
4
+ --------------------------------------
5
5
 
6
6
  """
7
7
 
8
8
  import re
9
+ from collections.abc import Mapping
9
10
  from typing import Any
10
11
 
11
12
  from novel_downloader.core.fetchers.base import BaseSession
12
13
  from novel_downloader.core.fetchers.registry import register_fetcher
13
14
  from novel_downloader.models import FetcherConfig, LoginField
14
- from novel_downloader.utils import async_sleep_with_random_delay
15
+ from novel_downloader.utils import async_jitter_sleep
15
16
 
16
17
 
17
18
  @register_fetcher(
18
19
  site_keys=["esjzone"],
19
- backends=["session"],
20
20
  )
21
21
  class EsjzoneSession(BaseSession):
22
22
  """
23
- A session class for interacting with the esjzone (www.esjzone.cc) novel website.
23
+ A session class for interacting with the ESJ Zone (www.esjzone.cc) novel website.
24
24
  """
25
25
 
26
26
  BOOKCASE_URL = "https://www.esjzone.cc/my/favorite"
@@ -68,7 +68,7 @@ class EsjzoneSession(BaseSession):
68
68
  ):
69
69
  self._is_logged_in = True
70
70
  return True
71
- await async_sleep_with_random_delay(
71
+ await async_jitter_sleep(
72
72
  self.backoff_factor,
73
73
  mul_spread=1.1,
74
74
  max_sleep=self.backoff_factor + 2,
@@ -86,7 +86,7 @@ class EsjzoneSession(BaseSession):
86
86
  Fetch the raw HTML of the book info page asynchronously.
87
87
 
88
88
  :param book_id: The book identifier.
89
- :return: The page content as a string.
89
+ :return: The page content as string list.
90
90
  """
91
91
  url = self.book_info_url(book_id=book_id)
92
92
  return [await self.fetch(url, **kwargs)]
@@ -102,7 +102,7 @@ class EsjzoneSession(BaseSession):
102
102
 
103
103
  :param book_id: The book identifier.
104
104
  :param chapter_id: The chapter identifier.
105
- :return: The chapter content as a string.
105
+ :return: The page content as string list.
106
106
  """
107
107
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
108
108
  return [await self.fetch(url, **kwargs)]
@@ -170,10 +170,6 @@ class EsjzoneSession(BaseSession):
170
170
  """
171
171
  return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
172
172
 
173
- @property
174
- def hostname(self) -> str:
175
- return "www.esjzone.cc"
176
-
177
173
  async def _api_login(self, username: str, password: str) -> bool:
178
174
  """
179
175
  Login to the API using a 2-step token-based process.
@@ -234,3 +230,14 @@ class EsjzoneSession(BaseSession):
234
230
  def _extract_token(self, text: str) -> str:
235
231
  match = re.search(r"<JinJing>(.+?)</JinJing>", text)
236
232
  return match.group(1) if match else ""
233
+
234
+ @staticmethod
235
+ def _filter_cookies(
236
+ raw_cookies: list[Mapping[str, Any]],
237
+ ) -> dict[str, str]:
238
+ ALLOWED_DOMAINS = {".www.esjzone.cc", "www.esjzone.cc", ".esjzone.cc", ""}
239
+ return {
240
+ c["name"]: c["value"]
241
+ for c in raw_cookies
242
+ if c.get("domain", "") in ALLOWED_DOMAINS
243
+ }
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.guidaye
4
+ --------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from novel_downloader.core.fetchers.base import BaseSession
11
+ from novel_downloader.core.fetchers.registry import register_fetcher
12
+ from novel_downloader.models import FetcherConfig
13
+
14
+
15
@register_fetcher(
    site_keys=["guidaye"],
)
class GuidayeSession(BaseSession):
    """
    Session-based fetcher for the b.guidaye.com novel website
    (site name: Mingzhu Yuedu, "Famous Works Reading").

    Book IDs are accepted with ``-`` separators; the fetch methods turn
    each ``-`` into ``/`` before building the request URL.
    """

    # URL templates; placeholders are filled by the classmethods below.
    BOOK_INFO_URL = "https://b.guidaye.com/{book_id}/"
    CHAPTER_URL = "https://b.guidaye.com/{book_id}/{chapter_id}.html"

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Set up the session under the ``"guidaye"`` site key.

        :param config: Fetcher configuration passed on to the base session.
        :param cookies: Optional initial cookies passed on to the base session.
        :param kwargs: Extra keyword arguments passed on to the base session.
        """
        super().__init__("guidaye", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the raw HTML of a book's info page.

        :param book_id: The book identifier; every ``-`` is replaced by ``/``.
        :param kwargs: Extra keyword arguments passed on to ``fetch``.
        :return: A one-element list holding the page content.
        """
        # presumably the site path contains "/" where the ID uses "-" -- confirm
        site_path = book_id.replace("-", "/")
        page = await self.fetch(self.book_info_url(book_id=site_path), **kwargs)
        return [page]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the raw HTML of a single chapter.

        :param book_id: The book identifier; every ``-`` is replaced by ``/``.
        :param chapter_id: The chapter identifier, used unchanged.
        :param kwargs: Extra keyword arguments passed on to ``fetch``.
        :return: A one-element list holding the page content.
        """
        site_path = book_id.replace("-", "/")
        target = self.chapter_url(book_id=site_path, chapter_id=chapter_id)
        page = await self.fetch(target, **kwargs)
        return [page]

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Build the address of a book's info page.

        :param book_id: The identifier of the book.
        :return: ``BOOK_INFO_URL`` with the ``book_id`` placeholder filled in.
        """
        template = cls.BOOK_INFO_URL
        return template.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Build the address of a single chapter page.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: ``CHAPTER_URL`` with both placeholders filled in.
        """
        template = cls.CHAPTER_URL
        return template.format(book_id=book_id, chapter_id=chapter_id)