novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.aaatxt
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from novel_downloader.core.fetchers.base import BaseSession
11
+ from novel_downloader.core.fetchers.registry import register_fetcher
12
+ from novel_downloader.models import FetcherConfig
13
+
14
+
15
+ @register_fetcher(
16
+ site_keys=["aaatxt"],
17
+ )
18
+ class AaatxtSession(BaseSession):
19
+ """
20
+ A session class for interacting with the 3A电子书 (www.aaatxt.com) novel website.
21
+ """
22
+
23
+ BOOK_INFO_URL = "http://www.aaatxt.com/shu/{book_id}.html"
24
+ CHAPTER_URL = "http://www.aaatxt.com/yuedu/{chapter_id}.html"
25
+
26
+ def __init__(
27
+ self,
28
+ config: FetcherConfig,
29
+ cookies: dict[str, str] | None = None,
30
+ **kwargs: Any,
31
+ ) -> None:
32
+ super().__init__("aaatxt", config, cookies, **kwargs)
33
+
34
+ async def get_book_info(
35
+ self,
36
+ book_id: str,
37
+ **kwargs: Any,
38
+ ) -> list[str]:
39
+ """
40
+ Fetch the raw HTML of the book info page asynchronously.
41
+
42
+ :param book_id: The book identifier.
43
+ :return: The page content as string list.
44
+ """
45
+ url = self.book_info_url(book_id=book_id)
46
+ return [await self.fetch(url, **kwargs)]
47
+
48
+ async def get_book_chapter(
49
+ self,
50
+ book_id: str,
51
+ chapter_id: str,
52
+ **kwargs: Any,
53
+ ) -> list[str]:
54
+ """
55
+ Fetch the raw HTML of a single chapter asynchronously.
56
+
57
+ :param book_id: The book identifier.
58
+ :param chapter_id: The chapter identifier.
59
+ :return: The page content as string list.
60
+ """
61
+ url = self.chapter_url(chapter_id=chapter_id)
62
+ return [await self.fetch(url, encoding="gb2312", **kwargs)]
63
+
64
+ @classmethod
65
+ def book_info_url(cls, book_id: str) -> str:
66
+ """
67
+ Construct the URL for fetching a book's info page.
68
+
69
+ :param book_id: The identifier of the book.
70
+ :return: Fully qualified URL for the book info page.
71
+ """
72
+ return cls.BOOK_INFO_URL.format(book_id=book_id)
73
+
74
+ @classmethod
75
+ def chapter_url(cls, chapter_id: str) -> str:
76
+ """
77
+ Construct the URL for fetching a specific chapter.
78
+
79
+ :param book_id: The identifier of the book.
80
+ :param chapter_id: The identifier of the chapter.
81
+ :return: Fully qualified chapter URL.
82
+ """
83
+ return cls.CHAPTER_URL.format(chapter_id=chapter_id)
@@ -1,19 +1,23 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.fetchers.biquge.session
4
- ---------------------------------------------
3
+ novel_downloader.core.fetchers.b520
4
+ -----------------------------------
5
5
 
6
6
  """
7
7
 
8
8
  from typing import Any
9
9
 
10
10
  from novel_downloader.core.fetchers.base import BaseSession
11
+ from novel_downloader.core.fetchers.registry import register_fetcher
11
12
  from novel_downloader.models import FetcherConfig
12
13
 
13
14
 
15
+ @register_fetcher(
16
+ site_keys=["biquge", "bqg", "b520"],
17
+ )
14
18
  class BiqugeSession(BaseSession):
15
19
  """
16
- A session class for interacting with the Biquge (www.b520.cc) novel website.
20
+ A session class for interacting with the 笔趣阁 (www.b520.cc) novel website.
17
21
  """
18
22
 
19
23
  BOOK_INFO_URL = "http://www.b520.cc/{book_id}/"
@@ -36,7 +40,7 @@ class BiqugeSession(BaseSession):
36
40
  Fetch the raw HTML of the book info page asynchronously.
37
41
 
38
42
  :param book_id: The book identifier.
39
- :return: The page content as a string.
43
+ :return: The page content as string list.
40
44
  """
41
45
  url = self.book_info_url(book_id=book_id)
42
46
  return [await self.fetch(url, **kwargs)]
@@ -52,10 +56,10 @@ class BiqugeSession(BaseSession):
52
56
 
53
57
  :param book_id: The book identifier.
54
58
  :param chapter_id: The chapter identifier.
55
- :return: The chapter content as a string.
59
+ :return: The page content as string list.
56
60
  """
57
61
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
58
- return [await self.fetch(url, **kwargs)]
62
+ return [await self.fetch(url, encoding="gbk", **kwargs)]
59
63
 
60
64
  @classmethod
61
65
  def book_info_url(cls, book_id: str) -> str:
@@ -77,7 +81,3 @@ class BiqugeSession(BaseSession):
77
81
  :return: Fully qualified chapter URL.
78
82
  """
79
83
  return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
80
-
81
- @property
82
- def hostname(self) -> str:
83
- return "www.b520.cc"
@@ -1,19 +1,16 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.fetchers.base.session
4
- -------------------------------------------
3
+ novel_downloader.core.fetchers.base
4
+ -----------------------------------
5
5
 
6
- This module defines the BaseSession class, which provides asynchronous
7
- HTTP request capabilities using aiohttp. It maintains a persistent
8
- client session and supports retries, headers, timeout configurations,
9
- cookie handling, and defines abstract methods for subclasses.
6
+ Abstract base class providing common HTTP session handling for fetchers.
10
7
  """
11
8
 
12
-
13
9
  import abc
14
10
  import json
15
11
  import logging
16
12
  import types
13
+ from collections.abc import Mapping
17
14
  from typing import Any, Self
18
15
 
19
16
  import aiohttp
@@ -21,12 +18,13 @@ from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
21
18
 
22
19
  from novel_downloader.core.interfaces import FetcherProtocol
23
20
  from novel_downloader.models import FetcherConfig, LoginField
21
+ from novel_downloader.utils import (
22
+ async_jitter_sleep,
23
+ )
24
24
  from novel_downloader.utils.constants import (
25
25
  DATA_DIR,
26
26
  DEFAULT_USER_HEADERS,
27
27
  )
28
- from novel_downloader.utils.cookies import parse_cookie_expires
29
- from novel_downloader.utils.time_utils import async_sleep_with_random_delay
30
28
 
31
29
  from .rate_limiter import TokenBucketRateLimiter
32
30
 
@@ -65,7 +63,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
65
63
  self._session: ClientSession | None = None
66
64
  self._rate_limiter: TokenBucketRateLimiter | None = None
67
65
 
68
- if config.max_rps is not None and config.max_rps > 0:
66
+ if config.max_rps > 0:
69
67
  self._rate_limiter = TokenBucketRateLimiter(config.max_rps)
70
68
 
71
69
  self.logger = logging.getLogger(f"{self.__class__.__name__}")
@@ -80,6 +78,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
80
78
  ) -> bool:
81
79
  """
82
80
  Attempt to log in asynchronously.
81
+
83
82
  :returns: True if login succeeded.
84
83
  """
85
84
  return False
@@ -94,7 +93,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
94
93
  Fetch the raw HTML (or JSON) of the book info page asynchronously.
95
94
 
96
95
  :param book_id: The book identifier.
97
- :return: The page content as a string.
96
+ :return: The page content as string list.
98
97
  """
99
98
  ...
100
99
 
@@ -110,7 +109,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
110
109
 
111
110
  :param book_id: The book identifier.
112
111
  :param chapter_id: The chapter identifier.
113
- :return: The chapter content as a string.
112
+ :return: The page content as string list.
114
113
  """
115
114
  ...
116
115
 
@@ -156,7 +155,12 @@ class BaseSession(FetcherProtocol, abc.ABC):
156
155
  await self._session.close()
157
156
  self._session = None
158
157
 
159
- async def fetch(self, url: str, **kwargs: Any) -> str:
158
+ async def fetch(
159
+ self,
160
+ url: str,
161
+ encoding: str | None = None,
162
+ **kwargs: Any,
163
+ ) -> str:
160
164
  """
161
165
  Fetch the content from the given URL asynchronously, with retry support.
162
166
 
@@ -172,11 +176,10 @@ class BaseSession(FetcherProtocol, abc.ABC):
172
176
  try:
173
177
  async with self.session.get(url, **kwargs) as resp:
174
178
  resp.raise_for_status()
175
- text: str = await resp.text()
176
- return text
179
+ return await self._response_to_str(resp, encoding)
177
180
  except aiohttp.ClientError:
178
181
  if attempt < self.retry_times:
179
- await async_sleep_with_random_delay(
182
+ await async_jitter_sleep(
180
183
  self.backoff_factor,
181
184
  mul_spread=1.1,
182
185
  max_sleep=self.backoff_factor + 2,
@@ -241,8 +244,12 @@ class BaseSession(FetcherProtocol, abc.ABC):
241
244
  return False
242
245
  try:
243
246
  storage = json.loads(self._state_file.read_text(encoding="utf-8"))
244
- for c in storage.get("cookies", []):
245
- self._session.cookie_jar.update_cookies({c["name"]: c["value"]})
247
+ raw_cookies = storage.get("cookies", [])
248
+ cookie_dict = self._filter_cookies(raw_cookies)
249
+
250
+ if cookie_dict:
251
+ self._session.cookie_jar.update_cookies(cookie_dict)
252
+
246
253
  self._is_logged_in = await self._check_login_status()
247
254
  return self._is_logged_in
248
255
  except Exception as e:
@@ -272,12 +279,6 @@ class BaseSession(FetcherProtocol, abc.ABC):
272
279
  {
273
280
  "name": cookie.key,
274
281
  "value": cookie.value,
275
- "domain": cookie.get("domain", ""),
276
- "path": cookie.get("path", "/"),
277
- "expires": parse_cookie_expires(cookie.get("expires")),
278
- "httpOnly": bool(cookie.get("httponly", False)),
279
- "secure": bool(cookie.get("secure", False)),
280
- "sameSite": cookie.get("samesite") or "Lax",
281
282
  }
282
283
  )
283
284
  storage_state = {
@@ -294,21 +295,6 @@ class BaseSession(FetcherProtocol, abc.ABC):
294
295
  self.logger.warning("Failed to save state: %s", e)
295
296
  return False
296
297
 
297
- async def set_interactive_mode(self, enable: bool) -> bool:
298
- """
299
- Enable or disable interactive mode for manual login.
300
-
301
- :param enable: True to enable, False to disable interactive mode.
302
- :return: True if operation or login check succeeded, False otherwise.
303
- """
304
- return False
305
-
306
- def get_cookie_value(self, key: str) -> str | None:
307
- for cookie in self.session.cookie_jar:
308
- if cookie.key == key:
309
- return str(cookie.value)
310
- return None
311
-
312
298
  def update_cookies(
313
299
  self,
314
300
  cookies: dict[str, str],
@@ -340,18 +326,10 @@ class BaseSession(FetcherProtocol, abc.ABC):
340
326
  """
341
327
  return False
342
328
 
343
- @property
344
- def hostname(self) -> str:
345
- return ""
346
-
347
329
  @property
348
330
  def site(self) -> str:
349
331
  return self._site
350
332
 
351
- @property
352
- def requester_type(self) -> str:
353
- return "session"
354
-
355
333
  @property
356
334
  def is_logged_in(self) -> bool:
357
335
  """
@@ -405,6 +383,44 @@ class BaseSession(FetcherProtocol, abc.ABC):
405
383
  return dict(self._session.headers)
406
384
  return self._headers.copy()
407
385
 
386
+ @staticmethod
387
+ def _filter_cookies(
388
+ raw_cookies: list[Mapping[str, Any]],
389
+ ) -> dict[str, str]:
390
+ """
391
+ Hook:
392
+ take the raw list of cookie-dicts loaded from storage_state
393
+ and return a simple name -> value mapping.
394
+ """
395
+ return {c["name"]: c["value"] for c in raw_cookies}
396
+
397
+ @staticmethod
398
+ async def _response_to_str(
399
+ resp: ClientResponse,
400
+ encoding: str | None = None,
401
+ ) -> str:
402
+ """
403
+ Read the full body of resp as text. First try the declared charset,
404
+ then on UnicodeDecodeError fall back to a lenient utf-8 decode.
405
+ """
406
+ data: bytes = await resp.read()
407
+ encodings = [
408
+ encoding,
409
+ resp.charset,
410
+ "gb2312",
411
+ "gb18030",
412
+ "gbk",
413
+ "utf-8",
414
+ ]
415
+ encodings_list: list[str] = [e for e in encodings if e]
416
+ for enc in encodings_list:
417
+ try:
418
+ return data.decode(enc)
419
+ except UnicodeDecodeError:
420
+ continue
421
+ encoding = encoding or "utf-8"
422
+ return data.decode(encoding, errors="ignore")
423
+
408
424
  async def __aenter__(self) -> Self:
409
425
  if self._session is None or self._session.closed:
410
426
  await self.init()
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.biquyuedu
4
+ ----------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from novel_downloader.core.fetchers.base import BaseSession
11
+ from novel_downloader.core.fetchers.registry import register_fetcher
12
+ from novel_downloader.models import FetcherConfig
13
+
14
+
15
+ @register_fetcher(
16
+ site_keys=["biquyuedu"],
17
+ )
18
+ class BiquyueduSession(BaseSession):
19
+ """
20
+ A session class for interacting with the 精彩小说 (biquyuedu.com) novel website.
21
+ """
22
+
23
+ BOOK_INFO_URL = "https://biquyuedu.com/novel/{book_id}.html"
24
+ CHAPTER_URL = "https://biquyuedu.com/novel/{book_id}/{chapter_id}.html"
25
+
26
+ def __init__(
27
+ self,
28
+ config: FetcherConfig,
29
+ cookies: dict[str, str] | None = None,
30
+ **kwargs: Any,
31
+ ) -> None:
32
+ super().__init__("biquyuedu", config, cookies, **kwargs)
33
+
34
+ async def get_book_info(
35
+ self,
36
+ book_id: str,
37
+ **kwargs: Any,
38
+ ) -> list[str]:
39
+ """
40
+ Fetch the raw HTML of the book info page asynchronously.
41
+
42
+ :param book_id: The book identifier.
43
+ :return: The page content as string list.
44
+ """
45
+ url = self.book_info_url(book_id=book_id)
46
+ return [await self.fetch(url, **kwargs)]
47
+
48
+ async def get_book_chapter(
49
+ self,
50
+ book_id: str,
51
+ chapter_id: str,
52
+ **kwargs: Any,
53
+ ) -> list[str]:
54
+ """
55
+ Fetch the raw HTML of a single chapter asynchronously.
56
+
57
+ :param book_id: The book identifier.
58
+ :param chapter_id: The chapter identifier.
59
+ :return: The page content as string list.
60
+ """
61
+ url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
62
+ return [await self.fetch(url, **kwargs)]
63
+
64
+ @classmethod
65
+ def book_info_url(cls, book_id: str) -> str:
66
+ """
67
+ Construct the URL for fetching a book's info page.
68
+
69
+ :param book_id: The identifier of the book.
70
+ :return: Fully qualified URL for the book info page.
71
+ """
72
+ return cls.BOOK_INFO_URL.format(book_id=book_id)
73
+
74
+ @classmethod
75
+ def chapter_url(cls, book_id: str, chapter_id: str) -> str:
76
+ """
77
+ Construct the URL for fetching a specific chapter.
78
+
79
+ :param book_id: The identifier of the book.
80
+ :param chapter_id: The identifier of the chapter.
81
+ :return: Fully qualified chapter URL.
82
+ """
83
+ return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.dxmwx
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ import asyncio
9
+ from typing import Any
10
+
11
+ from novel_downloader.core.fetchers.base import BaseSession
12
+ from novel_downloader.core.fetchers.registry import register_fetcher
13
+ from novel_downloader.models import FetcherConfig
14
+
15
+
16
@register_fetcher(
    site_keys=["dxmwx"],
)
class DxmwxSession(BaseSession):
    """
    Fetcher session for the 大熊猫文学网 (www.dxmwx.org) novel website.

    The host used for every request is chosen once, at construction time,
    from ``config.locale_style``.
    """

    BOOK_INFO_URL = "https://{base_url}/book/{book_id}.html"
    BOOK_CATALOG_URL = "https://{base_url}/chapter/{book_id}.html"
    CHAPTER_URL = "https://{base_url}/read/{book_id}_{chapter_id}.html"

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialise the base session and select the site host.

        :param config: Fetcher configuration; its ``locale_style`` selects
            ``www.dxmwx.org`` when equal to ``"simplified"`` and
            ``tw.dxmwx.org`` otherwise.
        :param cookies: Optional cookies, forwarded to the base class.
        :param kwargs: Extra keyword arguments forwarded to the base class.
        """
        super().__init__("dxmwx", config, cookies, **kwargs)
        if config.locale_style == "simplified":
            self.base_url = "www.dxmwx.org"
        else:
            self.base_url = "tw.dxmwx.org"

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the book info page and the catalog page concurrently.

        Order: [info, catalog]

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.fetch``.
        :return: A two-element list: info page content, then catalog content.
        """
        host = self.base_url
        info_url = self.book_info_url(base_url=host, book_id=book_id)
        catalog_url = self.book_catalog_url(base_url=host, book_id=book_id)

        # gather() preserves argument order, so the result is [info, catalog].
        pending = [
            self.fetch(page_url, **kwargs) for page_url in (info_url, catalog_url)
        ]
        pages = await asyncio.gather(*pending)
        return list(pages)

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download a single chapter page and return its content.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.fetch``.
        :return: A one-element list holding the fetched page content.
        """
        page_url = self.chapter_url(
            base_url=self.base_url,
            book_id=book_id,
            chapter_id=chapter_id,
        )
        page_html = await self.fetch(page_url, **kwargs)
        return [page_html]

    @classmethod
    def book_info_url(cls, base_url: str, book_id: str) -> str:
        """
        Build the book info page URL from ``BOOK_INFO_URL``.

        :param base_url: Host name substituted into the template.
        :param book_id: The identifier of the book.
        :return: The template with both values substituted.
        """
        template = cls.BOOK_INFO_URL
        return template.format(base_url=base_url, book_id=book_id)

    @classmethod
    def book_catalog_url(cls, base_url: str, book_id: str) -> str:
        """
        Build the catalog page URL from ``BOOK_CATALOG_URL``.

        :param base_url: Host name substituted into the template.
        :param book_id: The identifier of the book.
        :return: The template with both values substituted.
        """
        template = cls.BOOK_CATALOG_URL
        return template.format(base_url=base_url, book_id=book_id)

    @classmethod
    def chapter_url(cls, base_url: str, book_id: str, chapter_id: str) -> str:
        """
        Build the chapter page URL from ``CHAPTER_URL``.

        :param base_url: Host name substituted into the template.
        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: The template with all three values substituted.
        """
        template = cls.CHAPTER_URL
        return template.format(
            base_url=base_url,
            book_id=book_id,
            chapter_id=chapter_id,
        )
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.eightnovel
4
+ -----------------------------------------
5
+
6
+ """
7
+
8
+ import re
9
+ from re import Pattern
10
+ from typing import Any
11
+
12
+ from novel_downloader.core.fetchers.base import BaseSession
13
+ from novel_downloader.core.fetchers.registry import register_fetcher
14
+ from novel_downloader.models import FetcherConfig
15
+
16
+
17
@register_fetcher(
    site_keys=["8novel", "eightnovel"],
)
class EightnovelSession(BaseSession):
    """
    Fetcher session for the 无限轻小说 (www.8novel.com) novel website.

    A chapter needs two requests: the chapter page, and a content page whose
    URL includes a segment sliced from a seed found in the chapter page.
    """

    BOOK_INFO_URL = "https://www.8novel.com/novelbooks/{book_id}/"
    CHAPTER_URL = "https://article.8novel.com/read/{book_id}/?{chapter_id}"
    CHAPTER_CONTENT_URL = (
        "https://article.8novel.com/txt/1/{book_id}/{chapter_id}{seed_segment}.html"
    )

    # Captures the quoted literal in expressions shaped like "...".split(",").
    _SPLIT_STR_PATTERN = re.compile(
        r'["\']([^"\']+)["\']\s*\.split\s*\(\s*["\']\s*,\s*["\']\s*\)', re.DOTALL
    )
    # Matches a string made up only of comma-separated runs of digits.
    _DIGIT_LIST_PATTERN: Pattern[str] = re.compile(r"^\d+(?:,\d+)*$")

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialise the base session under the ``"eightnovel"`` site name.

        :param config: Fetcher configuration, forwarded to the base class.
        :param cookies: Optional cookies, forwarded to the base class.
        :param kwargs: Extra keyword arguments forwarded to the base class.
        """
        super().__init__("eightnovel", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the book info page and return its content.

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.fetch``.
        :return: A one-element list holding the fetched page content.
        """
        page_url = self.book_info_url(book_id=book_id)
        page_html = await self.fetch(page_url, **kwargs)
        return [page_html]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the chapter page, then the content page derived from it.

        Order: [chap_info, content]

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.fetch``.
        :return: A two-element list: chapter page content, then content page.
        """
        chapter_page_url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
        chapter_html = await self.fetch(chapter_page_url, **kwargs)

        # The content URL depends on a seed embedded in the chapter page,
        # so the two requests have to run one after the other.
        content_url = self._build_chapter_content_url(
            seed=self._extract_url_seed(chapter_html),
            book_id=book_id,
            chapter_id=chapter_id,
        )
        content_html = await self.fetch(content_url, **kwargs)

        return [chapter_html, content_html]

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Build the book info page URL from ``BOOK_INFO_URL``.

        :param book_id: The identifier of the book.
        :return: The template with ``book_id`` substituted.
        """
        template = cls.BOOK_INFO_URL
        return template.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Build the chapter page URL from ``CHAPTER_URL``.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: The template with both identifiers substituted.
        """
        template = cls.CHAPTER_URL
        return template.format(book_id=book_id, chapter_id=chapter_id)

    @classmethod
    def _extract_url_seed(cls, html_str: str) -> str:
        """
        Pull the seed out of the chapter page source.

        Scans every string literal of the form ``"...".split(",")``, keeps
        those consisting solely of comma-separated digits, and returns the
        final item of the last such literal.

        :param html_str: HTML/JS source of the chapter page.
        :return: The seed string, or ``""`` when no digit list is present.
        """
        candidates: list[str] = cls._SPLIT_STR_PATTERN.findall(html_str)

        # Walk backwards so the first hit is the last digit list in the page.
        for literal in reversed(candidates):
            if cls._DIGIT_LIST_PATTERN.fullmatch(literal):
                return literal.rsplit(",", 1)[-1]

        return ""

    @classmethod
    def _build_chapter_content_url(
        cls, seed: str, book_id: str, chapter_id: str
    ) -> str:
        """
        Build the content page URL from ``CHAPTER_CONTENT_URL``.

        Takes up to five characters of ``seed`` starting at offset
        ``(int(chapter_id) * 3) % 100`` and appends them after the chapter id.

        :param seed: Seed string extracted from the chapter page.
        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter; must parse as int.
        :return: The template with ids and seed segment substituted.
        """
        offset = (int(chapter_id) * 3) % 100
        segment = seed[offset : offset + 5]

        return cls.CHAPTER_CONTENT_URL.format(
            book_id=book_id,
            chapter_id=chapter_id,
            seed_segment=segment,
        )