novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,177 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.xiguashuwu
4
+ -----------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from novel_downloader.core.fetchers.base import BaseSession
11
+ from novel_downloader.core.fetchers.registry import register_fetcher
12
+ from novel_downloader.models import FetcherConfig
13
+ from novel_downloader.utils import async_jitter_sleep
14
+
15
+
16
@register_fetcher(
    site_keys=["xiguashuwu"],
)
class XiguashuwuSession(BaseSession):
    """
    A session class for interacting with the 西瓜书屋 (www.xiguashuwu.com)
    novel website.

    Both the catalog and individual chapters may span several pages; the
    fetch methods walk those pages one after another and return them in order.
    """

    BASE_URL = "https://www.xiguashuwu.com"
    BOOK_INFO_URL = "https://www.xiguashuwu.com/book/{book_id}/iszip/0/"
    BOOK_CATALOG_URL = "https://www.xiguashuwu.com/book/{book_id}/catalog/"
    CHAPTER_URL = "https://www.xiguashuwu.com/book/{book_id}/{chapter_id}.html"

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__("xiguashuwu", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the book info page followed by every catalog page.

        Result layout: [info, catalog page 1, ..., catalog page N]

        A catalog page that fails to download is logged and ends the walk;
        the pages collected so far are still returned.

        :param book_id: The book identifier.
        :return: Raw HTML strings in the order described above.
        """
        info_page = await self.fetch(self.book_info_url(book_id=book_id), **kwargs)

        catalog_root = self.book_catalog_url(book_id=book_id)
        collected: list[str] = []
        page_no = 1
        while True:
            # The first catalog page lives at the bare catalog URL; later
            # pages are "<catalog>/<n>.html".
            if page_no == 1:
                page_url = catalog_root + ""
            else:
                page_url = catalog_root + f"{page_no}.html"

            try:
                page = await self.fetch(page_url, **kwargs)
            except Exception as exc:
                self.logger.warning(
                    "[async] get_book_catalog(%s page %d) failed: %s",
                    book_id,
                    page_no,
                    exc,
                )
                break

            collected.append(page)
            page_no += 1
            # Stop as soon as the page just fetched carries no link to the
            # following page number.
            if not self._links_to_catalog_page(page, book_id, page_no):
                break

            await self._pause_between_requests()
        return [info_page, *collected]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download every page of one chapter.

        Result layout: [page1, ..., pageN]

        Continuation pages are named "<chapter_id>_<n>" and are requested only
        when the previously fetched page contains their relative path. A
        failed download is logged and ends the walk.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :return: Raw HTML strings, one per chapter page.
        """
        pages: list[str] = []
        part = 1

        while True:
            page_key = f"{chapter_id}_{part}" if part != 1 else chapter_id
            rel_path = self.relative_chapter_url(book_id, page_key)
            if part > 1 and rel_path not in pages[-1]:
                break
            target = self.BASE_URL + rel_path

            try:
                body = await self.fetch(target, **kwargs)
            except Exception as exc:
                self.logger.warning(
                    "[async] get_book_chapter(%s page %d) failed: %s",
                    chapter_id,
                    part,
                    exc,
                )
                break

            pages.append(body)
            part += 1
            await self._pause_between_requests()

        return pages

    async def _pause_between_requests(self) -> None:
        """Sleep for a jittered interval derived from ``request_interval``."""
        base_delay = self.request_interval
        await async_jitter_sleep(
            base_delay,
            mul_spread=1.1,
            max_sleep=self.request_interval + 2,
        )

    @staticmethod
    def _links_to_catalog_page(page: str, book_id: str, page_no: int) -> bool:
        """Return True when *page* contains any known link form to catalog page *page_no*."""
        markers = [
            # f"javascript:readbook('{book_id}','{page_no}');",
            # f"javascript:gobook('{book_id}','{page_no}');",
            # f"javascript:runbook('{book_id}','{page_no}');",
            # f"javascript:gotochapter('{book_id}','{page_no}');",
            f"javascript:readbookjump('{book_id}','{page_no}');",
            f"javascript:gobookjump('{book_id}','{page_no}');",
            f"javascript:runbookjump('{book_id}','{page_no}');",
            f"javascript:gotojump('{book_id}','{page_no}');",
            f"javascript:gotochapterjump('{book_id}','{page_no}');",
            f"/book/{book_id}/catalog/{page_no}.html",
        ]
        for marker in markers:
            if marker in page:
                return True
        return False

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Build the book info page URL.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        template = cls.BOOK_INFO_URL
        return template.format(book_id=book_id)

    @classmethod
    def book_catalog_url(cls, book_id: str) -> str:
        """
        Build the URL of the first catalog page.

        :param book_id: The identifier of the book.
        :return: Fully qualified catalog page URL.
        """
        template = cls.BOOK_CATALOG_URL
        return template.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Build the URL of a chapter page.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        template = cls.CHAPTER_URL
        return template.format(book_id=book_id, chapter_id=chapter_id)

    @classmethod
    def relative_chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Build the site-relative path of a chapter page.
        """
        return f"/book/{book_id}/{chapter_id}.html"
@@ -0,0 +1,171 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.xs63b
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ import asyncio
9
+ import base64
10
+ import re
11
+ from typing import Any
12
+
13
+ from novel_downloader.core.fetchers.base import BaseSession
14
+ from novel_downloader.core.fetchers.registry import register_fetcher
15
+ from novel_downloader.models import FetcherConfig
16
+ from novel_downloader.utils import async_jitter_sleep
17
+
18
+
19
@register_fetcher(
    site_keys=["xs63b"],
)
class Xs63bSession(BaseSession):
    """
    A session class for interacting with the 小说路上 (m.xs63b.com) novel website.

    Book info and chapter pages are requested from ``m.xs63b.com`` while the
    catalog is requested from ``www.xs63b.com``. Chapters may span several
    pages; the URL of each following page is rebuilt from the ``jsarr`` and
    ``jsstr`` JavaScript variables embedded in the current page.
    """

    BOOK_INFO_URL = "https://m.xs63b.com/{book_id}/"
    BOOK_CATALOG_URL = "https://www.xs63b.com/{book_id}/"
    CHAPTER_URL = "https://m.xs63b.com/{book_id}/{chapter_id}.html"

    # `var jsarr = [ ... ]` -> comma separated integer indices
    _JSARR_PATTERN = re.compile(r"var\s+jsarr\s*=\s*\[([^\]]+)\]")
    # `var jsstr = "...";` -> base64 payload
    _JSSTR_PATTERN = re.compile(r"var\s+jsstr\s*=\s*\"([^\"]+)\";")

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__("xs63b", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of the book info page asynchronously.

        Order: [info, catalog]

        Hyphens in ``book_id`` are replaced with slashes before the URLs are
        built. Both pages are requested concurrently.

        :param book_id: The book identifier.
        :return: The page content as string list.
        """
        book_id = book_id.replace("-", "/")
        info_url = self.book_info_url(book_id=book_id)
        catalog_url = self.book_catalog_url(book_id=book_id)

        info_html, catalog_html = await asyncio.gather(
            self.fetch(info_url, ssl=False, **kwargs),
            self.fetch(catalog_url, ssl=False, **kwargs),
        )
        return [info_html, catalog_html]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of a single chapter asynchronously.

        Order: [page1, ..., pageN]

        A page that fails to download is logged and ends the walk; the pages
        collected so far are returned. Errors raised while decoding the next
        page URL are not caught here and propagate to the caller.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :return: The page content as string list.
        :raises ValueError: If a page advertises a next page but its
            ``jsarr`` / ``jsstr`` variables cannot be found or parsed.
        """
        book_id = book_id.replace("-", "/")
        html_pages: list[str] = []
        chapter_url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
        page_no = 1  # 1-based index of the page being requested (for logging)

        while True:
            # NOTE(review): get_book_info passes ssl=False but chapter pages
            # are fetched without it -- confirm whether that is intentional.
            try:
                html = await self.fetch(chapter_url, **kwargs)
            except Exception as exc:
                # The format string has three placeholders, so the page
                # number must be supplied alongside the URL and the error.
                self.logger.warning(
                    "[async] get_book_chapter(%s page %d) failed: %s",
                    chapter_url,
                    page_no,
                    exc,
                )
                break

            html_pages.append(html)
            # The next-page button image is the only marker of a continuation.
            if "/xs635/mobile/images/nextpage.png" not in html:
                break

            jsarr = self._parse_jsarr(html)
            jsstr = self._parse_jsstr(html)
            chapter_url = self._build_chapter_url(book_id, jsarr, jsstr)
            page_no += 1

            await async_jitter_sleep(
                self.request_interval,
                mul_spread=1.1,
                max_sleep=self.request_interval + 2,
            )

        return html_pages

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return cls.BOOK_INFO_URL.format(book_id=book_id)

    @classmethod
    def book_catalog_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's catalog page.

        :param book_id: The identifier of the book.
        :return: Fully qualified catalog page URL.
        """
        return cls.BOOK_CATALOG_URL.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)

    @classmethod
    def _parse_jsarr(cls, text: str) -> list[int]:
        """
        Extract jsarr from `var jsarr = [...];`.

        Blank entries (for example one produced by a trailing comma) are
        skipped.

        :param text: Page HTML to search.
        :return: The array entries as integers, in source order.
        :raises ValueError: If the variable is not found or an entry is not
            an integer.
        """
        m = cls._JSARR_PATTERN.search(text)
        if not m:
            raise ValueError("jsarr not found")
        return [int(tok) for tok in m.group(1).split(",") if tok.strip()]

    @classmethod
    def _parse_jsstr(cls, text: str) -> str:
        """
        Extract jsstr from `var jsstr = "...";`.

        :param text: Page HTML to search.
        :return: The quoted string value, unmodified.
        :raises ValueError: If the variable is not found.
        """
        m = cls._JSSTR_PATTERN.search(text)
        if not m:
            raise ValueError("jsstr not found")
        return m.group(1)

    @staticmethod
    def _build_chapter_url(book_id: str, jsarr: list[int], jsstr: str) -> str:
        """
        Rebuild the next chapter-page URL from the embedded JS variables.

        ``jsstr`` is base64-decoded to UTF-8 text and the characters at the
        positions listed in ``jsarr`` are concatenated, in that order, to form
        the page name.

        :param book_id: Book path segment (already slash-separated).
        :param jsarr: Character indices into the decoded string.
        :param jsstr: Base64-encoded source string.
        :return: Fully qualified URL of the next chapter page.
        """
        decoded = base64.b64decode(jsstr).decode("utf-8")
        nnstr = "".join(decoded[i] for i in jsarr)
        return f"https://m.xs63b.com/{book_id}/{nnstr}.html"
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.xshbook
4
+ --------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from novel_downloader.core.fetchers.base import BaseSession
11
+ from novel_downloader.core.fetchers.registry import register_fetcher
12
+ from novel_downloader.models import FetcherConfig
13
+
14
+
15
@register_fetcher(
    site_keys=["xshbook"],
)
class XshbookSession(BaseSession):
    """
    A session class for interacting with the 小说虎 (www.xshbook.com) novel website.

    Book identifiers are accepted with hyphens and converted to slash-separated
    URL path segments before any request is made.
    """

    BOOK_INFO_URL = "https://www.xshbook.com/{book_id}/"
    CHAPTER_URL = "https://www.xshbook.com/{book_id}/{chapter_id}.html"

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__("xshbook", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the book info page.

        :param book_id: The book identifier.
        :return: A one-element list holding the raw page HTML.
        """
        path_id = book_id.replace("-", "/")
        target = self.book_info_url(book_id=path_id)
        page = await self.fetch(target, **kwargs)
        return [page]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download a single chapter page.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :return: A one-element list holding the raw page HTML.
        """
        path_id = book_id.replace("-", "/")
        target = self.chapter_url(book_id=path_id, chapter_id=chapter_id)
        page = await self.fetch(target, **kwargs)
        return [page]

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Build the book info page URL.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        template = cls.BOOK_INFO_URL
        return template.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Build the URL of a chapter page.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        template = cls.CHAPTER_URL
        return template.format(book_id=book_id, chapter_id=chapter_id)
@@ -1,22 +1,27 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.fetchers.yamibo.session
4
- ---------------------------------------------
3
+ novel_downloader.core.fetchers.yamibo
4
+ -------------------------------------
5
5
 
6
6
  """
7
7
 
8
+ from collections.abc import Mapping
8
9
  from typing import Any
9
10
 
10
11
  from lxml import html
11
12
 
12
13
  from novel_downloader.core.fetchers.base import BaseSession
14
+ from novel_downloader.core.fetchers.registry import register_fetcher
13
15
  from novel_downloader.models import FetcherConfig, LoginField
14
- from novel_downloader.utils.time_utils import async_sleep_with_random_delay
16
+ from novel_downloader.utils import async_jitter_sleep
15
17
 
16
18
 
19
+ @register_fetcher(
20
+ site_keys=["yamibo"],
21
+ )
17
22
  class YamiboSession(BaseSession):
18
23
  """
19
- A session class for interacting with the Yamibo (www.yamibo.com) novel website.
24
+ A session class for interacting with the 百合会 (www.yamibo.com) novel website.
20
25
  """
21
26
 
22
27
  BASE_URL = "https://www.yamibo.com"
@@ -64,7 +69,7 @@ class YamiboSession(BaseSession):
64
69
  ):
65
70
  self._is_logged_in = True
66
71
  return True
67
- await async_sleep_with_random_delay(
72
+ await async_jitter_sleep(
68
73
  self.backoff_factor,
69
74
  mul_spread=1.1,
70
75
  max_sleep=self.backoff_factor + 2,
@@ -82,7 +87,7 @@ class YamiboSession(BaseSession):
82
87
  Fetch the raw HTML of the book info page asynchronously.
83
88
 
84
89
  :param book_id: The book identifier.
85
- :return: The page content as a string.
90
+ :return: The page content as string list.
86
91
  """
87
92
  url = self.book_info_url(book_id=book_id)
88
93
  return [await self.fetch(url, **kwargs)]
@@ -98,7 +103,7 @@ class YamiboSession(BaseSession):
98
103
 
99
104
  :param book_id: The book identifier.
100
105
  :param chapter_id: The chapter identifier.
101
- :return: The chapter content as a string.
106
+ :return: The page content as string list.
102
107
  """
103
108
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
104
109
  return [await self.fetch(url, **kwargs)]
@@ -166,10 +171,6 @@ class YamiboSession(BaseSession):
166
171
  """
167
172
  return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
168
173
 
169
- @property
170
- def hostname(self) -> str:
171
- return "www.yamibo.com"
172
-
173
174
  async def _api_login(self, username: str, password: str) -> bool:
174
175
  """
175
176
  Login to the API using a 2-step token-based process.
@@ -227,3 +228,14 @@ class YamiboSession(BaseSession):
227
228
  if not resp_text:
228
229
  return False
229
230
  return not any(kw in resp_text[0] for kw in keywords)
231
+
232
+ @staticmethod
233
+ def _filter_cookies(
234
+ raw_cookies: list[Mapping[str, Any]],
235
+ ) -> dict[str, str]:
236
+ ALLOWED_DOMAINS = {"www.yamibo.com", "bbs.yamibo.com", ""}
237
+ return {
238
+ c["name"]: c["value"]
239
+ for c in raw_cookies
240
+ if c.get("domain", "") in ALLOWED_DOMAINS
241
+ }
@@ -0,0 +1,114 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.yibige
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ import asyncio
9
+ from typing import Any
10
+
11
+ from novel_downloader.core.fetchers.base import BaseSession
12
+ from novel_downloader.core.fetchers.registry import register_fetcher
13
+ from novel_downloader.models import FetcherConfig
14
+
15
+
16
+ @register_fetcher(
17
+ site_keys=["yibige"],
18
+ )
19
+ class YibigeSession(BaseSession):
20
+ """
21
+ A session class for interacting with the 一笔阁 (www.yibige.org) novel website.
22
+ """
23
+
24
+ BOOK_INFO_URL = "https://{base_url}/{book_id}/"
25
+ BOOK_CATALOG_URL = "https://{base_url}/{book_id}/index.html"
26
+ CHAPTER_URL = "https://{base_url}/{book_id}/{chapter_id}.html"
27
+
28
+ def __init__(
29
+ self,
30
+ config: FetcherConfig,
31
+ cookies: dict[str, str] | None = None,
32
+ **kwargs: Any,
33
+ ) -> None:
34
+ super().__init__("yibige", config, cookies, **kwargs)
35
+ self.base_url = (
36
+ "www.yibige.org" if config.locale_style == "simplified" else "tw.yibige.org"
37
+ )
38
+ # 主站: www.yibige.org
39
+ # 新加坡: sg.yibige.org
40
+ # 臺灣正體: tw.yibige.org
41
+ # 香港繁體: hk.yibige.org
42
+
43
+ async def get_book_info(
44
+ self,
45
+ book_id: str,
46
+ **kwargs: Any,
47
+ ) -> list[str]:
48
+ """
49
+ Fetch the raw HTML of the book info page asynchronously.
50
+
51
+ Order: [info, catalog]
52
+
53
+ :param book_id: The book identifier.
54
+ :return: The page content as string list.
55
+ """
56
+ info_url = self.book_info_url(base_url=self.base_url, book_id=book_id)
57
+ catalog_url = self.book_catalog_url(base_url=self.base_url, book_id=book_id)
58
+
59
+ info_html, catalog_html = await asyncio.gather(
60
+ self.fetch(info_url, **kwargs),
61
+ self.fetch(catalog_url, **kwargs),
62
+ )
63
+ return [info_html, catalog_html]
64
+
65
+ async def get_book_chapter(
66
+ self,
67
+ book_id: str,
68
+ chapter_id: str,
69
+ **kwargs: Any,
70
+ ) -> list[str]:
71
+ """
72
+ Fetch the raw HTML of a single chapter asynchronously.
73
+
74
+ :param book_id: The book identifier.
75
+ :param chapter_id: The chapter identifier.
76
+ :return: The page content as string list.
77
+ """
78
+ url = self.chapter_url(
79
+ base_url=self.base_url, book_id=book_id, chapter_id=chapter_id
80
+ )
81
+ return [await self.fetch(url, **kwargs)]
82
+
83
+ @classmethod
84
+ def book_info_url(cls, base_url: str, book_id: str) -> str:
85
+ """
86
+ Construct the URL for fetching a book's info page.
87
+
88
+ :param book_id: The identifier of the book.
89
+ :return: Fully qualified URL for the book info page.
90
+ """
91
+ return cls.BOOK_INFO_URL.format(base_url=base_url, book_id=book_id)
92
+
93
+ @classmethod
94
+ def book_catalog_url(cls, base_url: str, book_id: str) -> str:
95
+ """
96
+ Construct the URL for fetching a book's catalog page.
97
+
98
+ :param book_id: The identifier of the book.
99
+ :return: Fully qualified catalog page URL.
100
+ """
101
+ return cls.BOOK_CATALOG_URL.format(base_url=base_url, book_id=book_id)
102
+
103
+ @classmethod
104
+ def chapter_url(cls, base_url: str, book_id: str, chapter_id: str) -> str:
105
+ """
106
+ Construct the URL for fetching a specific chapter.
107
+
108
+ :param book_id: The identifier of the book.
109
+ :param chapter_id: The identifier of the chapter.
110
+ :return: Fully qualified chapter URL.
111
+ """
112
+ return cls.CHAPTER_URL.format(
113
+ base_url=base_url, book_id=book_id, chapter_id=chapter_id
114
+ )
@@ -3,25 +3,19 @@
3
3
  novel_downloader.core.interfaces
4
4
  --------------------------------
5
5
 
6
- This package centralizes the protocol definitions used across the
7
- system to promote interface-based design and type-safe dependency
8
- injection.
9
-
10
- Included protocols:
11
- - DownloaderProtocol
12
- - FetcherProtocol
13
- - ParserProtocol
14
- - ExporterProtocol
6
+ Protocol interfaces defining the contracts for core components.
15
7
  """
16
8
 
17
- from .downloader import DownloaderProtocol
18
- from .exporter import ExporterProtocol
19
- from .fetcher import FetcherProtocol
20
- from .parser import ParserProtocol
21
-
22
9
  __all__ = [
23
10
  "DownloaderProtocol",
24
11
  "ExporterProtocol",
25
12
  "FetcherProtocol",
26
13
  "ParserProtocol",
14
+ "SearcherProtocol",
27
15
  ]
16
+
17
+ from .downloader import DownloaderProtocol
18
+ from .exporter import ExporterProtocol
19
+ from .fetcher import FetcherProtocol
20
+ from .parser import ParserProtocol
21
+ from .searcher import SearcherProtocol