novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +14 -11
  3. novel_downloader/cli/export.py +19 -19
  4. novel_downloader/cli/ui.py +35 -8
  5. novel_downloader/config/adapter.py +216 -153
  6. novel_downloader/core/__init__.py +5 -6
  7. novel_downloader/core/archived/deqixs/fetcher.py +1 -28
  8. novel_downloader/core/downloaders/__init__.py +2 -0
  9. novel_downloader/core/downloaders/base.py +34 -85
  10. novel_downloader/core/downloaders/common.py +147 -171
  11. novel_downloader/core/downloaders/qianbi.py +30 -64
  12. novel_downloader/core/downloaders/qidian.py +157 -184
  13. novel_downloader/core/downloaders/qqbook.py +292 -0
  14. novel_downloader/core/downloaders/registry.py +2 -2
  15. novel_downloader/core/exporters/__init__.py +2 -0
  16. novel_downloader/core/exporters/base.py +37 -59
  17. novel_downloader/core/exporters/common.py +620 -0
  18. novel_downloader/core/exporters/linovelib.py +47 -0
  19. novel_downloader/core/exporters/qidian.py +41 -12
  20. novel_downloader/core/exporters/qqbook.py +28 -0
  21. novel_downloader/core/exporters/registry.py +2 -2
  22. novel_downloader/core/fetchers/__init__.py +4 -2
  23. novel_downloader/core/fetchers/aaatxt.py +2 -22
  24. novel_downloader/core/fetchers/b520.py +3 -23
  25. novel_downloader/core/fetchers/base.py +80 -105
  26. novel_downloader/core/fetchers/biquyuedu.py +2 -22
  27. novel_downloader/core/fetchers/dxmwx.py +10 -22
  28. novel_downloader/core/fetchers/esjzone.py +6 -29
  29. novel_downloader/core/fetchers/guidaye.py +2 -22
  30. novel_downloader/core/fetchers/hetushu.py +9 -29
  31. novel_downloader/core/fetchers/i25zw.py +2 -16
  32. novel_downloader/core/fetchers/ixdzs8.py +2 -16
  33. novel_downloader/core/fetchers/jpxs123.py +2 -16
  34. novel_downloader/core/fetchers/lewenn.py +2 -22
  35. novel_downloader/core/fetchers/linovelib.py +4 -20
  36. novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
  37. novel_downloader/core/fetchers/piaotia.py +2 -16
  38. novel_downloader/core/fetchers/qbtr.py +2 -16
  39. novel_downloader/core/fetchers/qianbi.py +1 -20
  40. novel_downloader/core/fetchers/qidian.py +27 -68
  41. novel_downloader/core/fetchers/qqbook.py +177 -0
  42. novel_downloader/core/fetchers/quanben5.py +9 -29
  43. novel_downloader/core/fetchers/rate_limiter.py +22 -53
  44. novel_downloader/core/fetchers/sfacg.py +3 -16
  45. novel_downloader/core/fetchers/shencou.py +2 -16
  46. novel_downloader/core/fetchers/shuhaige.py +2 -22
  47. novel_downloader/core/fetchers/tongrenquan.py +2 -22
  48. novel_downloader/core/fetchers/ttkan.py +3 -14
  49. novel_downloader/core/fetchers/wanbengo.py +2 -22
  50. novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
  51. novel_downloader/core/fetchers/xiguashuwu.py +4 -20
  52. novel_downloader/core/fetchers/xs63b.py +3 -15
  53. novel_downloader/core/fetchers/xshbook.py +2 -22
  54. novel_downloader/core/fetchers/yamibo.py +4 -28
  55. novel_downloader/core/fetchers/yibige.py +13 -26
  56. novel_downloader/core/interfaces/exporter.py +19 -7
  57. novel_downloader/core/interfaces/fetcher.py +23 -49
  58. novel_downloader/core/interfaces/parser.py +2 -2
  59. novel_downloader/core/parsers/__init__.py +4 -2
  60. novel_downloader/core/parsers/b520.py +2 -2
  61. novel_downloader/core/parsers/base.py +5 -39
  62. novel_downloader/core/parsers/esjzone.py +3 -3
  63. novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
  64. novel_downloader/core/parsers/qidian.py +717 -0
  65. novel_downloader/core/parsers/qqbook.py +709 -0
  66. novel_downloader/core/parsers/xiguashuwu.py +8 -15
  67. novel_downloader/core/searchers/__init__.py +2 -2
  68. novel_downloader/core/searchers/b520.py +1 -1
  69. novel_downloader/core/searchers/base.py +2 -2
  70. novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
  71. novel_downloader/locales/en.json +3 -3
  72. novel_downloader/locales/zh.json +3 -3
  73. novel_downloader/models/__init__.py +2 -0
  74. novel_downloader/models/book.py +1 -0
  75. novel_downloader/models/config.py +12 -0
  76. novel_downloader/resources/config/settings.toml +23 -5
  77. novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
  78. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
  79. novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
  80. novel_downloader/utils/__init__.py +0 -2
  81. novel_downloader/utils/chapter_storage.py +2 -3
  82. novel_downloader/utils/constants.py +7 -3
  83. novel_downloader/utils/cookies.py +32 -17
  84. novel_downloader/utils/crypto_utils/__init__.py +0 -6
  85. novel_downloader/utils/crypto_utils/aes_util.py +1 -1
  86. novel_downloader/utils/crypto_utils/rc4.py +40 -50
  87. novel_downloader/utils/epub/__init__.py +2 -3
  88. novel_downloader/utils/epub/builder.py +6 -6
  89. novel_downloader/utils/epub/constants.py +1 -6
  90. novel_downloader/utils/epub/documents.py +7 -7
  91. novel_downloader/utils/epub/models.py +8 -8
  92. novel_downloader/utils/epub/utils.py +10 -10
  93. novel_downloader/utils/file_utils/io.py +48 -73
  94. novel_downloader/utils/file_utils/normalize.py +1 -7
  95. novel_downloader/utils/file_utils/sanitize.py +4 -11
  96. novel_downloader/utils/fontocr/__init__.py +13 -0
  97. novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
  98. novel_downloader/utils/fontocr/loader.py +52 -0
  99. novel_downloader/utils/logger.py +80 -56
  100. novel_downloader/utils/network.py +16 -40
  101. novel_downloader/utils/node_decryptor/__init__.py +13 -0
  102. novel_downloader/utils/node_decryptor/decryptor.py +342 -0
  103. novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
  104. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  105. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  106. novel_downloader/utils/time_utils/sleep_utils.py +53 -43
  107. novel_downloader/web/main.py +1 -1
  108. novel_downloader/web/pages/download.py +1 -1
  109. novel_downloader/web/pages/search.py +4 -4
  110. novel_downloader/web/services/task_manager.py +2 -0
  111. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
  112. novel_downloader-2.0.2.dist-info/RECORD +203 -0
  113. novel_downloader/core/exporters/common/__init__.py +0 -11
  114. novel_downloader/core/exporters/common/epub.py +0 -198
  115. novel_downloader/core/exporters/common/main_exporter.py +0 -64
  116. novel_downloader/core/exporters/common/txt.py +0 -146
  117. novel_downloader/core/exporters/epub_util.py +0 -215
  118. novel_downloader/core/exporters/linovelib/__init__.py +0 -11
  119. novel_downloader/core/exporters/linovelib/epub.py +0 -349
  120. novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
  121. novel_downloader/core/exporters/linovelib/txt.py +0 -139
  122. novel_downloader/core/exporters/txt_util.py +0 -67
  123. novel_downloader/core/parsers/qidian/__init__.py +0 -10
  124. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
  125. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
  126. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
  127. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  128. novel_downloader/core/parsers/qidian/main_parser.py +0 -101
  129. novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
  130. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
  131. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
  132. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
  133. novel_downloader-2.0.0.dist-info/RECORD +0 -210
  134. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
  135. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
  136. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
  137. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -18,10 +18,8 @@ import aiohttp
18
18
  from novel_downloader.core.fetchers.base import BaseSession
19
19
  from novel_downloader.core.fetchers.registry import register_fetcher
20
20
  from novel_downloader.models import FetcherConfig, LoginField
21
- from novel_downloader.utils import (
22
- async_jitter_sleep,
23
- rc4_crypt,
24
- )
21
+ from novel_downloader.utils import async_jitter_sleep
22
+ from novel_downloader.utils.crypto_utils.rc4 import rc4_init, rc4_stream
25
23
 
26
24
 
27
25
  @register_fetcher(
@@ -32,6 +30,8 @@ class QidianSession(BaseSession):
32
30
  A session class for interacting with the 起点中文网 (www.qidian.com) novel website.
33
31
  """
34
32
 
33
+ site_name: str = "qidian"
34
+
35
35
  HOMEPAGE_URL = "https://www.qidian.com/"
36
36
  BOOKCASE_URL = "https://my.qidian.com/bookcase/"
37
37
  BOOK_INFO_URL = "https://www.qidian.com/book/{book_id}/"
@@ -40,10 +40,7 @@ class QidianSession(BaseSession):
40
40
  LOGIN_URL = "https://passport.qidian.com/"
41
41
 
42
42
  _cookie_keys: ClassVar[list[str]] = [
43
- "X2NzcmZUb2tlbg==",
44
43
  "eXdndWlk",
45
- "eXdvcGVuaWQ=",
46
- "eXdrZXk=",
47
44
  "d190c2Zw",
48
45
  ]
49
46
 
@@ -53,7 +50,9 @@ class QidianSession(BaseSession):
53
50
  cookies: dict[str, str] | None = None,
54
51
  **kwargs: Any,
55
52
  ) -> None:
56
- super().__init__("qidian", config, cookies, **kwargs)
53
+ super().__init__(config, cookies, **kwargs)
54
+ self._s_init = rc4_init(self._d2("dGcwOUl0Myo5aA=="))
55
+ self._cookie_key = self._d("d190c2Zw")
57
56
  self._fp_key = self._d("ZmluZ2VycHJpbnQ=")
58
57
  self._ab_key = self._d("YWJub3JtYWw=")
59
58
  self._ck_key = self._d("Y2hlY2tzdW0=")
@@ -85,12 +84,6 @@ class QidianSession(BaseSession):
85
84
  book_id: str,
86
85
  **kwargs: Any,
87
86
  ) -> list[str]:
88
- """
89
- Fetch the raw HTML of the book info page asynchronously.
90
-
91
- :param book_id: The book identifier.
92
- :return: The page content as string list.
93
- """
94
87
  url = self.book_info_url(book_id=book_id)
95
88
  return [await self.fetch(url, **kwargs)]
96
89
 
@@ -100,13 +93,6 @@ class QidianSession(BaseSession):
100
93
  chapter_id: str,
101
94
  **kwargs: Any,
102
95
  ) -> list[str]:
103
- """
104
- Fetch the raw HTML of a single chapter asynchronously.
105
-
106
- :param book_id: The book identifier.
107
- :param chapter_id: The chapter identifier.
108
- :return: The page content as string list.
109
- """
110
96
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
111
97
  return [await self.fetch(url, **kwargs)]
112
98
 
@@ -122,18 +108,6 @@ class QidianSession(BaseSession):
122
108
  url = self.bookcase_url()
123
109
  return [await self.fetch(url, **kwargs)]
124
110
 
125
- async def get_homepage(
126
- self,
127
- **kwargs: Any,
128
- ) -> list[str]:
129
- """
130
- Retrieve the site home page.
131
-
132
- :return: The HTML markup of the home page.
133
- """
134
- url = self.homepage_url()
135
- return [await self.fetch(url, **kwargs)]
136
-
137
111
  @property
138
112
  def login_fields(self) -> list[LoginField]:
139
113
  return [
@@ -165,23 +139,21 @@ class QidianSession(BaseSession):
165
139
  if self._rate_limiter:
166
140
  await self._rate_limiter.wait()
167
141
 
168
- cookie_key = self._d("d190c2Zw")
169
-
170
- for attempt in range(self.retry_times + 1):
142
+ for attempt in range(self._retry_times + 1):
171
143
  try:
172
144
  refreshed_token = self._build_payload_token(url)
173
- self.update_cookies({cookie_key: refreshed_token})
145
+ self.update_cookies({self._cookie_key: refreshed_token})
174
146
 
175
147
  async with self.session.get(url, **kwargs) as resp:
176
148
  resp.raise_for_status()
177
149
  text: str = await resp.text(encoding=encoding)
178
150
  return text
179
151
  except aiohttp.ClientError:
180
- if attempt < self.retry_times:
152
+ if attempt < self._retry_times:
181
153
  await async_jitter_sleep(
182
- self.backoff_factor,
154
+ self._backoff_factor,
183
155
  mul_spread=1.1,
184
- max_sleep=self.backoff_factor + 2,
156
+ max_sleep=self._backoff_factor + 2,
185
157
  )
186
158
  continue
187
159
  raise
@@ -227,40 +199,30 @@ class QidianSession(BaseSession):
227
199
  """
228
200
  return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
229
201
 
230
- def _update_fp_val(
231
- self,
232
- *,
233
- key: str = "",
234
- ) -> None:
235
- """"""
236
- enc_token = self._get_cookie_value(self._d("d190c2Zw"))
202
+ def _update_fp_val(self) -> None:
203
+ """
204
+ Decrypt the payload from cookie and update `_fp_val` and `_ab_val`.
205
+ """
206
+ enc_token = self._get_cookie_value(self._cookie_key)
237
207
  if not enc_token:
238
208
  return
239
- if not key:
240
- key = self._get_key()
241
- decrypted_json: str = rc4_crypt(key, enc_token, mode="decrypt")
209
+
210
+ cipher_bytes = base64.b64decode(enc_token)
211
+ plain_bytes = rc4_stream(self._s_init, cipher_bytes)
212
+ decrypted_json = plain_bytes.decode("utf-8", errors="replace")
242
213
  payload: dict[str, Any] = json.loads(decrypted_json)
243
214
  self._fp_val = payload.get(self._fp_key, "")
244
215
  self._ab_val = payload.get(self._ab_key, "0" * 32)
245
216
 
246
- def _build_payload_token(
247
- self,
248
- new_uri: str,
249
- *,
250
- key: str = "",
251
- ) -> str:
217
+ def _build_payload_token(self, new_uri: str) -> str:
252
218
  """
253
219
  Patch a timestamp-bearing token with fresh timing and checksum info.
254
220
 
255
221
  :param new_uri: URI used in checksum generation.
256
- :param key: RC4 key extracted from front-end JavaScript (optional).
257
-
258
222
  :return: Updated token with new timing and checksum values.
259
223
  """
260
224
  if not self._fp_val or not self._ab_val:
261
225
  self._update_fp_val()
262
- if not key:
263
- key = self._get_key()
264
226
 
265
227
  # rebuild timing fields
266
228
  loadts = int(time.time() * 1000) # ms since epoch
@@ -278,9 +240,9 @@ class QidianSession(BaseSession):
278
240
  self._ab_key: self._ab_val,
279
241
  self._ck_key: ck_val,
280
242
  }
281
- return rc4_crypt(
282
- key, json.dumps(new_payload, separators=(",", ":")), mode="encrypt"
283
- )
243
+ plain_bytes = json.dumps(new_payload, separators=(",", ":")).encode("utf-8")
244
+ cipher_bytes = rc4_stream(self._s_init, plain_bytes)
245
+ return base64.b64encode(cipher_bytes).decode("utf-8")
284
246
 
285
247
  async def _check_login_status(self) -> bool:
286
248
  """
@@ -335,8 +297,5 @@ class QidianSession(BaseSession):
335
297
  return base64.b64decode(b).decode()
336
298
 
337
299
  @staticmethod
338
- def _get_key() -> str:
339
- encoded = "Lj1qYxMuaXBjMg=="
340
- decoded = base64.b64decode(encoded)
341
- key = "".join([chr(b ^ 0x5A) for b in decoded])
342
- return key
300
+ def _d2(b: str) -> bytes:
301
+ return base64.b64decode(b)
@@ -0,0 +1,177 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.qqbook
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ import asyncio
9
+ from typing import Any
10
+
11
+ from novel_downloader.core.fetchers.base import BaseSession
12
+ from novel_downloader.core.fetchers.registry import register_fetcher
13
+ from novel_downloader.models import LoginField
14
+
15
+
16
+ @register_fetcher(
17
+ site_keys=["qqbook", "qq"],
18
+ )
19
+ class QqbookSession(BaseSession):
20
+ """
21
+ A session class for interacting with the QQ 阅读 (book.qq.com) novel website.
22
+ """
23
+
24
+ site_name: str = "qqbook"
25
+
26
+ HOMEPAGE_URL = "https://book.qq.com/"
27
+ BOOKCASE_URL = "https://book.qq.com/book-shelf"
28
+ BOOK_INFO_URL = "https://book.qq.com/book-detail/{book_id}"
29
+ BOOK_CATALOG_URL = "https://book.qq.com/api/book/detail/chapters?bid={book_id}"
30
+ CHAPTER_URL = "https://book.qq.com/book-read/{book_id}/{chapter_id}/"
31
+
32
+ USER_HOMEPAGE_API_URL = "https://book.qq.com/api/user/homepage"
33
+
34
+ async def login(
35
+ self,
36
+ username: str = "",
37
+ password: str = "",
38
+ cookies: dict[str, str] | None = None,
39
+ attempt: int = 1,
40
+ **kwargs: Any,
41
+ ) -> bool:
42
+ """
43
+ Restore cookies persisted by the session-based workflow.
44
+ """
45
+ if not cookies:
46
+ return False
47
+ self.update_cookies(cookies)
48
+
49
+ self._is_logged_in = await self._check_login_status()
50
+ return self._is_logged_in
51
+
52
+ async def get_book_info(
53
+ self,
54
+ book_id: str,
55
+ **kwargs: Any,
56
+ ) -> list[str]:
57
+ """
58
+ Fetch the raw HTML of the book info page asynchronously.
59
+
60
+ Order: [info, catalog]
61
+
62
+ :param book_id: The book identifier.
63
+ :return: The page content as string list.
64
+ """
65
+ info_url = self.book_info_url(book_id=book_id)
66
+ catalog_url = self.book_catalog_url(book_id=book_id)
67
+
68
+ info_html, catalog_html = await asyncio.gather(
69
+ self.fetch(info_url, **kwargs),
70
+ self.fetch(catalog_url, **kwargs),
71
+ )
72
+ return [info_html, catalog_html]
73
+
74
+ async def get_book_chapter(
75
+ self,
76
+ book_id: str,
77
+ chapter_id: str,
78
+ **kwargs: Any,
79
+ ) -> list[str]:
80
+ url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
81
+ return [await self.fetch(url, **kwargs)]
82
+
83
+ async def get_bookcase(
84
+ self,
85
+ **kwargs: Any,
86
+ ) -> list[str]:
87
+ """
88
+ Retrieve the user's *bookcase* page.
89
+
90
+ :return: The HTML markup of the bookcase page.
91
+ """
92
+ url = self.bookcase_url()
93
+ return [await self.fetch(url, **kwargs)]
94
+
95
+ @property
96
+ def login_fields(self) -> list[LoginField]:
97
+ return [
98
+ LoginField(
99
+ name="cookies",
100
+ label="Cookie",
101
+ type="cookie",
102
+ required=True,
103
+ placeholder="请输入你的登录 Cookie",
104
+ description="可以通过浏览器开发者工具复制已登录状态下的 Cookie",
105
+ ),
106
+ ]
107
+
108
+ @classmethod
109
+ def homepage_url(cls) -> str:
110
+ """
111
+ Construct the URL for the site home page.
112
+
113
+ :return: Fully qualified URL of the home page.
114
+ """
115
+ return cls.HOMEPAGE_URL
116
+
117
+ @classmethod
118
+ def bookcase_url(cls) -> str:
119
+ """
120
+ Construct the URL for the user's bookcase page.
121
+
122
+ :return: Fully qualified URL of the bookcase.
123
+ """
124
+ return cls.BOOKCASE_URL
125
+
126
+ @classmethod
127
+ def book_info_url(cls, book_id: str) -> str:
128
+ """
129
+ Construct the URL for fetching a book's info page.
130
+
131
+ :param book_id: The identifier of the book.
132
+ :return: Fully qualified URL for the book info page.
133
+ """
134
+ return cls.BOOK_INFO_URL.format(book_id=book_id)
135
+
136
+ @classmethod
137
+ def book_catalog_url(cls, book_id: str) -> str:
138
+ """
139
+ Construct the URL for fetching a book's catalog page.
140
+
141
+ :param book_id: The identifier of the book.
142
+ :return: Fully qualified catalog page URL.
143
+ """
144
+ return cls.BOOK_CATALOG_URL.format(book_id=book_id)
145
+
146
+ @classmethod
147
+ def chapter_url(cls, book_id: str, chapter_id: str) -> str:
148
+ """
149
+ Construct the URL for fetching a specific chapter.
150
+
151
+ :param book_id: The identifier of the book.
152
+ :param chapter_id: The identifier of the chapter.
153
+ :return: Fully qualified chapter URL.
154
+ """
155
+ return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
156
+
157
+ async def _check_login_status(self) -> bool:
158
+ """
159
+ Check whether the user is currently logged in by
160
+ inspecting the user home page api content.
161
+
162
+ :return: True if the user is logged in, False otherwise.
163
+ """
164
+ try:
165
+ resp = await self.get(self.USER_HOMEPAGE_API_URL)
166
+ resp.raise_for_status()
167
+ payload = await resp.json(encoding="utf-8")
168
+ if payload.get("code") == 0:
169
+ return True
170
+ self.logger.info(
171
+ "login invalid (code=%s): %s",
172
+ payload.get("code"),
173
+ payload.get("msg"),
174
+ )
175
+ except Exception as e:
176
+ self.logger.info("login check failed: %s", e)
177
+ return False
@@ -9,7 +9,6 @@ from typing import Any
9
9
 
10
10
  from novel_downloader.core.fetchers.base import BaseSession
11
11
  from novel_downloader.core.fetchers.registry import register_fetcher
12
- from novel_downloader.models import FetcherConfig
13
12
 
14
13
 
15
14
  @register_fetcher(
@@ -20,34 +19,22 @@ class Quanben5Session(BaseSession):
20
19
  A session class for interacting with the 全本小说网 (quanben5.com) novel website.
21
20
  """
22
21
 
22
+ site_name: str = "quanben5"
23
+ BASE_URL_MAP: dict[str, str] = {
24
+ "simplified": "quanben5.com",
25
+ "traditional": "big5.quanben5.com",
26
+ }
27
+ DEFAULT_BASE_URL: str = "quanben5.com"
28
+
23
29
  BOOK_INFO_URL = "https://{base_url}/n/{book_id}/xiaoshuo.html"
24
30
  CHAPTER_URL = "https://{base_url}/n/{book_id}/{chapter_id}.html"
25
31
 
26
- def __init__(
27
- self,
28
- config: FetcherConfig,
29
- cookies: dict[str, str] | None = None,
30
- **kwargs: Any,
31
- ) -> None:
32
- super().__init__("quanben5", config, cookies, **kwargs)
33
- self.base_url = (
34
- "quanben5.com"
35
- if config.locale_style == "simplified"
36
- else "big5.quanben5.com"
37
- )
38
-
39
32
  async def get_book_info(
40
33
  self,
41
34
  book_id: str,
42
35
  **kwargs: Any,
43
36
  ) -> list[str]:
44
- """
45
- Fetch the raw HTML of the book info page asynchronously.
46
-
47
- :param book_id: The book identifier.
48
- :return: The page content as string list.
49
- """
50
- url = self.book_info_url(base_url=self.base_url, book_id=book_id)
37
+ url = self.book_info_url(base_url=self._base_url, book_id=book_id)
51
38
  return [await self.fetch(url, **kwargs)]
52
39
 
53
40
  async def get_book_chapter(
@@ -56,15 +43,8 @@ class Quanben5Session(BaseSession):
56
43
  chapter_id: str,
57
44
  **kwargs: Any,
58
45
  ) -> list[str]:
59
- """
60
- Fetch the raw HTML of a single chapter asynchronously.
61
-
62
- :param book_id: The book identifier.
63
- :param chapter_id: The chapter identifier.
64
- :return: The page content as string list.
65
- """
66
46
  url = self.chapter_url(
67
- base_url=self.base_url, book_id=book_id, chapter_id=chapter_id
47
+ base_url=self._base_url, book_id=book_id, chapter_id=chapter_id
68
48
  )
69
49
  return [await self.fetch(url, **kwargs)]
70
50
 
@@ -3,6 +3,7 @@
3
3
  novel_downloader.core.fetchers.rate_limiter
4
4
  -------------------------------------------
5
5
 
6
+ An asyncio-compatible token bucket rate limiter.
6
7
  """
7
8
 
8
9
  import asyncio
@@ -10,45 +11,6 @@ import random
10
11
  import time
11
12
 
12
13
 
13
- class RateLimiter:
14
- """
15
- Simple async token-bucket rate limiter:
16
- ensures no more than rate_per_sec
17
- requests are started per second, across all coroutines.
18
- """
19
-
20
- def __init__(self, rate_per_sec: float):
21
- self._interval = 1.0 / rate_per_sec
22
- self._lock = asyncio.Lock()
23
- self._last = time.monotonic()
24
-
25
- async def wait(self) -> None:
26
- async with self._lock:
27
- now = time.monotonic()
28
- elapsed = now - self._last
29
- delay = self._interval - elapsed
30
- if delay > 0:
31
- jitter = random.uniform(0, 0.3)
32
- await asyncio.sleep(delay + jitter)
33
- self._last = time.monotonic()
34
-
35
-
36
- class RateLimiterV2:
37
- def __init__(self, rate_per_sec: float):
38
- self._interval = 1.0 / rate_per_sec
39
- self._lock = asyncio.Lock()
40
- self._next_allowed_time = time.monotonic()
41
-
42
- async def wait(self) -> None:
43
- async with self._lock:
44
- now = time.monotonic()
45
- if now < self._next_allowed_time:
46
- delay = self._next_allowed_time - now
47
- jitter = random.uniform(0, 0.05 * self._interval)
48
- await asyncio.sleep(delay + jitter)
49
- self._next_allowed_time = max(now, self._next_allowed_time) + self._interval
50
-
51
-
52
14
  class TokenBucketRateLimiter:
53
15
  def __init__(
54
16
  self,
@@ -56,9 +18,16 @@ class TokenBucketRateLimiter:
56
18
  burst: int = 10,
57
19
  jitter_strength: float = 0.3,
58
20
  ):
21
+ """
22
+ A simple asyncio-compatible token bucket rate limiter.
23
+
24
+ :param rate: Tokens added per second.
25
+ :param burst: Maximum bucket size (burst capacity).
26
+ :param jitter_strength: Jitter range in seconds (+/-).
27
+ """
59
28
  self.rate = rate
60
29
  self.capacity = burst
61
- self.tokens = burst
30
+ self.tokens = float(burst)
62
31
  self.timestamp = time.monotonic()
63
32
  self.lock = asyncio.Lock()
64
33
  self.jitter_strength = jitter_strength
@@ -68,19 +37,19 @@ class TokenBucketRateLimiter:
68
37
  now = time.monotonic()
69
38
  elapsed = now - self.timestamp
70
39
 
71
- self.tokens = min(self.capacity, int(self.tokens + elapsed * self.rate))
40
+ self.tokens = min(self.capacity, self.tokens + elapsed * self.rate)
72
41
  self.timestamp = now
73
42
 
74
- if self.tokens >= 1:
75
- self.tokens -= 1
76
- jitter = random.uniform(-self.jitter_strength, self.jitter_strength)
77
- if jitter > 0:
78
- await asyncio.sleep(jitter)
43
+ if self.tokens >= 1.0:
44
+ self.tokens -= 1.0
79
45
  return
80
- else:
81
- wait_time = (1 - self.tokens) / self.rate
82
- jitter = random.uniform(-self.jitter_strength, self.jitter_strength)
83
- total_wait = max(0.0, wait_time + jitter)
84
- await asyncio.sleep(total_wait)
85
- self.timestamp = time.monotonic()
86
- self.tokens = max(0, self.tokens - 1)
46
+
47
+ wait_time = (1.0 - self.tokens) / self.rate
48
+ jitter = random.uniform(-self.jitter_strength, self.jitter_strength)
49
+ total_wait = max(0.0, wait_time + jitter)
50
+
51
+ await asyncio.sleep(total_wait)
52
+
53
+ async with self.lock:
54
+ self.timestamp = time.monotonic()
55
+ self.tokens = max(0.0, self.tokens - 1.0)
@@ -9,7 +9,7 @@ from typing import Any
9
9
 
10
10
  from novel_downloader.core.fetchers.base import BaseSession
11
11
  from novel_downloader.core.fetchers.registry import register_fetcher
12
- from novel_downloader.models import FetcherConfig, LoginField
12
+ from novel_downloader.models import LoginField
13
13
 
14
14
 
15
15
  @register_fetcher(
@@ -20,20 +20,14 @@ class SfacgSession(BaseSession):
20
20
  A session class for interacting with the SF轻小说 (m.sfacg.com) novel website.
21
21
  """
22
22
 
23
+ site_name: str = "sfacg"
24
+
23
25
  LOGIN_URL = "https://m.sfacg.com/login"
24
26
  BOOKCASE_URL = "https://m.sfacg.com/sheets/"
25
27
  BOOK_INFO_URL = "https://m.sfacg.com/b/{book_id}/"
26
28
  BOOK_CATALOG_URL = "https://m.sfacg.com/i/{book_id}/"
27
29
  CHAPTER_URL = "https://m.sfacg.com/c/{chapter_id}/"
28
30
 
29
- def __init__(
30
- self,
31
- config: FetcherConfig,
32
- cookies: dict[str, str] | None = None,
33
- **kwargs: Any,
34
- ) -> None:
35
- super().__init__("sfacg", config, cookies, **kwargs)
36
-
37
31
  async def login(
38
32
  self,
39
33
  username: str = "",
@@ -83,13 +77,6 @@ class SfacgSession(BaseSession):
83
77
  chapter_id: str,
84
78
  **kwargs: Any,
85
79
  ) -> list[str]:
86
- """
87
- Fetch the raw HTML of a single chapter asynchronously.
88
-
89
- :param book_id: The book identifier.
90
- :param chapter_id: The chapter identifier.
91
- :return: The page content as string list.
92
- """
93
80
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
94
81
  return [await self.fetch(url, **kwargs)]
95
82
 
@@ -10,7 +10,6 @@ from typing import Any
10
10
 
11
11
  from novel_downloader.core.fetchers.base import BaseSession
12
12
  from novel_downloader.core.fetchers.registry import register_fetcher
13
- from novel_downloader.models import FetcherConfig
14
13
 
15
14
 
16
15
  @register_fetcher(
@@ -21,18 +20,12 @@ class ShencouSession(BaseSession):
21
20
  A session class for interacting with the 神凑轻小说 (www.shencou.com) novel website.
22
21
  """
23
22
 
23
+ site_name: str = "shencou"
24
+
24
25
  BOOK_INFO_URL = "https://www.shencou.com/books/read_{book_id}.html"
25
26
  BOOK_CATALOG_URL = "https://www.shencou.com/read/{book_id}/index.html"
26
27
  CHAPTER_URL = "https://www.shencou.com/read/{book_id}/{chapter_id}.html"
27
28
 
28
- def __init__(
29
- self,
30
- config: FetcherConfig,
31
- cookies: dict[str, str] | None = None,
32
- **kwargs: Any,
33
- ) -> None:
34
- super().__init__("shencou", config, cookies, **kwargs)
35
-
36
29
  async def get_book_info(
37
30
  self,
38
31
  book_id: str,
@@ -62,13 +55,6 @@ class ShencouSession(BaseSession):
62
55
  chapter_id: str,
63
56
  **kwargs: Any,
64
57
  ) -> list[str]:
65
- """
66
- Fetch the raw HTML of a single chapter asynchronously.
67
-
68
- :param book_id: The book identifier.
69
- :param chapter_id: The chapter identifier.
70
- :return: The page content as string list.
71
- """
72
58
  book_id = book_id.replace("-", "/")
73
59
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
74
60
  return [await self.fetch(url, **kwargs)]
@@ -9,7 +9,6 @@ from typing import Any
9
9
 
10
10
  from novel_downloader.core.fetchers.base import BaseSession
11
11
  from novel_downloader.core.fetchers.registry import register_fetcher
12
- from novel_downloader.models import FetcherConfig
13
12
 
14
13
 
15
14
  @register_fetcher(
@@ -21,28 +20,16 @@ class ShuhaigeSession(BaseSession):
21
20
  书海阁小说网 (www.shuhaige.net) novel website.
22
21
  """
23
22
 
23
+ site_name: str = "shuhaige"
24
+
24
25
  BOOK_INFO_URL = "https://www.shuhaige.net/{book_id}/"
25
26
  CHAPTER_URL = "https://www.shuhaige.net/{book_id}/{chapter_id}.html"
26
27
 
27
- def __init__(
28
- self,
29
- config: FetcherConfig,
30
- cookies: dict[str, str] | None = None,
31
- **kwargs: Any,
32
- ) -> None:
33
- super().__init__("shuhaige", config, cookies, **kwargs)
34
-
35
28
  async def get_book_info(
36
29
  self,
37
30
  book_id: str,
38
31
  **kwargs: Any,
39
32
  ) -> list[str]:
40
- """
41
- Fetch the raw HTML of the book info page asynchronously.
42
-
43
- :param book_id: The book identifier.
44
- :return: The page content as string list.
45
- """
46
33
  url = self.book_info_url(book_id=book_id)
47
34
  return [await self.fetch(url, **kwargs)]
48
35
 
@@ -52,13 +39,6 @@ class ShuhaigeSession(BaseSession):
52
39
  chapter_id: str,
53
40
  **kwargs: Any,
54
41
  ) -> list[str]:
55
- """
56
- Fetch the raw HTML of a single chapter asynchronously.
57
-
58
- :param book_id: The book identifier.
59
- :param chapter_id: The chapter identifier.
60
- :return: The page content as string list.
61
- """
62
42
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
63
43
  return [await self.fetch(url, **kwargs)]
64
44