novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +14 -11
- novel_downloader/cli/export.py +19 -19
- novel_downloader/cli/ui.py +35 -8
- novel_downloader/config/adapter.py +216 -153
- novel_downloader/core/__init__.py +5 -6
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +27 -68
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +23 -49
- novel_downloader/core/interfaces/parser.py +2 -2
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +5 -39
- novel_downloader/core/parsers/esjzone.py +3 -3
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
- novel_downloader/core/parsers/qidian.py +717 -0
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +8 -15
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/locales/en.json +3 -3
- novel_downloader/locales/zh.json +3 -3
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/__init__.py +0 -2
- novel_downloader/utils/chapter_storage.py +2 -3
- novel_downloader/utils/constants.py +7 -3
- novel_downloader/utils/cookies.py +32 -17
- novel_downloader/utils/crypto_utils/__init__.py +0 -6
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/crypto_utils/rc4.py +40 -50
- novel_downloader/utils/epub/__init__.py +2 -3
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/epub/documents.py +7 -7
- novel_downloader/utils/epub/models.py +8 -8
- novel_downloader/utils/epub/utils.py +10 -10
- novel_downloader/utils/file_utils/io.py +48 -73
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -11
- novel_downloader/utils/fontocr/__init__.py +13 -0
- novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
- novel_downloader/utils/fontocr/loader.py +52 -0
- novel_downloader/utils/logger.py +80 -56
- novel_downloader/utils/network.py +16 -40
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/sleep_utils.py +53 -43
- novel_downloader/web/main.py +1 -1
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +4 -4
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
- novel_downloader-2.0.2.dist-info/RECORD +203 -0
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/main_parser.py +0 -101
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- novel_downloader-2.0.0.dist-info/RECORD +0 -210
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -18,10 +18,8 @@ import aiohttp
|
|
18
18
|
from novel_downloader.core.fetchers.base import BaseSession
|
19
19
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
20
20
|
from novel_downloader.models import FetcherConfig, LoginField
|
21
|
-
from novel_downloader.utils import
|
22
|
-
|
23
|
-
rc4_crypt,
|
24
|
-
)
|
21
|
+
from novel_downloader.utils import async_jitter_sleep
|
22
|
+
from novel_downloader.utils.crypto_utils.rc4 import rc4_init, rc4_stream
|
25
23
|
|
26
24
|
|
27
25
|
@register_fetcher(
|
@@ -32,6 +30,8 @@ class QidianSession(BaseSession):
|
|
32
30
|
A session class for interacting with the 起点中文网 (www.qidian.com) novel website.
|
33
31
|
"""
|
34
32
|
|
33
|
+
site_name: str = "qidian"
|
34
|
+
|
35
35
|
HOMEPAGE_URL = "https://www.qidian.com/"
|
36
36
|
BOOKCASE_URL = "https://my.qidian.com/bookcase/"
|
37
37
|
BOOK_INFO_URL = "https://www.qidian.com/book/{book_id}/"
|
@@ -40,10 +40,7 @@ class QidianSession(BaseSession):
|
|
40
40
|
LOGIN_URL = "https://passport.qidian.com/"
|
41
41
|
|
42
42
|
_cookie_keys: ClassVar[list[str]] = [
|
43
|
-
"X2NzcmZUb2tlbg==",
|
44
43
|
"eXdndWlk",
|
45
|
-
"eXdvcGVuaWQ=",
|
46
|
-
"eXdrZXk=",
|
47
44
|
"d190c2Zw",
|
48
45
|
]
|
49
46
|
|
@@ -53,7 +50,9 @@ class QidianSession(BaseSession):
|
|
53
50
|
cookies: dict[str, str] | None = None,
|
54
51
|
**kwargs: Any,
|
55
52
|
) -> None:
|
56
|
-
super().__init__(
|
53
|
+
super().__init__(config, cookies, **kwargs)
|
54
|
+
self._s_init = rc4_init(self._d2("dGcwOUl0Myo5aA=="))
|
55
|
+
self._cookie_key = self._d("d190c2Zw")
|
57
56
|
self._fp_key = self._d("ZmluZ2VycHJpbnQ=")
|
58
57
|
self._ab_key = self._d("YWJub3JtYWw=")
|
59
58
|
self._ck_key = self._d("Y2hlY2tzdW0=")
|
@@ -85,12 +84,6 @@ class QidianSession(BaseSession):
|
|
85
84
|
book_id: str,
|
86
85
|
**kwargs: Any,
|
87
86
|
) -> list[str]:
|
88
|
-
"""
|
89
|
-
Fetch the raw HTML of the book info page asynchronously.
|
90
|
-
|
91
|
-
:param book_id: The book identifier.
|
92
|
-
:return: The page content as string list.
|
93
|
-
"""
|
94
87
|
url = self.book_info_url(book_id=book_id)
|
95
88
|
return [await self.fetch(url, **kwargs)]
|
96
89
|
|
@@ -100,13 +93,6 @@ class QidianSession(BaseSession):
|
|
100
93
|
chapter_id: str,
|
101
94
|
**kwargs: Any,
|
102
95
|
) -> list[str]:
|
103
|
-
"""
|
104
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
105
|
-
|
106
|
-
:param book_id: The book identifier.
|
107
|
-
:param chapter_id: The chapter identifier.
|
108
|
-
:return: The page content as string list.
|
109
|
-
"""
|
110
96
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
111
97
|
return [await self.fetch(url, **kwargs)]
|
112
98
|
|
@@ -122,18 +108,6 @@ class QidianSession(BaseSession):
|
|
122
108
|
url = self.bookcase_url()
|
123
109
|
return [await self.fetch(url, **kwargs)]
|
124
110
|
|
125
|
-
async def get_homepage(
|
126
|
-
self,
|
127
|
-
**kwargs: Any,
|
128
|
-
) -> list[str]:
|
129
|
-
"""
|
130
|
-
Retrieve the site home page.
|
131
|
-
|
132
|
-
:return: The HTML markup of the home page.
|
133
|
-
"""
|
134
|
-
url = self.homepage_url()
|
135
|
-
return [await self.fetch(url, **kwargs)]
|
136
|
-
|
137
111
|
@property
|
138
112
|
def login_fields(self) -> list[LoginField]:
|
139
113
|
return [
|
@@ -165,23 +139,21 @@ class QidianSession(BaseSession):
|
|
165
139
|
if self._rate_limiter:
|
166
140
|
await self._rate_limiter.wait()
|
167
141
|
|
168
|
-
|
169
|
-
|
170
|
-
for attempt in range(self.retry_times + 1):
|
142
|
+
for attempt in range(self._retry_times + 1):
|
171
143
|
try:
|
172
144
|
refreshed_token = self._build_payload_token(url)
|
173
|
-
self.update_cookies({
|
145
|
+
self.update_cookies({self._cookie_key: refreshed_token})
|
174
146
|
|
175
147
|
async with self.session.get(url, **kwargs) as resp:
|
176
148
|
resp.raise_for_status()
|
177
149
|
text: str = await resp.text(encoding=encoding)
|
178
150
|
return text
|
179
151
|
except aiohttp.ClientError:
|
180
|
-
if attempt < self.
|
152
|
+
if attempt < self._retry_times:
|
181
153
|
await async_jitter_sleep(
|
182
|
-
self.
|
154
|
+
self._backoff_factor,
|
183
155
|
mul_spread=1.1,
|
184
|
-
max_sleep=self.
|
156
|
+
max_sleep=self._backoff_factor + 2,
|
185
157
|
)
|
186
158
|
continue
|
187
159
|
raise
|
@@ -227,40 +199,30 @@ class QidianSession(BaseSession):
|
|
227
199
|
"""
|
228
200
|
return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
|
229
201
|
|
230
|
-
def _update_fp_val(
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
""""""
|
236
|
-
enc_token = self._get_cookie_value(self._d("d190c2Zw"))
|
202
|
+
def _update_fp_val(self) -> None:
|
203
|
+
"""
|
204
|
+
Decrypt the payload from cookie and update `_fp_val` and `_ab_val`.
|
205
|
+
"""
|
206
|
+
enc_token = self._get_cookie_value(self._cookie_key)
|
237
207
|
if not enc_token:
|
238
208
|
return
|
239
|
-
|
240
|
-
|
241
|
-
|
209
|
+
|
210
|
+
cipher_bytes = base64.b64decode(enc_token)
|
211
|
+
plain_bytes = rc4_stream(self._s_init, cipher_bytes)
|
212
|
+
decrypted_json = plain_bytes.decode("utf-8", errors="replace")
|
242
213
|
payload: dict[str, Any] = json.loads(decrypted_json)
|
243
214
|
self._fp_val = payload.get(self._fp_key, "")
|
244
215
|
self._ab_val = payload.get(self._ab_key, "0" * 32)
|
245
216
|
|
246
|
-
def _build_payload_token(
|
247
|
-
self,
|
248
|
-
new_uri: str,
|
249
|
-
*,
|
250
|
-
key: str = "",
|
251
|
-
) -> str:
|
217
|
+
def _build_payload_token(self, new_uri: str) -> str:
|
252
218
|
"""
|
253
219
|
Patch a timestamp-bearing token with fresh timing and checksum info.
|
254
220
|
|
255
221
|
:param new_uri: URI used in checksum generation.
|
256
|
-
:param key: RC4 key extracted from front-end JavaScript (optional).
|
257
|
-
|
258
222
|
:return: Updated token with new timing and checksum values.
|
259
223
|
"""
|
260
224
|
if not self._fp_val or not self._ab_val:
|
261
225
|
self._update_fp_val()
|
262
|
-
if not key:
|
263
|
-
key = self._get_key()
|
264
226
|
|
265
227
|
# rebuild timing fields
|
266
228
|
loadts = int(time.time() * 1000) # ms since epoch
|
@@ -278,9 +240,9 @@ class QidianSession(BaseSession):
|
|
278
240
|
self._ab_key: self._ab_val,
|
279
241
|
self._ck_key: ck_val,
|
280
242
|
}
|
281
|
-
|
282
|
-
|
283
|
-
)
|
243
|
+
plain_bytes = json.dumps(new_payload, separators=(",", ":")).encode("utf-8")
|
244
|
+
cipher_bytes = rc4_stream(self._s_init, plain_bytes)
|
245
|
+
return base64.b64encode(cipher_bytes).decode("utf-8")
|
284
246
|
|
285
247
|
async def _check_login_status(self) -> bool:
|
286
248
|
"""
|
@@ -335,8 +297,5 @@ class QidianSession(BaseSession):
|
|
335
297
|
return base64.b64decode(b).decode()
|
336
298
|
|
337
299
|
@staticmethod
|
338
|
-
def
|
339
|
-
|
340
|
-
decoded = base64.b64decode(encoded)
|
341
|
-
key = "".join([chr(b ^ 0x5A) for b in decoded])
|
342
|
-
return key
|
300
|
+
def _d2(b: str) -> bytes:
|
301
|
+
return base64.b64decode(b)
|
@@ -0,0 +1,177 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.qqbook
|
4
|
+
-------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import asyncio
|
9
|
+
from typing import Any
|
10
|
+
|
11
|
+
from novel_downloader.core.fetchers.base import BaseSession
|
12
|
+
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
+
from novel_downloader.models import LoginField
|
14
|
+
|
15
|
+
|
16
|
+
@register_fetcher(
|
17
|
+
site_keys=["qqbook", "qq"],
|
18
|
+
)
|
19
|
+
class QqbookSession(BaseSession):
|
20
|
+
"""
|
21
|
+
A session class for interacting with the QQ 阅读 (book.qq.com) novel website.
|
22
|
+
"""
|
23
|
+
|
24
|
+
site_name: str = "qqbook"
|
25
|
+
|
26
|
+
HOMEPAGE_URL = "https://book.qq.com/"
|
27
|
+
BOOKCASE_URL = "https://book.qq.com/book-shelf"
|
28
|
+
BOOK_INFO_URL = "https://book.qq.com/book-detail/{book_id}"
|
29
|
+
BOOK_CATALOG_URL = "https://book.qq.com/api/book/detail/chapters?bid={book_id}"
|
30
|
+
CHAPTER_URL = "https://book.qq.com/book-read/{book_id}/{chapter_id}/"
|
31
|
+
|
32
|
+
USER_HOMEPAGE_API_URL = "https://book.qq.com/api/user/homepage"
|
33
|
+
|
34
|
+
async def login(
|
35
|
+
self,
|
36
|
+
username: str = "",
|
37
|
+
password: str = "",
|
38
|
+
cookies: dict[str, str] | None = None,
|
39
|
+
attempt: int = 1,
|
40
|
+
**kwargs: Any,
|
41
|
+
) -> bool:
|
42
|
+
"""
|
43
|
+
Restore cookies persisted by the session-based workflow.
|
44
|
+
"""
|
45
|
+
if not cookies:
|
46
|
+
return False
|
47
|
+
self.update_cookies(cookies)
|
48
|
+
|
49
|
+
self._is_logged_in = await self._check_login_status()
|
50
|
+
return self._is_logged_in
|
51
|
+
|
52
|
+
async def get_book_info(
|
53
|
+
self,
|
54
|
+
book_id: str,
|
55
|
+
**kwargs: Any,
|
56
|
+
) -> list[str]:
|
57
|
+
"""
|
58
|
+
Fetch the raw HTML of the book info page asynchronously.
|
59
|
+
|
60
|
+
Order: [info, catalog]
|
61
|
+
|
62
|
+
:param book_id: The book identifier.
|
63
|
+
:return: The page content as string list.
|
64
|
+
"""
|
65
|
+
info_url = self.book_info_url(book_id=book_id)
|
66
|
+
catalog_url = self.book_catalog_url(book_id=book_id)
|
67
|
+
|
68
|
+
info_html, catalog_html = await asyncio.gather(
|
69
|
+
self.fetch(info_url, **kwargs),
|
70
|
+
self.fetch(catalog_url, **kwargs),
|
71
|
+
)
|
72
|
+
return [info_html, catalog_html]
|
73
|
+
|
74
|
+
async def get_book_chapter(
|
75
|
+
self,
|
76
|
+
book_id: str,
|
77
|
+
chapter_id: str,
|
78
|
+
**kwargs: Any,
|
79
|
+
) -> list[str]:
|
80
|
+
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
81
|
+
return [await self.fetch(url, **kwargs)]
|
82
|
+
|
83
|
+
async def get_bookcase(
|
84
|
+
self,
|
85
|
+
**kwargs: Any,
|
86
|
+
) -> list[str]:
|
87
|
+
"""
|
88
|
+
Retrieve the user's *bookcase* page.
|
89
|
+
|
90
|
+
:return: The HTML markup of the bookcase page.
|
91
|
+
"""
|
92
|
+
url = self.bookcase_url()
|
93
|
+
return [await self.fetch(url, **kwargs)]
|
94
|
+
|
95
|
+
@property
|
96
|
+
def login_fields(self) -> list[LoginField]:
|
97
|
+
return [
|
98
|
+
LoginField(
|
99
|
+
name="cookies",
|
100
|
+
label="Cookie",
|
101
|
+
type="cookie",
|
102
|
+
required=True,
|
103
|
+
placeholder="请输入你的登录 Cookie",
|
104
|
+
description="可以通过浏览器开发者工具复制已登录状态下的 Cookie",
|
105
|
+
),
|
106
|
+
]
|
107
|
+
|
108
|
+
@classmethod
|
109
|
+
def homepage_url(cls) -> str:
|
110
|
+
"""
|
111
|
+
Construct the URL for the site home page.
|
112
|
+
|
113
|
+
:return: Fully qualified URL of the home page.
|
114
|
+
"""
|
115
|
+
return cls.HOMEPAGE_URL
|
116
|
+
|
117
|
+
@classmethod
|
118
|
+
def bookcase_url(cls) -> str:
|
119
|
+
"""
|
120
|
+
Construct the URL for the user's bookcase page.
|
121
|
+
|
122
|
+
:return: Fully qualified URL of the bookcase.
|
123
|
+
"""
|
124
|
+
return cls.BOOKCASE_URL
|
125
|
+
|
126
|
+
@classmethod
|
127
|
+
def book_info_url(cls, book_id: str) -> str:
|
128
|
+
"""
|
129
|
+
Construct the URL for fetching a book's info page.
|
130
|
+
|
131
|
+
:param book_id: The identifier of the book.
|
132
|
+
:return: Fully qualified URL for the book info page.
|
133
|
+
"""
|
134
|
+
return cls.BOOK_INFO_URL.format(book_id=book_id)
|
135
|
+
|
136
|
+
@classmethod
|
137
|
+
def book_catalog_url(cls, book_id: str) -> str:
|
138
|
+
"""
|
139
|
+
Construct the URL for fetching a book's catalog page.
|
140
|
+
|
141
|
+
:param book_id: The identifier of the book.
|
142
|
+
:return: Fully qualified catalog page URL.
|
143
|
+
"""
|
144
|
+
return cls.BOOK_CATALOG_URL.format(book_id=book_id)
|
145
|
+
|
146
|
+
@classmethod
|
147
|
+
def chapter_url(cls, book_id: str, chapter_id: str) -> str:
|
148
|
+
"""
|
149
|
+
Construct the URL for fetching a specific chapter.
|
150
|
+
|
151
|
+
:param book_id: The identifier of the book.
|
152
|
+
:param chapter_id: The identifier of the chapter.
|
153
|
+
:return: Fully qualified chapter URL.
|
154
|
+
"""
|
155
|
+
return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
|
156
|
+
|
157
|
+
async def _check_login_status(self) -> bool:
|
158
|
+
"""
|
159
|
+
Check whether the user is currently logged in by
|
160
|
+
inspecting the user home page api content.
|
161
|
+
|
162
|
+
:return: True if the user is logged in, False otherwise.
|
163
|
+
"""
|
164
|
+
try:
|
165
|
+
resp = await self.get(self.USER_HOMEPAGE_API_URL)
|
166
|
+
resp.raise_for_status()
|
167
|
+
payload = await resp.json(encoding="utf-8")
|
168
|
+
if payload.get("code") == 0:
|
169
|
+
return True
|
170
|
+
self.logger.info(
|
171
|
+
"login invalid (code=%s): %s",
|
172
|
+
payload.get("code"),
|
173
|
+
payload.get("msg"),
|
174
|
+
)
|
175
|
+
except Exception as e:
|
176
|
+
self.logger.info("login check failed: %s", e)
|
177
|
+
return False
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -20,34 +19,22 @@ class Quanben5Session(BaseSession):
|
|
20
19
|
A session class for interacting with the 全本小说网 (quanben5.com) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "quanben5"
|
23
|
+
BASE_URL_MAP: dict[str, str] = {
|
24
|
+
"simplified": "quanben5.com",
|
25
|
+
"traditional": "big5.quanben5.com",
|
26
|
+
}
|
27
|
+
DEFAULT_BASE_URL: str = "quanben5.com"
|
28
|
+
|
23
29
|
BOOK_INFO_URL = "https://{base_url}/n/{book_id}/xiaoshuo.html"
|
24
30
|
CHAPTER_URL = "https://{base_url}/n/{book_id}/{chapter_id}.html"
|
25
31
|
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
cookies: dict[str, str] | None = None,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("quanben5", config, cookies, **kwargs)
|
33
|
-
self.base_url = (
|
34
|
-
"quanben5.com"
|
35
|
-
if config.locale_style == "simplified"
|
36
|
-
else "big5.quanben5.com"
|
37
|
-
)
|
38
|
-
|
39
32
|
async def get_book_info(
|
40
33
|
self,
|
41
34
|
book_id: str,
|
42
35
|
**kwargs: Any,
|
43
36
|
) -> list[str]:
|
44
|
-
|
45
|
-
Fetch the raw HTML of the book info page asynchronously.
|
46
|
-
|
47
|
-
:param book_id: The book identifier.
|
48
|
-
:return: The page content as string list.
|
49
|
-
"""
|
50
|
-
url = self.book_info_url(base_url=self.base_url, book_id=book_id)
|
37
|
+
url = self.book_info_url(base_url=self._base_url, book_id=book_id)
|
51
38
|
return [await self.fetch(url, **kwargs)]
|
52
39
|
|
53
40
|
async def get_book_chapter(
|
@@ -56,15 +43,8 @@ class Quanben5Session(BaseSession):
|
|
56
43
|
chapter_id: str,
|
57
44
|
**kwargs: Any,
|
58
45
|
) -> list[str]:
|
59
|
-
"""
|
60
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
61
|
-
|
62
|
-
:param book_id: The book identifier.
|
63
|
-
:param chapter_id: The chapter identifier.
|
64
|
-
:return: The page content as string list.
|
65
|
-
"""
|
66
46
|
url = self.chapter_url(
|
67
|
-
base_url=self.
|
47
|
+
base_url=self._base_url, book_id=book_id, chapter_id=chapter_id
|
68
48
|
)
|
69
49
|
return [await self.fetch(url, **kwargs)]
|
70
50
|
|
@@ -3,6 +3,7 @@
|
|
3
3
|
novel_downloader.core.fetchers.rate_limiter
|
4
4
|
-------------------------------------------
|
5
5
|
|
6
|
+
An asyncio-compatible token bucket rate limiter.
|
6
7
|
"""
|
7
8
|
|
8
9
|
import asyncio
|
@@ -10,45 +11,6 @@ import random
|
|
10
11
|
import time
|
11
12
|
|
12
13
|
|
13
|
-
class RateLimiter:
|
14
|
-
"""
|
15
|
-
Simple async token-bucket rate limiter:
|
16
|
-
ensures no more than rate_per_sec
|
17
|
-
requests are started per second, across all coroutines.
|
18
|
-
"""
|
19
|
-
|
20
|
-
def __init__(self, rate_per_sec: float):
|
21
|
-
self._interval = 1.0 / rate_per_sec
|
22
|
-
self._lock = asyncio.Lock()
|
23
|
-
self._last = time.monotonic()
|
24
|
-
|
25
|
-
async def wait(self) -> None:
|
26
|
-
async with self._lock:
|
27
|
-
now = time.monotonic()
|
28
|
-
elapsed = now - self._last
|
29
|
-
delay = self._interval - elapsed
|
30
|
-
if delay > 0:
|
31
|
-
jitter = random.uniform(0, 0.3)
|
32
|
-
await asyncio.sleep(delay + jitter)
|
33
|
-
self._last = time.monotonic()
|
34
|
-
|
35
|
-
|
36
|
-
class RateLimiterV2:
|
37
|
-
def __init__(self, rate_per_sec: float):
|
38
|
-
self._interval = 1.0 / rate_per_sec
|
39
|
-
self._lock = asyncio.Lock()
|
40
|
-
self._next_allowed_time = time.monotonic()
|
41
|
-
|
42
|
-
async def wait(self) -> None:
|
43
|
-
async with self._lock:
|
44
|
-
now = time.monotonic()
|
45
|
-
if now < self._next_allowed_time:
|
46
|
-
delay = self._next_allowed_time - now
|
47
|
-
jitter = random.uniform(0, 0.05 * self._interval)
|
48
|
-
await asyncio.sleep(delay + jitter)
|
49
|
-
self._next_allowed_time = max(now, self._next_allowed_time) + self._interval
|
50
|
-
|
51
|
-
|
52
14
|
class TokenBucketRateLimiter:
|
53
15
|
def __init__(
|
54
16
|
self,
|
@@ -56,9 +18,16 @@ class TokenBucketRateLimiter:
|
|
56
18
|
burst: int = 10,
|
57
19
|
jitter_strength: float = 0.3,
|
58
20
|
):
|
21
|
+
"""
|
22
|
+
A simple asyncio-compatible token bucket rate limiter.
|
23
|
+
|
24
|
+
:param rate: Tokens added per second.
|
25
|
+
:param burst: Maximum bucket size (burst capacity).
|
26
|
+
:param jitter_strength: Jitter range in seconds (+/-).
|
27
|
+
"""
|
59
28
|
self.rate = rate
|
60
29
|
self.capacity = burst
|
61
|
-
self.tokens = burst
|
30
|
+
self.tokens = float(burst)
|
62
31
|
self.timestamp = time.monotonic()
|
63
32
|
self.lock = asyncio.Lock()
|
64
33
|
self.jitter_strength = jitter_strength
|
@@ -68,19 +37,19 @@ class TokenBucketRateLimiter:
|
|
68
37
|
now = time.monotonic()
|
69
38
|
elapsed = now - self.timestamp
|
70
39
|
|
71
|
-
self.tokens = min(self.capacity,
|
40
|
+
self.tokens = min(self.capacity, self.tokens + elapsed * self.rate)
|
72
41
|
self.timestamp = now
|
73
42
|
|
74
|
-
if self.tokens >= 1:
|
75
|
-
self.tokens -= 1
|
76
|
-
jitter = random.uniform(-self.jitter_strength, self.jitter_strength)
|
77
|
-
if jitter > 0:
|
78
|
-
await asyncio.sleep(jitter)
|
43
|
+
if self.tokens >= 1.0:
|
44
|
+
self.tokens -= 1.0
|
79
45
|
return
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
46
|
+
|
47
|
+
wait_time = (1.0 - self.tokens) / self.rate
|
48
|
+
jitter = random.uniform(-self.jitter_strength, self.jitter_strength)
|
49
|
+
total_wait = max(0.0, wait_time + jitter)
|
50
|
+
|
51
|
+
await asyncio.sleep(total_wait)
|
52
|
+
|
53
|
+
async with self.lock:
|
54
|
+
self.timestamp = time.monotonic()
|
55
|
+
self.tokens = max(0.0, self.tokens - 1.0)
|
@@ -9,7 +9,7 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import
|
12
|
+
from novel_downloader.models import LoginField
|
13
13
|
|
14
14
|
|
15
15
|
@register_fetcher(
|
@@ -20,20 +20,14 @@ class SfacgSession(BaseSession):
|
|
20
20
|
A session class for interacting with the SF轻小说 (m.sfacg.com) novel website.
|
21
21
|
"""
|
22
22
|
|
23
|
+
site_name: str = "sfacg"
|
24
|
+
|
23
25
|
LOGIN_URL = "https://m.sfacg.com/login"
|
24
26
|
BOOKCASE_URL = "https://m.sfacg.com/sheets/"
|
25
27
|
BOOK_INFO_URL = "https://m.sfacg.com/b/{book_id}/"
|
26
28
|
BOOK_CATALOG_URL = "https://m.sfacg.com/i/{book_id}/"
|
27
29
|
CHAPTER_URL = "https://m.sfacg.com/c/{chapter_id}/"
|
28
30
|
|
29
|
-
def __init__(
|
30
|
-
self,
|
31
|
-
config: FetcherConfig,
|
32
|
-
cookies: dict[str, str] | None = None,
|
33
|
-
**kwargs: Any,
|
34
|
-
) -> None:
|
35
|
-
super().__init__("sfacg", config, cookies, **kwargs)
|
36
|
-
|
37
31
|
async def login(
|
38
32
|
self,
|
39
33
|
username: str = "",
|
@@ -83,13 +77,6 @@ class SfacgSession(BaseSession):
|
|
83
77
|
chapter_id: str,
|
84
78
|
**kwargs: Any,
|
85
79
|
) -> list[str]:
|
86
|
-
"""
|
87
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
88
|
-
|
89
|
-
:param book_id: The book identifier.
|
90
|
-
:param chapter_id: The chapter identifier.
|
91
|
-
:return: The page content as string list.
|
92
|
-
"""
|
93
80
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
94
81
|
return [await self.fetch(url, **kwargs)]
|
95
82
|
|
@@ -10,7 +10,6 @@ from typing import Any
|
|
10
10
|
|
11
11
|
from novel_downloader.core.fetchers.base import BaseSession
|
12
12
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
-
from novel_downloader.models import FetcherConfig
|
14
13
|
|
15
14
|
|
16
15
|
@register_fetcher(
|
@@ -21,18 +20,12 @@ class ShencouSession(BaseSession):
|
|
21
20
|
A session class for interacting with the 神凑轻小说 (www.shencou.com) novel website.
|
22
21
|
"""
|
23
22
|
|
23
|
+
site_name: str = "shencou"
|
24
|
+
|
24
25
|
BOOK_INFO_URL = "https://www.shencou.com/books/read_{book_id}.html"
|
25
26
|
BOOK_CATALOG_URL = "https://www.shencou.com/read/{book_id}/index.html"
|
26
27
|
CHAPTER_URL = "https://www.shencou.com/read/{book_id}/{chapter_id}.html"
|
27
28
|
|
28
|
-
def __init__(
|
29
|
-
self,
|
30
|
-
config: FetcherConfig,
|
31
|
-
cookies: dict[str, str] | None = None,
|
32
|
-
**kwargs: Any,
|
33
|
-
) -> None:
|
34
|
-
super().__init__("shencou", config, cookies, **kwargs)
|
35
|
-
|
36
29
|
async def get_book_info(
|
37
30
|
self,
|
38
31
|
book_id: str,
|
@@ -62,13 +55,6 @@ class ShencouSession(BaseSession):
|
|
62
55
|
chapter_id: str,
|
63
56
|
**kwargs: Any,
|
64
57
|
) -> list[str]:
|
65
|
-
"""
|
66
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
67
|
-
|
68
|
-
:param book_id: The book identifier.
|
69
|
-
:param chapter_id: The chapter identifier.
|
70
|
-
:return: The page content as string list.
|
71
|
-
"""
|
72
58
|
book_id = book_id.replace("-", "/")
|
73
59
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
74
60
|
return [await self.fetch(url, **kwargs)]
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -21,28 +20,16 @@ class ShuhaigeSession(BaseSession):
|
|
21
20
|
书海阁小说网 (www.shuhaige.net) novel website.
|
22
21
|
"""
|
23
22
|
|
23
|
+
site_name: str = "shuhaige"
|
24
|
+
|
24
25
|
BOOK_INFO_URL = "https://www.shuhaige.net/{book_id}/"
|
25
26
|
CHAPTER_URL = "https://www.shuhaige.net/{book_id}/{chapter_id}.html"
|
26
27
|
|
27
|
-
def __init__(
|
28
|
-
self,
|
29
|
-
config: FetcherConfig,
|
30
|
-
cookies: dict[str, str] | None = None,
|
31
|
-
**kwargs: Any,
|
32
|
-
) -> None:
|
33
|
-
super().__init__("shuhaige", config, cookies, **kwargs)
|
34
|
-
|
35
28
|
async def get_book_info(
|
36
29
|
self,
|
37
30
|
book_id: str,
|
38
31
|
**kwargs: Any,
|
39
32
|
) -> list[str]:
|
40
|
-
"""
|
41
|
-
Fetch the raw HTML of the book info page asynchronously.
|
42
|
-
|
43
|
-
:param book_id: The book identifier.
|
44
|
-
:return: The page content as string list.
|
45
|
-
"""
|
46
33
|
url = self.book_info_url(book_id=book_id)
|
47
34
|
return [await self.fetch(url, **kwargs)]
|
48
35
|
|
@@ -52,13 +39,6 @@ class ShuhaigeSession(BaseSession):
|
|
52
39
|
chapter_id: str,
|
53
40
|
**kwargs: Any,
|
54
41
|
) -> list[str]:
|
55
|
-
"""
|
56
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
57
|
-
|
58
|
-
:param book_id: The book identifier.
|
59
|
-
:param chapter_id: The chapter identifier.
|
60
|
-
:return: The page content as string list.
|
61
|
-
"""
|
62
42
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
63
43
|
return [await self.fetch(url, **kwargs)]
|
64
44
|
|