novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +1 -3
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +26 -21
- novel_downloader/cli/download.py +79 -66
- novel_downloader/cli/export.py +17 -21
- novel_downloader/cli/main.py +1 -1
- novel_downloader/cli/search.py +62 -65
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +8 -5
- novel_downloader/config/adapter.py +206 -209
- novel_downloader/config/{loader.py → file_io.py} +53 -26
- novel_downloader/core/__init__.py +5 -5
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +3 -24
- novel_downloader/core/downloaders/base.py +49 -23
- novel_downloader/core/downloaders/common.py +191 -137
- novel_downloader/core/downloaders/qianbi.py +187 -146
- novel_downloader/core/downloaders/qidian.py +187 -141
- novel_downloader/core/downloaders/registry.py +4 -2
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +3 -20
- novel_downloader/core/exporters/base.py +33 -37
- novel_downloader/core/exporters/common/__init__.py +1 -2
- novel_downloader/core/exporters/common/epub.py +15 -10
- novel_downloader/core/exporters/common/main_exporter.py +19 -12
- novel_downloader/core/exporters/common/txt.py +17 -12
- novel_downloader/core/exporters/epub_util.py +59 -29
- novel_downloader/core/exporters/linovelib/__init__.py +1 -0
- novel_downloader/core/exporters/linovelib/epub.py +23 -25
- novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
- novel_downloader/core/exporters/linovelib/txt.py +20 -14
- novel_downloader/core/exporters/qidian.py +2 -8
- novel_downloader/core/exporters/registry.py +4 -2
- novel_downloader/core/exporters/txt_util.py +7 -7
- novel_downloader/core/fetchers/__init__.py +54 -48
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
- novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
- novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/lewenn.py +83 -0
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +5 -16
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/shuhaige.py +84 -0
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/wanbengo.py +83 -0
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +1 -9
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +6 -19
- novel_downloader/core/interfaces/parser.py +7 -8
- novel_downloader/core/interfaces/searcher.py +9 -1
- novel_downloader/core/parsers/__init__.py +49 -12
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +64 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/esjzone.py +64 -69
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/linovelib.py +48 -64
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/qianbi.py +48 -50
- novel_downloader/core/parsers/qidian/main_parser.py +756 -48
- novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +5 -16
- novel_downloader/core/parsers/sfacg.py +38 -45
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +429 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +87 -131
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +34 -3
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
- novel_downloader/core/searchers/base.py +112 -36
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +43 -25
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +74 -40
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +24 -8
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +34 -85
- novel_downloader/locales/zh.json +35 -86
- novel_downloader/models/__init__.py +21 -22
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +4 -37
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +5 -0
- novel_downloader/resources/config/settings.toml +8 -70
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +13 -24
- novel_downloader/utils/chapter_storage.py +5 -5
- novel_downloader/utils/constants.py +4 -31
- novel_downloader/utils/cookies.py +38 -35
- novel_downloader/utils/crypto_utils/__init__.py +7 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/crypto_utils/rc4.py +54 -0
- novel_downloader/utils/epub/__init__.py +3 -4
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +62 -21
- novel_downloader/utils/epub/documents.py +95 -201
- novel_downloader/utils/epub/models.py +8 -22
- novel_downloader/utils/epub/utils.py +73 -106
- novel_downloader/utils/file_utils/__init__.py +2 -23
- novel_downloader/utils/file_utils/io.py +53 -188
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -15
- novel_downloader/utils/fontocr/__init__.py +5 -14
- novel_downloader/utils/fontocr/core.py +216 -0
- novel_downloader/utils/fontocr/loader.py +50 -0
- novel_downloader/utils/logger.py +81 -65
- novel_downloader/utils/network.py +17 -41
- novel_downloader/utils/state.py +4 -90
- novel_downloader/utils/text_utils/__init__.py +1 -7
- novel_downloader/utils/text_utils/diff_display.py +5 -7
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/__init__.py +5 -11
- novel_downloader/utils/time_utils/datetime_utils.py +20 -29
- novel_downloader/utils/time_utils/sleep_utils.py +55 -49
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.1.dist-info/METADATA +172 -0
- novel_downloader-2.0.1.dist-info/RECORD +206 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
- novel_downloader/core/downloaders/biquge.py +0 -29
- novel_downloader/core/downloaders/esjzone.py +0 -29
- novel_downloader/core/downloaders/linovelib.py +0 -29
- novel_downloader/core/downloaders/sfacg.py +0 -29
- novel_downloader/core/downloaders/yamibo.py +0 -29
- novel_downloader/core/exporters/biquge.py +0 -22
- novel_downloader/core/exporters/esjzone.py +0 -22
- novel_downloader/core/exporters/qianbi.py +0 -22
- novel_downloader/core/exporters/sfacg.py +0 -22
- novel_downloader/core/exporters/yamibo.py +0 -22
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -422
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -209
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -198
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -326
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -194
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -234
- novel_downloader/core/parsers/biquge.py +0 -139
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/types.py +0 -13
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/crypto_utils.py +0 -71
- novel_downloader/utils/fontocr/hash_store.py +0 -280
- novel_downloader/utils/fontocr/hash_utils.py +0 -103
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -315
- novel_downloader/utils/fontocr/ocr_v2.py +0 -764
- novel_downloader/utils/fontocr/ocr_v3.py +0 -744
- novel_downloader-1.5.0.dist-info/METADATA +0 -196
- novel_downloader-1.5.0.dist-info/RECORD +0 -164
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,16 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.core.fetchers.base
|
4
|
-
|
3
|
+
novel_downloader.core.fetchers.base
|
4
|
+
-----------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
HTTP request capabilities using aiohttp. It maintains a persistent
|
8
|
-
client session and supports retries, headers, timeout configurations,
|
9
|
-
cookie handling, and defines abstract methods for subclasses.
|
6
|
+
Abstract base class providing common HTTP session handling for fetchers.
|
10
7
|
"""
|
11
8
|
|
12
|
-
|
13
9
|
import abc
|
14
10
|
import json
|
15
11
|
import logging
|
16
12
|
import types
|
13
|
+
from collections.abc import Mapping
|
17
14
|
from typing import Any, Self
|
18
15
|
|
19
16
|
import aiohttp
|
@@ -22,8 +19,7 @@ from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
|
|
22
19
|
from novel_downloader.core.interfaces import FetcherProtocol
|
23
20
|
from novel_downloader.models import FetcherConfig, LoginField
|
24
21
|
from novel_downloader.utils import (
|
25
|
-
|
26
|
-
parse_cookie_expires,
|
22
|
+
async_jitter_sleep,
|
27
23
|
)
|
28
24
|
from novel_downloader.utils.constants import (
|
29
25
|
DATA_DIR,
|
@@ -67,7 +63,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
67
63
|
self._session: ClientSession | None = None
|
68
64
|
self._rate_limiter: TokenBucketRateLimiter | None = None
|
69
65
|
|
70
|
-
if config.max_rps
|
66
|
+
if config.max_rps > 0:
|
71
67
|
self._rate_limiter = TokenBucketRateLimiter(config.max_rps)
|
72
68
|
|
73
69
|
self.logger = logging.getLogger(f"{self.__class__.__name__}")
|
@@ -82,6 +78,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
82
78
|
) -> bool:
|
83
79
|
"""
|
84
80
|
Attempt to log in asynchronously.
|
81
|
+
|
85
82
|
:returns: True if login succeeded.
|
86
83
|
"""
|
87
84
|
return False
|
@@ -96,7 +93,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
96
93
|
Fetch the raw HTML (or JSON) of the book info page asynchronously.
|
97
94
|
|
98
95
|
:param book_id: The book identifier.
|
99
|
-
:return: The page content as
|
96
|
+
:return: The page content as string list.
|
100
97
|
"""
|
101
98
|
...
|
102
99
|
|
@@ -112,7 +109,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
112
109
|
|
113
110
|
:param book_id: The book identifier.
|
114
111
|
:param chapter_id: The chapter identifier.
|
115
|
-
:return: The
|
112
|
+
:return: The page content as string list.
|
116
113
|
"""
|
117
114
|
...
|
118
115
|
|
@@ -182,7 +179,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
182
179
|
return await self._response_to_str(resp, encoding)
|
183
180
|
except aiohttp.ClientError:
|
184
181
|
if attempt < self.retry_times:
|
185
|
-
await
|
182
|
+
await async_jitter_sleep(
|
186
183
|
self.backoff_factor,
|
187
184
|
mul_spread=1.1,
|
188
185
|
max_sleep=self.backoff_factor + 2,
|
@@ -247,8 +244,12 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
247
244
|
return False
|
248
245
|
try:
|
249
246
|
storage = json.loads(self._state_file.read_text(encoding="utf-8"))
|
250
|
-
|
251
|
-
|
247
|
+
raw_cookies = storage.get("cookies", [])
|
248
|
+
cookie_dict = self._filter_cookies(raw_cookies)
|
249
|
+
|
250
|
+
if cookie_dict:
|
251
|
+
self._session.cookie_jar.update_cookies(cookie_dict)
|
252
|
+
|
252
253
|
self._is_logged_in = await self._check_login_status()
|
253
254
|
return self._is_logged_in
|
254
255
|
except Exception as e:
|
@@ -278,12 +279,6 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
278
279
|
{
|
279
280
|
"name": cookie.key,
|
280
281
|
"value": cookie.value,
|
281
|
-
"domain": cookie.get("domain", ""),
|
282
|
-
"path": cookie.get("path", "/"),
|
283
|
-
"expires": parse_cookie_expires(cookie.get("expires")),
|
284
|
-
"httpOnly": bool(cookie.get("httponly", False)),
|
285
|
-
"secure": bool(cookie.get("secure", False)),
|
286
|
-
"sameSite": cookie.get("samesite") or "Lax",
|
287
282
|
}
|
288
283
|
)
|
289
284
|
storage_state = {
|
@@ -300,21 +295,6 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
300
295
|
self.logger.warning("Failed to save state: %s", e)
|
301
296
|
return False
|
302
297
|
|
303
|
-
async def set_interactive_mode(self, enable: bool) -> bool:
|
304
|
-
"""
|
305
|
-
Enable or disable interactive mode for manual login.
|
306
|
-
|
307
|
-
:param enable: True to enable, False to disable interactive mode.
|
308
|
-
:return: True if operation or login check succeeded, False otherwise.
|
309
|
-
"""
|
310
|
-
return False
|
311
|
-
|
312
|
-
def get_cookie_value(self, key: str) -> str | None:
|
313
|
-
for cookie in self.session.cookie_jar:
|
314
|
-
if cookie.key == key:
|
315
|
-
return str(cookie.value)
|
316
|
-
return None
|
317
|
-
|
318
298
|
def update_cookies(
|
319
299
|
self,
|
320
300
|
cookies: dict[str, str],
|
@@ -346,18 +326,10 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
346
326
|
"""
|
347
327
|
return False
|
348
328
|
|
349
|
-
@property
|
350
|
-
def hostname(self) -> str:
|
351
|
-
return ""
|
352
|
-
|
353
329
|
@property
|
354
330
|
def site(self) -> str:
|
355
331
|
return self._site
|
356
332
|
|
357
|
-
@property
|
358
|
-
def requester_type(self) -> str:
|
359
|
-
return "session"
|
360
|
-
|
361
333
|
@property
|
362
334
|
def is_logged_in(self) -> bool:
|
363
335
|
"""
|
@@ -411,6 +383,17 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
411
383
|
return dict(self._session.headers)
|
412
384
|
return self._headers.copy()
|
413
385
|
|
386
|
+
@staticmethod
def _filter_cookies(
    raw_cookies: list[Mapping[str, Any]],
) -> dict[str, str]:
    """
    Hook: reduce stored cookie records to a simple name -> value mapping.

    Takes the raw list of cookie-dicts loaded from the saved storage
    state and keeps only the ``"name"`` and ``"value"`` fields of each
    record. Records that lack either field are skipped, so a single
    malformed entry no longer raises ``KeyError`` and prevents the
    remaining cookies from being restored.

    :param raw_cookies: Cookie records, each expected to carry at least
        the ``"name"`` and ``"value"`` keys; any other keys are ignored.
    :return: Mapping of cookie name to cookie value. When a name occurs
        more than once, the last record wins.
    """
    return {
        entry["name"]: entry["value"]
        for entry in raw_cookies
        # Tolerate incomplete records instead of failing the whole load.
        if "name" in entry and "value" in entry
    }
|
396
|
+
|
414
397
|
@staticmethod
|
415
398
|
async def _response_to_str(
|
416
399
|
resp: ClientResponse,
|
@@ -421,14 +404,22 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
421
404
|
then on UnicodeDecodeError fall back to a lenient utf-8 decode.
|
422
405
|
"""
|
423
406
|
data: bytes = await resp.read()
|
424
|
-
encodings = [
|
407
|
+
encodings = [
|
408
|
+
encoding,
|
409
|
+
resp.charset,
|
410
|
+
"gb2312",
|
411
|
+
"gb18030",
|
412
|
+
"gbk",
|
413
|
+
"utf-8",
|
414
|
+
]
|
425
415
|
encodings_list: list[str] = [e for e in encodings if e]
|
426
416
|
for enc in encodings_list:
|
427
417
|
try:
|
428
418
|
return data.decode(enc)
|
429
419
|
except UnicodeDecodeError:
|
430
420
|
continue
|
431
|
-
|
421
|
+
encoding = encoding or "utf-8"
|
422
|
+
return data.decode(encoding, errors="ignore")
|
432
423
|
|
433
424
|
async def __aenter__(self) -> Self:
|
434
425
|
if self._session is None or self._session.closed:
|
@@ -1,36 +1,35 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.core.fetchers.
|
4
|
-
|
3
|
+
novel_downloader.core.fetchers.biquyuedu
|
4
|
+
----------------------------------------
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
from typing import Any
|
9
9
|
|
10
|
-
from novel_downloader.core.fetchers.base import
|
10
|
+
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
12
|
from novel_downloader.models import FetcherConfig
|
13
13
|
|
14
14
|
|
15
15
|
@register_fetcher(
|
16
|
-
site_keys=["
|
17
|
-
backends=["browser"],
|
16
|
+
site_keys=["biquyuedu"],
|
18
17
|
)
|
19
|
-
class
|
18
|
+
class BiquyueduSession(BaseSession):
|
20
19
|
"""
|
21
|
-
A
|
20
|
+
A session class for interacting with the 精彩小说 (biquyuedu.com) novel website.
|
22
21
|
"""
|
23
22
|
|
24
|
-
BOOK_INFO_URL = "
|
25
|
-
CHAPTER_URL = "
|
23
|
+
BOOK_INFO_URL = "https://biquyuedu.com/novel/{book_id}.html"
|
24
|
+
CHAPTER_URL = "https://biquyuedu.com/novel/{book_id}/{chapter_id}.html"
|
26
25
|
|
27
26
|
def __init__(
|
28
27
|
self,
|
29
28
|
config: FetcherConfig,
|
30
|
-
|
29
|
+
cookies: dict[str, str] | None = None,
|
31
30
|
**kwargs: Any,
|
32
31
|
) -> None:
|
33
|
-
super().__init__("
|
32
|
+
super().__init__("biquyuedu", config, cookies, **kwargs)
|
34
33
|
|
35
34
|
async def get_book_info(
|
36
35
|
self,
|
@@ -41,7 +40,7 @@ class BiqugeBrowser(BaseBrowser):
|
|
41
40
|
Fetch the raw HTML of the book info page asynchronously.
|
42
41
|
|
43
42
|
:param book_id: The book identifier.
|
44
|
-
:return: The page content as
|
43
|
+
:return: The page content as string list.
|
45
44
|
"""
|
46
45
|
url = self.book_info_url(book_id=book_id)
|
47
46
|
return [await self.fetch(url, **kwargs)]
|
@@ -57,7 +56,7 @@ class BiqugeBrowser(BaseBrowser):
|
|
57
56
|
|
58
57
|
:param book_id: The book identifier.
|
59
58
|
:param chapter_id: The chapter identifier.
|
60
|
-
:return: The
|
59
|
+
:return: The page content as string list.
|
61
60
|
"""
|
62
61
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
63
62
|
return [await self.fetch(url, **kwargs)]
|
@@ -82,7 +81,3 @@ class BiqugeBrowser(BaseBrowser):
|
|
82
81
|
:return: Fully qualified chapter URL.
|
83
82
|
"""
|
84
83
|
return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
|
85
|
-
|
86
|
-
@property
|
87
|
-
def hostname(self) -> str:
|
88
|
-
return "www.b520.cc"
|
@@ -0,0 +1,110 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.dxmwx
|
4
|
+
------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import asyncio
|
9
|
+
from typing import Any
|
10
|
+
|
11
|
+
from novel_downloader.core.fetchers.base import BaseSession
|
12
|
+
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
+
from novel_downloader.models import FetcherConfig
|
14
|
+
|
15
|
+
|
16
|
+
@register_fetcher(
    site_keys=["dxmwx"],
)
class DxmwxSession(BaseSession):
    """
    Session-based fetcher for the dxmwx novel website.

    Pages are requested from ``www.dxmwx.org`` when the configured
    ``locale_style`` equals ``"simplified"`` and from ``tw.dxmwx.org``
    for every other value.
    """

    # URL templates; ``base_url`` is a bare host name without a scheme.
    BOOK_INFO_URL = "https://{base_url}/book/{book_id}.html"
    BOOK_CATALOG_URL = "https://{base_url}/chapter/{book_id}.html"
    CHAPTER_URL = "https://{base_url}/read/{book_id}_{chapter_id}.html"

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Set up the session and choose the host used for all requests.

        :param config: Fetcher configuration; ``locale_style`` selects the host.
        :param cookies: Optional initial cookies passed on to the base class.
        :param kwargs: Extra keyword arguments passed on to the base class.
        """
        super().__init__("dxmwx", config, cookies, **kwargs)
        if config.locale_style == "simplified":
            self.base_url = "www.dxmwx.org"
        else:
            self.base_url = "tw.dxmwx.org"

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the book info page and the catalog page concurrently.

        Order: [info, catalog]

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: Two page bodies, info page first and catalog page second.
        """
        host = self.base_url
        info_page_url = self.book_info_url(base_url=host, book_id=book_id)
        catalog_page_url = self.book_catalog_url(base_url=host, book_id=book_id)

        # gather() keeps argument order, so the result is [info, catalog].
        pages = await asyncio.gather(
            self.fetch(info_page_url, **kwargs),
            self.fetch(catalog_page_url, **kwargs),
        )
        return list(pages)

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the page of a single chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: A one-element list holding the chapter page body.
        """
        target = self.chapter_url(
            base_url=self.base_url,
            book_id=book_id,
            chapter_id=chapter_id,
        )
        page = await self.fetch(target, **kwargs)
        return [page]

    @classmethod
    def book_info_url(cls, base_url: str, book_id: str) -> str:
        """
        Build the URL of a book's info page.

        :param base_url: Host name to place after ``https://``.
        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        template = cls.BOOK_INFO_URL
        return template.format(base_url=base_url, book_id=book_id)

    @classmethod
    def book_catalog_url(cls, base_url: str, book_id: str) -> str:
        """
        Build the URL of a book's catalog page.

        :param base_url: Host name to place after ``https://``.
        :param book_id: The identifier of the book.
        :return: Fully qualified catalog page URL.
        """
        template = cls.BOOK_CATALOG_URL
        return template.format(base_url=base_url, book_id=book_id)

    @classmethod
    def chapter_url(cls, base_url: str, book_id: str, chapter_id: str) -> str:
        """
        Build the URL of a specific chapter page.

        :param base_url: Host name to place after ``https://``.
        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        template = cls.CHAPTER_URL
        return template.format(
            base_url=base_url,
            book_id=book_id,
            chapter_id=chapter_id,
        )
|
@@ -0,0 +1,139 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.eightnovel
|
4
|
+
-----------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import re
|
9
|
+
from re import Pattern
|
10
|
+
from typing import Any
|
11
|
+
|
12
|
+
from novel_downloader.core.fetchers.base import BaseSession
|
13
|
+
from novel_downloader.core.fetchers.registry import register_fetcher
|
14
|
+
from novel_downloader.models import FetcherConfig
|
15
|
+
|
16
|
+
|
17
|
+
@register_fetcher(
    site_keys=["8novel", "eightnovel"],
)
class EightnovelSession(BaseSession):
    """
    Session-based fetcher for the 8novel website (www.8novel.com).

    Chapter text is not part of the chapter page itself: the chapter page
    carries a numeric seed from which the URL of the real content page is
    derived.
    """

    BOOK_INFO_URL = "https://www.8novel.com/novelbooks/{book_id}/"
    CHAPTER_URL = "https://article.8novel.com/read/{book_id}/?{chapter_id}"
    CHAPTER_CONTENT_URL = (
        "https://article.8novel.com/txt/1/{book_id}/{chapter_id}{seed_segment}.html"
    )

    # Captures the quoted literal in expressions such as "a,b,c".split(",").
    _SPLIT_STR_PATTERN = re.compile(
        r'["\']([^"\']+)["\']\s*\.split\s*\(\s*["\']\s*,\s*["\']\s*\)', re.DOTALL
    )
    # Matches a literal made only of comma-separated digit runs.
    _DIGIT_LIST_PATTERN: Pattern[str] = re.compile(r"^\d+(?:,\d+)*$")

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Set up the session under the ``"eightnovel"`` site name.

        :param config: Fetcher configuration passed on to the base class.
        :param cookies: Optional initial cookies passed on to the base class.
        :param kwargs: Extra keyword arguments passed on to the base class.
        """
        super().__init__("eightnovel", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the book info page.

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: A one-element list holding the info page body.
        """
        page = await self.fetch(self.book_info_url(book_id=book_id), **kwargs)
        return [page]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch a chapter page and then the content page it points to.

        Order: [chap_info, content]

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: Two page bodies, chapter page first and content page second.
        """
        chapter_page = await self.fetch(
            self.chapter_url(book_id=book_id, chapter_id=chapter_id),
            **kwargs,
        )

        # The content URL depends on a seed embedded in the chapter page,
        # so the two requests have to run one after the other.
        content_page_url = self._build_chapter_content_url(
            seed=self._extract_url_seed(chapter_page),
            book_id=book_id,
            chapter_id=chapter_id,
        )
        content_page = await self.fetch(content_page_url, **kwargs)

        return [chapter_page, content_page]

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Build the URL of a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        template = cls.BOOK_INFO_URL
        return template.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Build the URL of a specific chapter page.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        template = cls.CHAPTER_URL
        return template.format(book_id=book_id, chapter_id=chapter_id)

    @classmethod
    def _extract_url_seed(cls, html_str: str) -> str:
        """
        Pull the URL seed out of a chapter page's HTML/JS source.

        Every quoted literal used as ``"...".split(",")`` is collected,
        those consisting purely of comma-separated digit runs are kept,
        and the final item of the last such literal is returned.

        :param html_str: Source text of the chapter page.
        :return: The seed string, or ``""`` when no numeric list is found.
        """
        digit_lists = [
            literal
            for literal in cls._SPLIT_STR_PATTERN.findall(html_str)
            if cls._DIGIT_LIST_PATTERN.fullmatch(literal)
        ]
        if digit_lists:
            return digit_lists[-1].rsplit(",", 1)[-1]
        return ""

    @classmethod
    def _build_chapter_content_url(
        cls, seed: str, book_id: str, chapter_id: str
    ) -> str:
        """
        Build the content page URL from the seed and the chapter id.

        A slice of at most five characters is taken from ``seed`` starting
        at index ``(int(chapter_id) * 3) % 100`` and appended directly
        after the chapter id in the URL.

        :param seed: Seed string extracted from the chapter page.
        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter; must parse as an int.
        :return: Fully qualified URL of the chapter content page.
        """
        offset = (int(chapter_id) * 3) % 100
        segment = seed[offset : offset + 5]
        template = cls.CHAPTER_CONTENT_URL
        return template.format(
            book_id=book_id,
            chapter_id=chapter_id,
            seed_segment=segment,
        )
|
@@ -1,26 +1,26 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.core.fetchers.esjzone
|
4
|
-
|
3
|
+
novel_downloader.core.fetchers.esjzone
|
4
|
+
--------------------------------------
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
import re
|
9
|
+
from collections.abc import Mapping
|
9
10
|
from typing import Any
|
10
11
|
|
11
12
|
from novel_downloader.core.fetchers.base import BaseSession
|
12
13
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
14
|
from novel_downloader.models import FetcherConfig, LoginField
|
14
|
-
from novel_downloader.utils import
|
15
|
+
from novel_downloader.utils import async_jitter_sleep
|
15
16
|
|
16
17
|
|
17
18
|
@register_fetcher(
|
18
19
|
site_keys=["esjzone"],
|
19
|
-
backends=["session"],
|
20
20
|
)
|
21
21
|
class EsjzoneSession(BaseSession):
|
22
22
|
"""
|
23
|
-
A session class for interacting with the
|
23
|
+
A session class for interacting with the ESJ Zone (www.esjzone.cc) novel website.
|
24
24
|
"""
|
25
25
|
|
26
26
|
BOOKCASE_URL = "https://www.esjzone.cc/my/favorite"
|
@@ -68,7 +68,7 @@ class EsjzoneSession(BaseSession):
|
|
68
68
|
):
|
69
69
|
self._is_logged_in = True
|
70
70
|
return True
|
71
|
-
await
|
71
|
+
await async_jitter_sleep(
|
72
72
|
self.backoff_factor,
|
73
73
|
mul_spread=1.1,
|
74
74
|
max_sleep=self.backoff_factor + 2,
|
@@ -86,7 +86,7 @@ class EsjzoneSession(BaseSession):
|
|
86
86
|
Fetch the raw HTML of the book info page asynchronously.
|
87
87
|
|
88
88
|
:param book_id: The book identifier.
|
89
|
-
:return: The page content as
|
89
|
+
:return: The page content as string list.
|
90
90
|
"""
|
91
91
|
url = self.book_info_url(book_id=book_id)
|
92
92
|
return [await self.fetch(url, **kwargs)]
|
@@ -102,7 +102,7 @@ class EsjzoneSession(BaseSession):
|
|
102
102
|
|
103
103
|
:param book_id: The book identifier.
|
104
104
|
:param chapter_id: The chapter identifier.
|
105
|
-
:return: The
|
105
|
+
:return: The page content as string list.
|
106
106
|
"""
|
107
107
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
108
108
|
return [await self.fetch(url, **kwargs)]
|
@@ -170,10 +170,6 @@ class EsjzoneSession(BaseSession):
|
|
170
170
|
"""
|
171
171
|
return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
|
172
172
|
|
173
|
-
@property
|
174
|
-
def hostname(self) -> str:
|
175
|
-
return "www.esjzone.cc"
|
176
|
-
|
177
173
|
async def _api_login(self, username: str, password: str) -> bool:
|
178
174
|
"""
|
179
175
|
Login to the API using a 2-step token-based process.
|
@@ -234,3 +230,14 @@ class EsjzoneSession(BaseSession):
|
|
234
230
|
def _extract_token(self, text: str) -> str:
    """
    Return the text enclosed by the first ``<JinJing>...</JinJing>`` pair.

    The capture is non-greedy, must contain at least one character, and
    cannot span a line break.

    :param text: The text to search.
    :return: The enclosed text, or an empty string when no pair is found.
    """
    found = re.search(r"<JinJing>(.+?)</JinJing>", text)
    if found is None:
        return ""
    return found.group(1)
|
233
|
+
|
234
|
+
@staticmethod
def _filter_cookies(
    raw_cookies: list[Mapping[str, Any]],
) -> dict[str, str]:
    """
    Reduce raw cookie records to a name -> value mapping for ESJ Zone.

    A record is kept when its ``"domain"`` entry is one of the accepted
    esjzone.cc domains; a record without a ``"domain"`` entry is treated as
    having an empty domain, which is also accepted. When several kept
    records share a name, the last one wins.

    :param raw_cookies: Cookie records, each providing ``"name"`` and ``"value"``.
    :return: Mapping of cookie name to cookie value for the kept records.
    """
    accepted_domains = {".www.esjzone.cc", "www.esjzone.cc", ".esjzone.cc", ""}

    kept: dict[str, str] = {}
    for cookie in raw_cookies:
        if cookie.get("domain", "") not in accepted_domains:
            continue
        name = cookie["name"]
        value = cookie["value"]
        kept[name] = value
    return kept
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.guidaye
|
4
|
+
--------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
from novel_downloader.core.fetchers.base import BaseSession
|
11
|
+
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
+
from novel_downloader.models import FetcherConfig
|
13
|
+
|
14
|
+
|
15
|
+
@register_fetcher(
    site_keys=["guidaye"],
)
class GuidayeSession(BaseSession):
    """
    Fetcher session for the "Classic Works Reading" site (名著阅读, b.guidaye.com).

    Book identifiers handed to the ``get_*`` coroutines may use ``-`` as a
    separator; every ``-`` is rewritten to ``/`` before the URL is built.
    """

    BOOK_INFO_URL = "https://b.guidaye.com/{book_id}/"
    CHAPTER_URL = "https://b.guidaye.com/{book_id}/{chapter_id}.html"

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialise the base session under the ``"guidaye"`` site key.

        :param config: Fetcher configuration forwarded to the base class.
        :param cookies: Optional cookies forwarded to the base class.
        :param kwargs: Extra keyword arguments forwarded to the base class.
        """
        super().__init__("guidaye", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the raw HTML of a book's info page.

        :param book_id: The book identifier; ``-`` is converted to ``/``.
        :param kwargs: Extra keyword arguments passed through to ``fetch``.
        :return: A single-element list holding the page content.
        """
        info_url = self.book_info_url(book_id=book_id.replace("-", "/"))
        page = await self.fetch(info_url, **kwargs)
        return [page]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the raw HTML of one chapter page.

        :param book_id: The book identifier; ``-`` is converted to ``/``.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments passed through to ``fetch``.
        :return: A single-element list holding the page content.
        """
        chapter_page_url = self.chapter_url(
            book_id=book_id.replace("-", "/"),
            chapter_id=chapter_id,
        )
        page = await self.fetch(chapter_page_url, **kwargs)
        return [page]

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Build the info page URL for a book from ``BOOK_INFO_URL``.

        :param book_id: The identifier of the book, inserted as given.
        :return: Fully qualified URL for the book info page.
        """
        template = cls.BOOK_INFO_URL
        return template.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Build the page URL for a chapter from ``CHAPTER_URL``.

        :param book_id: The identifier of the book, inserted as given.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        template = cls.CHAPTER_URL
        return template.format(chapter_id=chapter_id, book_id=book_id)
|