novel-downloader 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +11 -8
- novel_downloader/cli/export.py +17 -17
- novel_downloader/cli/ui.py +28 -1
- novel_downloader/config/adapter.py +27 -1
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +7 -33
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +21 -47
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +4 -39
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/core/parsers/{qidian/main_parser.py → qidian.py} +147 -266
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +3 -4
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/constants.py +6 -0
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/fontocr/core.py +2 -0
- novel_downloader/utils/fontocr/loader.py +10 -8
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +1 -1
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +4 -1
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/RECORD +91 -94
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -11
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -11,8 +11,7 @@ from typing import Any
|
|
11
11
|
|
12
12
|
from novel_downloader.core.fetchers.base import BaseSession
|
13
13
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
14
|
-
from novel_downloader.models import FetcherConfig, LoginField
|
15
|
-
from novel_downloader.utils import async_jitter_sleep
|
14
|
+
from novel_downloader.models import LoginField
|
16
15
|
|
17
16
|
|
18
17
|
@register_fetcher(
|
@@ -23,6 +22,8 @@ class EsjzoneSession(BaseSession):
|
|
23
22
|
A session class for interacting with the ESJ Zone (www.esjzone.cc) novel website.
|
24
23
|
"""
|
25
24
|
|
25
|
+
site_name: str = "esjzone"
|
26
|
+
|
26
27
|
BOOKCASE_URL = "https://www.esjzone.cc/my/favorite"
|
27
28
|
BOOK_INFO_URL = "https://www.esjzone.cc/detail/{book_id}.html"
|
28
29
|
CHAPTER_URL = "https://www.esjzone.cc/forum/{book_id}/{chapter_id}.html"
|
@@ -30,13 +31,7 @@ class EsjzoneSession(BaseSession):
|
|
30
31
|
API_LOGIN_URL_1 = "https://www.esjzone.cc/my/login"
|
31
32
|
API_LOGIN_URL_2 = "https://www.esjzone.cc/inc/mem_login.php"
|
32
33
|
|
33
|
-
|
34
|
-
self,
|
35
|
-
config: FetcherConfig,
|
36
|
-
cookies: dict[str, str] | None = None,
|
37
|
-
**kwargs: Any,
|
38
|
-
) -> None:
|
39
|
-
super().__init__("esjzone", config, cookies, **kwargs)
|
34
|
+
_TOKEN_RE = re.compile(r"<JinJing>(.*?)</JinJing>")
|
40
35
|
|
41
36
|
async def login(
|
42
37
|
self,
|
@@ -68,11 +63,7 @@ class EsjzoneSession(BaseSession):
|
|
68
63
|
):
|
69
64
|
self._is_logged_in = True
|
70
65
|
return True
|
71
|
-
await async_jitter_sleep(
|
72
|
-
self.backoff_factor,
|
73
|
-
mul_spread=1.1,
|
74
|
-
max_sleep=self.backoff_factor + 2,
|
75
|
-
)
|
66
|
+
await self._sleep()
|
76
67
|
|
77
68
|
self._is_logged_in = False
|
78
69
|
return False
|
@@ -82,12 +73,6 @@ class EsjzoneSession(BaseSession):
|
|
82
73
|
book_id: str,
|
83
74
|
**kwargs: Any,
|
84
75
|
) -> list[str]:
|
85
|
-
"""
|
86
|
-
Fetch the raw HTML of the book info page asynchronously.
|
87
|
-
|
88
|
-
:param book_id: The book identifier.
|
89
|
-
:return: The page content as string list.
|
90
|
-
"""
|
91
76
|
url = self.book_info_url(book_id=book_id)
|
92
77
|
return [await self.fetch(url, **kwargs)]
|
93
78
|
|
@@ -97,13 +82,6 @@ class EsjzoneSession(BaseSession):
|
|
97
82
|
chapter_id: str,
|
98
83
|
**kwargs: Any,
|
99
84
|
) -> list[str]:
|
100
|
-
"""
|
101
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
102
|
-
|
103
|
-
:param book_id: The book identifier.
|
104
|
-
:param chapter_id: The chapter identifier.
|
105
|
-
:return: The page content as string list.
|
106
|
-
"""
|
107
85
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
108
86
|
return [await self.fetch(url, **kwargs)]
|
109
87
|
|
@@ -228,8 +206,7 @@ class EsjzoneSession(BaseSession):
|
|
228
206
|
return not any(kw in resp_text[0] for kw in keywords)
|
229
207
|
|
230
208
|
def _extract_token(self, text: str) -> str:
|
231
|
-
|
232
|
-
return match.group(1) if match else ""
|
209
|
+
return m.group(1) if (m := self._TOKEN_RE.search(text)) else ""
|
233
210
|
|
234
211
|
@staticmethod
|
235
212
|
def _filter_cookies(
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -20,28 +19,16 @@ class GuidayeSession(BaseSession):
|
|
20
19
|
A session class for interacting with the 名著阅读 (b.guidaye.com) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "guidaye"
|
23
|
+
|
23
24
|
BOOK_INFO_URL = "https://b.guidaye.com/{book_id}/"
|
24
25
|
CHAPTER_URL = "https://b.guidaye.com/{book_id}/{chapter_id}.html"
|
25
26
|
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
cookies: dict[str, str] | None = None,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("guidaye", config, cookies, **kwargs)
|
33
|
-
|
34
27
|
async def get_book_info(
|
35
28
|
self,
|
36
29
|
book_id: str,
|
37
30
|
**kwargs: Any,
|
38
31
|
) -> list[str]:
|
39
|
-
"""
|
40
|
-
Fetch the raw HTML of the book info page asynchronously.
|
41
|
-
|
42
|
-
:param book_id: The book identifier.
|
43
|
-
:return: The page content as string list.
|
44
|
-
"""
|
45
32
|
book_id = book_id.replace("-", "/")
|
46
33
|
url = self.book_info_url(book_id=book_id)
|
47
34
|
return [await self.fetch(url, **kwargs)]
|
@@ -52,13 +39,6 @@ class GuidayeSession(BaseSession):
|
|
52
39
|
chapter_id: str,
|
53
40
|
**kwargs: Any,
|
54
41
|
) -> list[str]:
|
55
|
-
"""
|
56
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
57
|
-
|
58
|
-
:param book_id: The book identifier.
|
59
|
-
:param chapter_id: The chapter identifier.
|
60
|
-
:return: The page content as string list.
|
61
|
-
"""
|
62
42
|
book_id = book_id.replace("-", "/")
|
63
43
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
64
44
|
return [await self.fetch(url, **kwargs)]
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -20,34 +19,22 @@ class HetushuSession(BaseSession):
|
|
20
19
|
A session class for interacting with the 和图书 (www.hetushu.com) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "hetushu"
|
23
|
+
BASE_URL_MAP: dict[str, str] = {
|
24
|
+
"simplified": "www.hetushu.com",
|
25
|
+
"traditional": "www.hetubook.com",
|
26
|
+
}
|
27
|
+
DEFAULT_BASE_URL: str = "www.hetushu.com"
|
28
|
+
|
23
29
|
BOOK_INFO_URL = "https://{base_url}/book/{book_id}/index.html"
|
24
30
|
CHAPTER_URL = "https://{base_url}/book/{book_id}/{chapter_id}.html"
|
25
31
|
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
cookies: dict[str, str] | None = None,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("hetushu", config, cookies, **kwargs)
|
33
|
-
self.base_url = (
|
34
|
-
"www.hetushu.com"
|
35
|
-
if config.locale_style == "simplified"
|
36
|
-
else "www.hetubook.com"
|
37
|
-
)
|
38
|
-
|
39
32
|
async def get_book_info(
|
40
33
|
self,
|
41
34
|
book_id: str,
|
42
35
|
**kwargs: Any,
|
43
36
|
) -> list[str]:
|
44
|
-
|
45
|
-
Fetch the raw HTML of the book info page asynchronously.
|
46
|
-
|
47
|
-
:param book_id: The book identifier.
|
48
|
-
:return: The page content as string list.
|
49
|
-
"""
|
50
|
-
url = self.book_info_url(base_url=self.base_url, book_id=book_id)
|
37
|
+
url = self.book_info_url(base_url=self._base_url, book_id=book_id)
|
51
38
|
return [await self.fetch(url, **kwargs)]
|
52
39
|
|
53
40
|
async def get_book_chapter(
|
@@ -56,15 +43,8 @@ class HetushuSession(BaseSession):
|
|
56
43
|
chapter_id: str,
|
57
44
|
**kwargs: Any,
|
58
45
|
) -> list[str]:
|
59
|
-
"""
|
60
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
61
|
-
|
62
|
-
:param book_id: The book identifier.
|
63
|
-
:param chapter_id: The chapter identifier.
|
64
|
-
:return: The page content as string list.
|
65
|
-
"""
|
66
46
|
url = self.chapter_url(
|
67
|
-
base_url=self.base_url, book_id=book_id, chapter_id=chapter_id
|
47
|
+
base_url=self._base_url, book_id=book_id, chapter_id=chapter_id
|
68
48
|
)
|
69
49
|
return [await self.fetch(url, **kwargs)]
|
70
50
|
|
@@ -10,7 +10,6 @@ from typing import Any
|
|
10
10
|
|
11
11
|
from novel_downloader.core.fetchers.base import BaseSession
|
12
12
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
-
from novel_downloader.models import FetcherConfig
|
14
13
|
|
15
14
|
|
16
15
|
@register_fetcher(
|
@@ -21,18 +20,12 @@ class I25zwSession(BaseSession):
|
|
21
20
|
A session class for interacting with the 25中文网 (www.i25zw.com) novel website.
|
22
21
|
"""
|
23
22
|
|
23
|
+
site_name: str = "i25zw"
|
24
|
+
|
24
25
|
BOOK_INFO_URL = "https://www.i25zw.com/book/{book_id}.html"
|
25
26
|
BOOK_CATALOG_URL = "https://www.i25zw.com/{book_id}/"
|
26
27
|
CHAPTER_URL = "https://www.i25zw.com/{book_id}/{chapter_id}.html"
|
27
28
|
|
28
|
-
def __init__(
|
29
|
-
self,
|
30
|
-
config: FetcherConfig,
|
31
|
-
cookies: dict[str, str] | None = None,
|
32
|
-
**kwargs: Any,
|
33
|
-
) -> None:
|
34
|
-
super().__init__("i25zw", config, cookies, **kwargs)
|
35
|
-
|
36
29
|
async def get_book_info(
|
37
30
|
self,
|
38
31
|
book_id: str,
|
@@ -61,13 +54,6 @@ class I25zwSession(BaseSession):
|
|
61
54
|
chapter_id: str,
|
62
55
|
**kwargs: Any,
|
63
56
|
) -> list[str]:
|
64
|
-
"""
|
65
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
66
|
-
|
67
|
-
:param book_id: The book identifier.
|
68
|
-
:param chapter_id: The chapter identifier.
|
69
|
-
:return: The page content as string list.
|
70
|
-
"""
|
71
57
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
72
58
|
return [await self.fetch(url, **kwargs)]
|
73
59
|
|
@@ -11,7 +11,6 @@ from typing import Any
|
|
11
11
|
|
12
12
|
from novel_downloader.core.fetchers.base import BaseSession
|
13
13
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
14
|
-
from novel_downloader.models import FetcherConfig
|
15
14
|
|
16
15
|
|
17
16
|
@register_fetcher(
|
@@ -22,19 +21,13 @@ class Ixdzs8Session(BaseSession):
|
|
22
21
|
A session class for interacting with the 爱下电子书 (ixdzs8.com) novel website.
|
23
22
|
"""
|
24
23
|
|
24
|
+
site_name: str = "ixdzs8"
|
25
|
+
|
25
26
|
BOOK_INFO_URL = "https://ixdzs8.com/read/{book_id}/"
|
26
27
|
BOOK_CATALOG_URL = "https://ixdzs8.com/novel/clist/"
|
27
28
|
CHAPTER_URL = "https://ixdzs8.com/read/{book_id}/{chapter_id}.html"
|
28
29
|
_TOKEN_PATTERN = re.compile(r'let\s+token\s*=\s*"([^"]+)"')
|
29
30
|
|
30
|
-
def __init__(
|
31
|
-
self,
|
32
|
-
config: FetcherConfig,
|
33
|
-
cookies: dict[str, str] | None = None,
|
34
|
-
**kwargs: Any,
|
35
|
-
) -> None:
|
36
|
-
super().__init__("ixdzs8", config, cookies, **kwargs)
|
37
|
-
|
38
31
|
async def get_book_info(
|
39
32
|
self,
|
40
33
|
book_id: str,
|
@@ -63,13 +56,6 @@ class Ixdzs8Session(BaseSession):
|
|
63
56
|
chapter_id: str,
|
64
57
|
**kwargs: Any,
|
65
58
|
) -> list[str]:
|
66
|
-
"""
|
67
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
68
|
-
|
69
|
-
:param book_id: The book identifier.
|
70
|
-
:param chapter_id: The chapter identifier.
|
71
|
-
:return: The page content as string list.
|
72
|
-
"""
|
73
59
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
74
60
|
return [await self.fetch_verified_html(url, **kwargs)]
|
75
61
|
|
@@ -11,7 +11,6 @@ from lxml import html
|
|
11
11
|
|
12
12
|
from novel_downloader.core.fetchers.base import BaseSession
|
13
13
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
14
|
-
from novel_downloader.models import FetcherConfig
|
15
14
|
|
16
15
|
|
17
16
|
@register_fetcher(
|
@@ -22,18 +21,12 @@ class Jpxs123Session(BaseSession):
|
|
22
21
|
A session class for interacting with the 精品小说网 (www.jpxs123.com) novel website.
|
23
22
|
"""
|
24
23
|
|
24
|
+
site_name: str = "jpxs123"
|
25
|
+
|
25
26
|
BASE_URL = "https://www.jpxs123.com"
|
26
27
|
BOOK_INFO_URL = "https://www.jpxs123.com/{book_id}.html"
|
27
28
|
CHAPTER_URL = "https://www.jpxs123.com/{book_id}/{chapter_id}.html"
|
28
29
|
|
29
|
-
def __init__(
|
30
|
-
self,
|
31
|
-
config: FetcherConfig,
|
32
|
-
cookies: dict[str, str] | None = None,
|
33
|
-
**kwargs: Any,
|
34
|
-
) -> None:
|
35
|
-
super().__init__("jpxs123", config, cookies, **kwargs)
|
36
|
-
|
37
30
|
async def get_book_info(
|
38
31
|
self,
|
39
32
|
book_id: str,
|
@@ -68,13 +61,6 @@ class Jpxs123Session(BaseSession):
|
|
68
61
|
chapter_id: str,
|
69
62
|
**kwargs: Any,
|
70
63
|
) -> list[str]:
|
71
|
-
"""
|
72
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
73
|
-
|
74
|
-
:param book_id: The book identifier.
|
75
|
-
:param chapter_id: The chapter identifier.
|
76
|
-
:return: The page content as string list.
|
77
|
-
"""
|
78
64
|
book_id = book_id.replace("-", "/")
|
79
65
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
80
66
|
return [await self.fetch(url, **kwargs)]
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -20,28 +19,16 @@ class LewennSession(BaseSession):
|
|
20
19
|
A session class for interacting with the 乐文小说网 (www.lewenn.net) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "lewenn"
|
23
|
+
|
23
24
|
BOOK_INFO_URL = "https://www.lewenn.net/{book_id}/"
|
24
25
|
CHAPTER_URL = "https://www.lewenn.net/{book_id}/{chapter_id}.html"
|
25
26
|
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
cookies: dict[str, str] | None = None,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("lewenn", config, cookies, **kwargs)
|
33
|
-
|
34
27
|
async def get_book_info(
|
35
28
|
self,
|
36
29
|
book_id: str,
|
37
30
|
**kwargs: Any,
|
38
31
|
) -> list[str]:
|
39
|
-
"""
|
40
|
-
Fetch the raw HTML of the book info page asynchronously.
|
41
|
-
|
42
|
-
:param book_id: The book identifier.
|
43
|
-
:return: The page content as string list.
|
44
|
-
"""
|
45
32
|
url = self.book_info_url(book_id=book_id)
|
46
33
|
return [await self.fetch(url, **kwargs)]
|
47
34
|
|
@@ -51,13 +38,6 @@ class LewennSession(BaseSession):
|
|
51
38
|
chapter_id: str,
|
52
39
|
**kwargs: Any,
|
53
40
|
) -> list[str]:
|
54
|
-
"""
|
55
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
56
|
-
|
57
|
-
:param book_id: The book identifier.
|
58
|
-
:param chapter_id: The chapter identifier.
|
59
|
-
:return: The page content as string list.
|
60
|
-
"""
|
61
41
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
62
42
|
return [await self.fetch(url, **kwargs)]
|
63
43
|
|
@@ -10,8 +10,6 @@ from typing import Any
|
|
10
10
|
|
11
11
|
from novel_downloader.core.fetchers.base import BaseSession
|
12
12
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
-
from novel_downloader.models import FetcherConfig
|
14
|
-
from novel_downloader.utils import async_jitter_sleep
|
15
13
|
|
16
14
|
|
17
15
|
@register_fetcher(
|
@@ -22,6 +20,8 @@ class LinovelibSession(BaseSession):
|
|
22
20
|
A session class for interacting with 哔哩轻小说 (www.linovelib.com) novel website.
|
23
21
|
"""
|
24
22
|
|
23
|
+
site_name: str = "linovelib"
|
24
|
+
|
25
25
|
BASE_URL = "https://www.linovelib.com"
|
26
26
|
BOOK_INFO_URL = "https://www.linovelib.com/novel/{book_id}.html"
|
27
27
|
BOOK_CATALOG_UTL = "https://www.linovelib.com/novel/{book_id}/catalog"
|
@@ -30,14 +30,6 @@ class LinovelibSession(BaseSession):
|
|
30
30
|
|
31
31
|
_VOL_ID_PATTERN: re.Pattern[str] = re.compile(r"/novel/\d+/(vol_\d+)\.html")
|
32
32
|
|
33
|
-
def __init__(
|
34
|
-
self,
|
35
|
-
config: FetcherConfig,
|
36
|
-
cookies: dict[str, str] | None = None,
|
37
|
-
**kwargs: Any,
|
38
|
-
) -> None:
|
39
|
-
super().__init__("linovelib", config, cookies, **kwargs)
|
40
|
-
|
41
33
|
async def get_book_info(
|
42
34
|
self,
|
43
35
|
book_id: str,
|
@@ -63,11 +55,7 @@ class LinovelibSession(BaseSession):
|
|
63
55
|
|
64
56
|
vol_htmls = []
|
65
57
|
for vol_id in vol_ids:
|
66
|
-
await async_jitter_sleep(
|
67
|
-
self.request_interval,
|
68
|
-
mul_spread=1.1,
|
69
|
-
max_sleep=self.request_interval + 2,
|
70
|
-
)
|
58
|
+
await self._sleep()
|
71
59
|
html = await self.get_book_volume(book_id, vol_id, **kwargs)
|
72
60
|
if html:
|
73
61
|
vol_htmls.append(html)
|
@@ -129,11 +117,7 @@ class LinovelibSession(BaseSession):
|
|
129
117
|
|
130
118
|
html_pages.append(html)
|
131
119
|
idx += 1
|
132
|
-
await async_jitter_sleep(
|
133
|
-
self.request_interval,
|
134
|
-
mul_spread=1.1,
|
135
|
-
max_sleep=self.request_interval + 2,
|
136
|
-
)
|
120
|
+
await self._sleep()
|
137
121
|
|
138
122
|
return html_pages
|
139
123
|
|
@@ -1,57 +1,42 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.core.fetchers.eightnovel
|
4
|
-
|
3
|
+
novel_downloader.core.fetchers.n8novel
|
4
|
+
--------------------------------------
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
import re
|
9
|
-
from re import Pattern
|
10
9
|
from typing import Any
|
11
10
|
|
12
11
|
from novel_downloader.core.fetchers.base import BaseSession
|
13
12
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
14
|
-
from novel_downloader.models import FetcherConfig
|
15
13
|
|
16
14
|
|
17
15
|
@register_fetcher(
|
18
|
-
site_keys=["8novel", "eightnovel"],
|
16
|
+
site_keys=["8novel", "n8novel"],
|
19
17
|
)
|
20
|
-
class EightnovelSession(BaseSession):
|
18
|
+
class N8novelSession(BaseSession):
|
21
19
|
"""
|
22
20
|
A session class for interacting with the 无限轻小说 (www.8novel.com) novel website.
|
23
21
|
"""
|
24
22
|
|
23
|
+
site_name: str = "n8novel"
|
24
|
+
|
25
25
|
BOOK_INFO_URL = "https://www.8novel.com/novelbooks/{book_id}/"
|
26
26
|
CHAPTER_URL = "https://article.8novel.com/read/{book_id}/?{chapter_id}"
|
27
27
|
CHAPTER_CONTENT_URL = (
|
28
28
|
"https://article.8novel.com/txt/1/{book_id}/{chapter_id}{seed_segment}.html"
|
29
29
|
)
|
30
30
|
|
31
|
-
|
32
|
-
r'["\'](
|
31
|
+
_SPLIT_DIGITS_PATTERN = re.compile(
|
32
|
+
r'["\'](\d+(?:,\d+)*)["\']\s*\.split\s*\(\s*["\']\s*,\s*["\']\s*\)', re.DOTALL
|
33
33
|
)
|
34
|
-
_DIGIT_LIST_PATTERN: Pattern[str] = re.compile(r"^\d+(?:,\d+)*$")
|
35
|
-
|
36
|
-
def __init__(
|
37
|
-
self,
|
38
|
-
config: FetcherConfig,
|
39
|
-
cookies: dict[str, str] | None = None,
|
40
|
-
**kwargs: Any,
|
41
|
-
) -> None:
|
42
|
-
super().__init__("eightnovel", config, cookies, **kwargs)
|
43
34
|
|
44
35
|
async def get_book_info(
|
45
36
|
self,
|
46
37
|
book_id: str,
|
47
38
|
**kwargs: Any,
|
48
39
|
) -> list[str]:
|
49
|
-
"""
|
50
|
-
Fetch the raw HTML of the book info page asynchronously.
|
51
|
-
|
52
|
-
:param book_id: The book identifier.
|
53
|
-
:return: The page content as string list.
|
54
|
-
"""
|
55
40
|
url = self.book_info_url(book_id=book_id)
|
56
41
|
return [await self.fetch(url, **kwargs)]
|
57
42
|
|
@@ -84,12 +69,6 @@ class EightnovelSession(BaseSession):
|
|
84
69
|
|
85
70
|
@classmethod
|
86
71
|
def book_info_url(cls, book_id: str) -> str:
|
87
|
-
"""
|
88
|
-
Construct the URL for fetching a book's info page.
|
89
|
-
|
90
|
-
:param book_id: The identifier of the book.
|
91
|
-
:return: Fully qualified URL for the book info page.
|
92
|
-
"""
|
93
72
|
return cls.BOOK_INFO_URL.format(book_id=book_id)
|
94
73
|
|
95
74
|
@classmethod
|
@@ -110,17 +89,10 @@ class EightnovelSession(BaseSession):
|
|
110
89
|
of the form "...".split(","), pick the ones that may contain seed,
|
111
90
|
and return the last value.
|
112
91
|
"""
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
]
|
118
|
-
|
119
|
-
if not numeric_lists:
|
120
|
-
return ""
|
121
|
-
|
122
|
-
last_list = numeric_lists[-1]
|
123
|
-
return last_list.split(",")[-1]
|
92
|
+
matches: list[str] = cls._SPLIT_DIGITS_PATTERN.findall(html_str)
|
93
|
+
if not matches:
|
94
|
+
raise ValueError("No digit lists found in HTML.")
|
95
|
+
return matches[-1].split(",")[-1]
|
124
96
|
|
125
97
|
@classmethod
|
126
98
|
def _build_chapter_content_url(
|
@@ -10,7 +10,6 @@ from typing import Any
|
|
10
10
|
|
11
11
|
from novel_downloader.core.fetchers.base import BaseSession
|
12
12
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
-
from novel_downloader.models import FetcherConfig
|
14
13
|
|
15
14
|
|
16
15
|
@register_fetcher(
|
@@ -21,18 +20,12 @@ class PiaotiaSession(BaseSession):
|
|
21
20
|
A session class for interacting with the 飘天文学网 (www.piaotia.com) novel website.
|
22
21
|
"""
|
23
22
|
|
23
|
+
site_name: str = "piaotia"
|
24
|
+
|
24
25
|
BOOK_INFO_URL = "https://www.piaotia.com/bookinfo/{book_id}.html"
|
25
26
|
BOOK_CATALOG_URL = "https://www.piaotia.com/html/{book_id}/index.html"
|
26
27
|
CHAPTER_URL = "https://www.piaotia.com/html/{book_id}/{chapter_id}.html"
|
27
28
|
|
28
|
-
def __init__(
|
29
|
-
self,
|
30
|
-
config: FetcherConfig,
|
31
|
-
cookies: dict[str, str] | None = None,
|
32
|
-
**kwargs: Any,
|
33
|
-
) -> None:
|
34
|
-
super().__init__("piaotia", config, cookies, **kwargs)
|
35
|
-
|
36
29
|
async def get_book_info(
|
37
30
|
self,
|
38
31
|
book_id: str,
|
@@ -62,13 +55,6 @@ class PiaotiaSession(BaseSession):
|
|
62
55
|
chapter_id: str,
|
63
56
|
**kwargs: Any,
|
64
57
|
) -> list[str]:
|
65
|
-
"""
|
66
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
67
|
-
|
68
|
-
:param book_id: The book identifier.
|
69
|
-
:param chapter_id: The chapter identifier.
|
70
|
-
:return: The page content as string list.
|
71
|
-
"""
|
72
58
|
book_id = book_id.replace("-", "/")
|
73
59
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
74
60
|
return [await self.fetch(url, **kwargs)]
|
@@ -11,7 +11,6 @@ from lxml import html
|
|
11
11
|
|
12
12
|
from novel_downloader.core.fetchers.base import BaseSession
|
13
13
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
14
|
-
from novel_downloader.models import FetcherConfig
|
15
14
|
|
16
15
|
|
17
16
|
@register_fetcher(
|
@@ -22,18 +21,12 @@ class QbtrSession(BaseSession):
|
|
22
21
|
A session class for interacting with the 全本同人小说 (www.qbtr.cc) novel website.
|
23
22
|
"""
|
24
23
|
|
24
|
+
site_name: str = "qbtr"
|
25
|
+
|
25
26
|
BASE_URL = "https://www.qbtr.cc"
|
26
27
|
BOOK_INFO_URL = "https://www.qbtr.cc/{book_id}.html"
|
27
28
|
CHAPTER_URL = "https://www.qbtr.cc/{book_id}/{chapter_id}.html"
|
28
29
|
|
29
|
-
def __init__(
|
30
|
-
self,
|
31
|
-
config: FetcherConfig,
|
32
|
-
cookies: dict[str, str] | None = None,
|
33
|
-
**kwargs: Any,
|
34
|
-
) -> None:
|
35
|
-
super().__init__("qbtr", config, cookies, **kwargs)
|
36
|
-
|
37
30
|
async def get_book_info(
|
38
31
|
self,
|
39
32
|
book_id: str,
|
@@ -68,13 +61,6 @@ class QbtrSession(BaseSession):
|
|
68
61
|
chapter_id: str,
|
69
62
|
**kwargs: Any,
|
70
63
|
) -> list[str]:
|
71
|
-
"""
|
72
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
73
|
-
|
74
|
-
:param book_id: The book identifier.
|
75
|
-
:param chapter_id: The chapter identifier.
|
76
|
-
:return: The page content as string list.
|
77
|
-
"""
|
78
64
|
book_id = book_id.replace("-", "/")
|
79
65
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
80
66
|
return [await self.fetch(url, **kwargs)]
|
@@ -10,7 +10,6 @@ from typing import Any
|
|
10
10
|
|
11
11
|
from novel_downloader.core.fetchers.base import BaseSession
|
12
12
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
-
from novel_downloader.models import FetcherConfig
|
14
13
|
|
15
14
|
|
16
15
|
@register_fetcher(
|
@@ -21,23 +20,12 @@ class QianbiSession(BaseSession):
|
|
21
20
|
A session class for interacting with the 铅笔小说 (www.23qb.com) novel website.
|
22
21
|
"""
|
23
22
|
|
24
|
-
|
25
|
-
"www.23qb.com",
|
26
|
-
"www.23qb.net",
|
27
|
-
]
|
23
|
+
site_name: str = "qianbi"
|
28
24
|
|
29
25
|
BOOK_INFO_URL = "https://www.23qb.com/book/{book_id}/"
|
30
26
|
BOOK_CATALOG_URL = "https://www.23qb.com/book/{book_id}/catalog"
|
31
27
|
CHAPTER_URL = "https://www.23qb.com/book/{book_id}/{chapter_id}.html"
|
32
28
|
|
33
|
-
def __init__(
|
34
|
-
self,
|
35
|
-
config: FetcherConfig,
|
36
|
-
cookies: dict[str, str] | None = None,
|
37
|
-
**kwargs: Any,
|
38
|
-
) -> None:
|
39
|
-
super().__init__("qianbi", config, cookies, **kwargs)
|
40
|
-
|
41
29
|
async def get_book_info(
|
42
30
|
self,
|
43
31
|
book_id: str,
|
@@ -66,13 +54,6 @@ class QianbiSession(BaseSession):
|
|
66
54
|
chapter_id: str,
|
67
55
|
**kwargs: Any,
|
68
56
|
) -> list[str]:
|
69
|
-
"""
|
70
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
71
|
-
|
72
|
-
:param book_id: The book identifier.
|
73
|
-
:param chapter_id: The chapter identifier.
|
74
|
-
:return: The page content as string list.
|
75
|
-
"""
|
76
57
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
77
58
|
return [await self.fetch(url, **kwargs)]
|
78
59
|
|