novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +14 -11
- novel_downloader/cli/export.py +19 -19
- novel_downloader/cli/ui.py +35 -8
- novel_downloader/config/adapter.py +216 -153
- novel_downloader/core/__init__.py +5 -6
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +27 -68
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +23 -49
- novel_downloader/core/interfaces/parser.py +2 -2
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +5 -39
- novel_downloader/core/parsers/esjzone.py +3 -3
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
- novel_downloader/core/parsers/qidian.py +717 -0
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +8 -15
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/locales/en.json +3 -3
- novel_downloader/locales/zh.json +3 -3
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/__init__.py +0 -2
- novel_downloader/utils/chapter_storage.py +2 -3
- novel_downloader/utils/constants.py +7 -3
- novel_downloader/utils/cookies.py +32 -17
- novel_downloader/utils/crypto_utils/__init__.py +0 -6
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/crypto_utils/rc4.py +40 -50
- novel_downloader/utils/epub/__init__.py +2 -3
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/epub/documents.py +7 -7
- novel_downloader/utils/epub/models.py +8 -8
- novel_downloader/utils/epub/utils.py +10 -10
- novel_downloader/utils/file_utils/io.py +48 -73
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -11
- novel_downloader/utils/fontocr/__init__.py +13 -0
- novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
- novel_downloader/utils/fontocr/loader.py +52 -0
- novel_downloader/utils/logger.py +80 -56
- novel_downloader/utils/network.py +16 -40
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/sleep_utils.py +53 -43
- novel_downloader/web/main.py +1 -1
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +4 -4
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
- novel_downloader-2.0.2.dist-info/RECORD +203 -0
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/main_parser.py +0 -101
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- novel_downloader-2.0.0.dist-info/RECORD +0 -210
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -20,29 +19,17 @@ class TongrenquanSession(BaseSession):
|
|
20
19
|
A session class for interacting with the 同人圈 (www.tongrenquan.org) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "tongrenquan"
|
23
|
+
|
23
24
|
BASE_URL = "https://www.tongrenquan.org"
|
24
25
|
BOOK_INFO_URL = "https://www.tongrenquan.org/tongren/{book_id}.html"
|
25
26
|
CHAPTER_URL = "https://www.tongrenquan.org/tongren/{book_id}/{chapter_id}.html"
|
26
27
|
|
27
|
-
def __init__(
|
28
|
-
self,
|
29
|
-
config: FetcherConfig,
|
30
|
-
cookies: dict[str, str] | None = None,
|
31
|
-
**kwargs: Any,
|
32
|
-
) -> None:
|
33
|
-
super().__init__("tongrenquan", config, cookies, **kwargs)
|
34
|
-
|
35
28
|
async def get_book_info(
|
36
29
|
self,
|
37
30
|
book_id: str,
|
38
31
|
**kwargs: Any,
|
39
32
|
) -> list[str]:
|
40
|
-
"""
|
41
|
-
Fetch the raw HTML of the book info page asynchronously.
|
42
|
-
|
43
|
-
:param book_id: The book identifier.
|
44
|
-
:return: The page content as string list.
|
45
|
-
"""
|
46
33
|
url = self.book_info_url(book_id=book_id)
|
47
34
|
return [await self.fetch(url, **kwargs)]
|
48
35
|
|
@@ -52,13 +39,6 @@ class TongrenquanSession(BaseSession):
|
|
52
39
|
chapter_id: str,
|
53
40
|
**kwargs: Any,
|
54
41
|
) -> list[str]:
|
55
|
-
"""
|
56
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
57
|
-
|
58
|
-
:param book_id: The book identifier.
|
59
|
-
:param chapter_id: The chapter identifier.
|
60
|
-
:return: The page content as string list.
|
61
|
-
"""
|
62
42
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
63
43
|
return [await self.fetch(url, **kwargs)]
|
64
44
|
|
@@ -20,6 +20,8 @@ class TtkanSession(BaseSession):
|
|
20
20
|
A session class for interacting with the 天天看小说 (www.ttkan.co) novel website.
|
21
21
|
"""
|
22
22
|
|
23
|
+
site_name: str = "ttkan"
|
24
|
+
|
23
25
|
BOOK_INFO_URL = "https://{lang}.ttkan.co/novel/chapters/{book_id}"
|
24
26
|
CHAPTER_URL = "https://{lang}.wa01.com/novel/pagea/{book_id}_{chapter_id}.html"
|
25
27
|
|
@@ -29,7 +31,7 @@ class TtkanSession(BaseSession):
|
|
29
31
|
cookies: dict[str, str] | None = None,
|
30
32
|
**kwargs: Any,
|
31
33
|
) -> None:
|
32
|
-
super().__init__("ttkan", config, cookies, **kwargs)
|
34
|
+
super().__init__(config, cookies, **kwargs)
|
33
35
|
self._lang = "cn" if config.locale_style == "simplified" else "tw"
|
34
36
|
|
35
37
|
async def get_book_info(
|
@@ -37,12 +39,6 @@ class TtkanSession(BaseSession):
|
|
37
39
|
book_id: str,
|
38
40
|
**kwargs: Any,
|
39
41
|
) -> list[str]:
|
40
|
-
"""
|
41
|
-
Fetch the raw HTML of the book info page asynchronously.
|
42
|
-
|
43
|
-
:param book_id: The book identifier.
|
44
|
-
:return: The page content as string list.
|
45
|
-
"""
|
46
42
|
url = self.book_info_url(book_id=book_id)
|
47
43
|
return [await self.fetch(url, **kwargs)]
|
48
44
|
|
@@ -52,13 +48,6 @@ class TtkanSession(BaseSession):
|
|
52
48
|
chapter_id: str,
|
53
49
|
**kwargs: Any,
|
54
50
|
) -> list[str]:
|
55
|
-
"""
|
56
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
57
|
-
|
58
|
-
:param book_id: The book identifier.
|
59
|
-
:param chapter_id: The chapter identifier.
|
60
|
-
:return: The page content as string list.
|
61
|
-
"""
|
62
51
|
url = self.chapter_url(
|
63
52
|
book_id=book_id,
|
64
53
|
chapter_id=chapter_id,
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -20,28 +19,16 @@ class WanbengoSession(BaseSession):
|
|
20
19
|
A session class for interacting with the 完本神站 (www.wanbengo.com) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "wanbengo"
|
23
|
+
|
23
24
|
BOOK_INFO_URL = "https://www.wanbengo.com/{book_id}/"
|
24
25
|
CHAPTER_URL = "https://www.wanbengo.com/{book_id}/{chapter_id}.html"
|
25
26
|
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
cookies: dict[str, str] | None = None,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("wanbengo", config, cookies, **kwargs)
|
33
|
-
|
34
27
|
async def get_book_info(
|
35
28
|
self,
|
36
29
|
book_id: str,
|
37
30
|
**kwargs: Any,
|
38
31
|
) -> list[str]:
|
39
|
-
"""
|
40
|
-
Fetch the raw HTML of the book info page asynchronously.
|
41
|
-
|
42
|
-
:param book_id: The book identifier.
|
43
|
-
:return: The page content as string list.
|
44
|
-
"""
|
45
32
|
url = self.book_info_url(book_id=book_id)
|
46
33
|
return [await self.fetch(url, **kwargs)]
|
47
34
|
|
@@ -51,13 +38,6 @@ class WanbengoSession(BaseSession):
|
|
51
38
|
chapter_id: str,
|
52
39
|
**kwargs: Any,
|
53
40
|
) -> list[str]:
|
54
|
-
"""
|
55
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
56
|
-
|
57
|
-
:param book_id: The book identifier.
|
58
|
-
:param chapter_id: The chapter identifier.
|
59
|
-
:return: The page content as string list.
|
60
|
-
"""
|
61
41
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
62
42
|
return [await self.fetch(url, **kwargs)]
|
63
43
|
|
@@ -10,7 +10,6 @@ from typing import Any
|
|
10
10
|
|
11
11
|
from novel_downloader.core.fetchers.base import BaseSession
|
12
12
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
-
from novel_downloader.models import FetcherConfig
|
14
13
|
|
15
14
|
|
16
15
|
@register_fetcher(
|
@@ -21,18 +20,12 @@ class XiaoshuowuSession(BaseSession):
|
|
21
20
|
A session class for interacting with the 小说屋 (www.xiaoshuoge.info) novel.
|
22
21
|
"""
|
23
22
|
|
23
|
+
site_name: str = "xiaoshuowu"
|
24
|
+
|
24
25
|
BOOK_INFO_URL = "http://www.xiaoshuoge.info/book/{book_id}/"
|
25
26
|
BOOK_CATALOG_URL = "http://www.xiaoshuoge.info/html/{book_id}/"
|
26
27
|
CHAPTER_URL = "http://www.xiaoshuoge.info/html/{book_id}/{chapter_id}.html"
|
27
28
|
|
28
|
-
def __init__(
|
29
|
-
self,
|
30
|
-
config: FetcherConfig,
|
31
|
-
cookies: dict[str, str] | None = None,
|
32
|
-
**kwargs: Any,
|
33
|
-
) -> None:
|
34
|
-
super().__init__("xiaoshuowu", config, cookies, **kwargs)
|
35
|
-
|
36
29
|
async def get_book_info(
|
37
30
|
self,
|
38
31
|
book_id: str,
|
@@ -62,13 +55,6 @@ class XiaoshuowuSession(BaseSession):
|
|
62
55
|
chapter_id: str,
|
63
56
|
**kwargs: Any,
|
64
57
|
) -> list[str]:
|
65
|
-
"""
|
66
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
67
|
-
|
68
|
-
:param book_id: The book identifier.
|
69
|
-
:param chapter_id: The chapter identifier.
|
70
|
-
:return: The page content as string list.
|
71
|
-
"""
|
72
58
|
book_id = book_id.replace("-", "/")
|
73
59
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
74
60
|
return [await self.fetch(url, ssl=False, **kwargs)]
|
@@ -9,8 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
|
-
from novel_downloader.utils import async_jitter_sleep
|
14
12
|
|
15
13
|
|
16
14
|
@register_fetcher(
|
@@ -21,19 +19,13 @@ class XiguashuwuSession(BaseSession):
|
|
21
19
|
A session class for interacting with the 西瓜书屋 (www.xiguashuwu.com) novel.
|
22
20
|
"""
|
23
21
|
|
22
|
+
site_name: str = "xiguashuwu"
|
23
|
+
|
24
24
|
BASE_URL = "https://www.xiguashuwu.com"
|
25
25
|
BOOK_INFO_URL = "https://www.xiguashuwu.com/book/{book_id}/iszip/0/"
|
26
26
|
BOOK_CATALOG_URL = "https://www.xiguashuwu.com/book/{book_id}/catalog/"
|
27
27
|
CHAPTER_URL = "https://www.xiguashuwu.com/book/{book_id}/{chapter_id}.html"
|
28
28
|
|
29
|
-
def __init__(
|
30
|
-
self,
|
31
|
-
config: FetcherConfig,
|
32
|
-
cookies: dict[str, str] | None = None,
|
33
|
-
**kwargs: Any,
|
34
|
-
) -> None:
|
35
|
-
super().__init__("xiguashuwu", config, cookies, **kwargs)
|
36
|
-
|
37
29
|
async def get_book_info(
|
38
30
|
self,
|
39
31
|
book_id: str,
|
@@ -85,11 +77,7 @@ class XiguashuwuSession(BaseSession):
|
|
85
77
|
if not any(pat in html for pat in next_patterns):
|
86
78
|
break
|
87
79
|
|
88
|
-
await async_jitter_sleep(
|
89
|
-
self.request_interval,
|
90
|
-
mul_spread=1.1,
|
91
|
-
max_sleep=self.request_interval + 2,
|
92
|
-
)
|
80
|
+
await self._sleep()
|
93
81
|
return [info_html, *catalog_pages]
|
94
82
|
|
95
83
|
async def get_book_chapter(
|
@@ -130,11 +118,7 @@ class XiguashuwuSession(BaseSession):
|
|
130
118
|
|
131
119
|
html_pages.append(html)
|
132
120
|
idx += 1
|
133
|
-
await async_jitter_sleep(
|
134
|
-
self.request_interval,
|
135
|
-
mul_spread=1.1,
|
136
|
-
max_sleep=self.request_interval + 2,
|
137
|
-
)
|
121
|
+
await self._sleep()
|
138
122
|
|
139
123
|
return html_pages
|
140
124
|
|
@@ -12,8 +12,6 @@ from typing import Any
|
|
12
12
|
|
13
13
|
from novel_downloader.core.fetchers.base import BaseSession
|
14
14
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
15
|
-
from novel_downloader.models import FetcherConfig
|
16
|
-
from novel_downloader.utils import async_jitter_sleep
|
17
15
|
|
18
16
|
|
19
17
|
@register_fetcher(
|
@@ -24,6 +22,8 @@ class Xs63bSession(BaseSession):
|
|
24
22
|
A session class for interacting with the 小说路上 (m.xs63b.com) novel website.
|
25
23
|
"""
|
26
24
|
|
25
|
+
site_name: str = "xs63b"
|
26
|
+
|
27
27
|
BOOK_INFO_URL = "https://m.xs63b.com/{book_id}/"
|
28
28
|
BOOK_CATALOG_URL = "https://www.xs63b.com/{book_id}/"
|
29
29
|
CHAPTER_URL = "https://m.xs63b.com/{book_id}/{chapter_id}.html"
|
@@ -31,14 +31,6 @@ class Xs63bSession(BaseSession):
|
|
31
31
|
_JSARR_PATTERN = re.compile(r"var\s+jsarr\s*=\s*\[([^\]]+)\]")
|
32
32
|
_JSSTR_PATTERN = re.compile(r"var\s+jsstr\s*=\s*\"([^\"]+)\";")
|
33
33
|
|
34
|
-
def __init__(
|
35
|
-
self,
|
36
|
-
config: FetcherConfig,
|
37
|
-
cookies: dict[str, str] | None = None,
|
38
|
-
**kwargs: Any,
|
39
|
-
) -> None:
|
40
|
-
super().__init__("xs63b", config, cookies, **kwargs)
|
41
|
-
|
42
34
|
async def get_book_info(
|
43
35
|
self,
|
44
36
|
book_id: str,
|
@@ -100,11 +92,7 @@ class Xs63bSession(BaseSession):
|
|
100
92
|
jsstr = self._parse_jsstr(html)
|
101
93
|
chapter_url = self._build_chapter_url(book_id, jsarr, jsstr)
|
102
94
|
|
103
|
-
await async_jitter_sleep(
|
104
|
-
self.request_interval,
|
105
|
-
mul_spread=1.1,
|
106
|
-
max_sleep=self.request_interval + 2,
|
107
|
-
)
|
95
|
+
await self._sleep()
|
108
96
|
|
109
97
|
return html_pages
|
110
98
|
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -20,28 +19,16 @@ class XshbookSession(BaseSession):
|
|
20
19
|
A session class for interacting with the 小说虎 (www.xshbook.com) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "xshbook"
|
23
|
+
|
23
24
|
BOOK_INFO_URL = "https://www.xshbook.com/{book_id}/"
|
24
25
|
CHAPTER_URL = "https://www.xshbook.com/{book_id}/{chapter_id}.html"
|
25
26
|
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
cookies: dict[str, str] | None = None,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("xshbook", config, cookies, **kwargs)
|
33
|
-
|
34
27
|
async def get_book_info(
|
35
28
|
self,
|
36
29
|
book_id: str,
|
37
30
|
**kwargs: Any,
|
38
31
|
) -> list[str]:
|
39
|
-
"""
|
40
|
-
Fetch the raw HTML of the book info page asynchronously.
|
41
|
-
|
42
|
-
:param book_id: The book identifier.
|
43
|
-
:return: The page content as string list.
|
44
|
-
"""
|
45
32
|
book_id = book_id.replace("-", "/")
|
46
33
|
url = self.book_info_url(book_id=book_id)
|
47
34
|
return [await self.fetch(url, **kwargs)]
|
@@ -52,13 +39,6 @@ class XshbookSession(BaseSession):
|
|
52
39
|
chapter_id: str,
|
53
40
|
**kwargs: Any,
|
54
41
|
) -> list[str]:
|
55
|
-
"""
|
56
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
57
|
-
|
58
|
-
:param book_id: The book identifier.
|
59
|
-
:param chapter_id: The chapter identifier.
|
60
|
-
:return: The page content as string list.
|
61
|
-
"""
|
62
42
|
book_id = book_id.replace("-", "/")
|
63
43
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
64
44
|
return [await self.fetch(url, **kwargs)]
|
@@ -12,8 +12,7 @@ from lxml import html
|
|
12
12
|
|
13
13
|
from novel_downloader.core.fetchers.base import BaseSession
|
14
14
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
15
|
-
from novel_downloader.models import FetcherConfig, LoginField
|
16
|
-
from novel_downloader.utils import async_jitter_sleep
|
15
|
+
from novel_downloader.models import LoginField
|
17
16
|
|
18
17
|
|
19
18
|
@register_fetcher(
|
@@ -24,6 +23,8 @@ class YamiboSession(BaseSession):
|
|
24
23
|
A session class for interacting with the 百合会 (www.yamibo.com) novel website.
|
25
24
|
"""
|
26
25
|
|
26
|
+
site_name: str = "yamibo"
|
27
|
+
|
27
28
|
BASE_URL = "https://www.yamibo.com"
|
28
29
|
BOOKCASE_URL = "https://www.yamibo.com/my/fav"
|
29
30
|
BOOK_INFO_URL = "https://www.yamibo.com/novel/{book_id}"
|
@@ -31,14 +32,6 @@ class YamiboSession(BaseSession):
|
|
31
32
|
|
32
33
|
LOGIN_URL = "https://www.yamibo.com/user/login"
|
33
34
|
|
34
|
-
def __init__(
|
35
|
-
self,
|
36
|
-
config: FetcherConfig,
|
37
|
-
cookies: dict[str, str] | None = None,
|
38
|
-
**kwargs: Any,
|
39
|
-
) -> None:
|
40
|
-
super().__init__("yamibo", config, cookies, **kwargs)
|
41
|
-
|
42
35
|
async def login(
|
43
36
|
self,
|
44
37
|
username: str = "",
|
@@ -69,11 +62,7 @@ class YamiboSession(BaseSession):
|
|
69
62
|
):
|
70
63
|
self._is_logged_in = True
|
71
64
|
return True
|
72
|
-
await async_jitter_sleep(
|
73
|
-
self.backoff_factor,
|
74
|
-
mul_spread=1.1,
|
75
|
-
max_sleep=self.backoff_factor + 2,
|
76
|
-
)
|
65
|
+
await self._sleep()
|
77
66
|
|
78
67
|
self._is_logged_in = False
|
79
68
|
return False
|
@@ -83,12 +72,6 @@ class YamiboSession(BaseSession):
|
|
83
72
|
book_id: str,
|
84
73
|
**kwargs: Any,
|
85
74
|
) -> list[str]:
|
86
|
-
"""
|
87
|
-
Fetch the raw HTML of the book info page asynchronously.
|
88
|
-
|
89
|
-
:param book_id: The book identifier.
|
90
|
-
:return: The page content as string list.
|
91
|
-
"""
|
92
75
|
url = self.book_info_url(book_id=book_id)
|
93
76
|
return [await self.fetch(url, **kwargs)]
|
94
77
|
|
@@ -98,13 +81,6 @@ class YamiboSession(BaseSession):
|
|
98
81
|
chapter_id: str,
|
99
82
|
**kwargs: Any,
|
100
83
|
) -> list[str]:
|
101
|
-
"""
|
102
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
103
|
-
|
104
|
-
:param book_id: The book identifier.
|
105
|
-
:param chapter_id: The chapter identifier.
|
106
|
-
:return: The page content as string list.
|
107
|
-
"""
|
108
84
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
109
85
|
return [await self.fetch(url, **kwargs)]
|
110
86
|
|
@@ -10,7 +10,6 @@ from typing import Any
|
|
10
10
|
|
11
11
|
from novel_downloader.core.fetchers.base import BaseSession
|
12
12
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
-
from novel_downloader.models import FetcherConfig
|
14
13
|
|
15
14
|
|
16
15
|
@register_fetcher(
|
@@ -21,25 +20,20 @@ class YibigeSession(BaseSession):
|
|
21
20
|
A session class for interacting with the 一笔阁 (www.yibige.org) novel website.
|
22
21
|
"""
|
23
22
|
|
23
|
+
site_name: str = "yibige"
|
24
|
+
BASE_URL_MAP: dict[str, str] = {
|
25
|
+
"simplified": "www.yibige.org", # 主站
|
26
|
+
"traditional": "tw.yibige.org",
|
27
|
+
"singapore": "sg.yibige.org", # 新加坡
|
28
|
+
"taiwan": "tw.yibige.org", # 臺灣正體
|
29
|
+
"hongkong": "hk.yibige.org", # 香港繁體
|
30
|
+
}
|
31
|
+
DEFAULT_BASE_URL: str = "www.yibige.org"
|
32
|
+
|
24
33
|
BOOK_INFO_URL = "https://{base_url}/{book_id}/"
|
25
34
|
BOOK_CATALOG_URL = "https://{base_url}/{book_id}/index.html"
|
26
35
|
CHAPTER_URL = "https://{base_url}/{book_id}/{chapter_id}.html"
|
27
36
|
|
28
|
-
def __init__(
|
29
|
-
self,
|
30
|
-
config: FetcherConfig,
|
31
|
-
cookies: dict[str, str] | None = None,
|
32
|
-
**kwargs: Any,
|
33
|
-
) -> None:
|
34
|
-
super().__init__("yibige", config, cookies, **kwargs)
|
35
|
-
self.base_url = (
|
36
|
-
"www.yibige.org" if config.locale_style == "simplified" else "tw.yibige.org"
|
37
|
-
)
|
38
|
-
# 主站: www.yibige.org
|
39
|
-
# 新加坡: sg.yibige.org
|
40
|
-
# 臺灣正體: tw.yibige.org
|
41
|
-
# 香港繁體: hk.yibige.org
|
42
|
-
|
43
37
|
async def get_book_info(
|
44
38
|
self,
|
45
39
|
book_id: str,
|
@@ -53,8 +47,8 @@ class YibigeSession(BaseSession):
|
|
53
47
|
:param book_id: The book identifier.
|
54
48
|
:return: The page content as string list.
|
55
49
|
"""
|
56
|
-
info_url = self.book_info_url(base_url=self.base_url, book_id=book_id)
|
57
|
-
catalog_url = self.book_catalog_url(base_url=self.base_url, book_id=book_id)
|
50
|
+
info_url = self.book_info_url(base_url=self._base_url, book_id=book_id)
|
51
|
+
catalog_url = self.book_catalog_url(base_url=self._base_url, book_id=book_id)
|
58
52
|
|
59
53
|
info_html, catalog_html = await asyncio.gather(
|
60
54
|
self.fetch(info_url, **kwargs),
|
@@ -68,15 +62,8 @@ class YibigeSession(BaseSession):
|
|
68
62
|
chapter_id: str,
|
69
63
|
**kwargs: Any,
|
70
64
|
) -> list[str]:
|
71
|
-
"""
|
72
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
73
|
-
|
74
|
-
:param book_id: The book identifier.
|
75
|
-
:param chapter_id: The chapter identifier.
|
76
|
-
:return: The page content as string list.
|
77
|
-
"""
|
78
65
|
url = self.chapter_url(
|
79
|
-
base_url=self.base_url, book_id=book_id, chapter_id=chapter_id
|
66
|
+
base_url=self._base_url, book_id=book_id, chapter_id=chapter_id
|
80
67
|
)
|
81
68
|
return [await self.fetch(url, **kwargs)]
|
82
69
|
|
@@ -6,18 +6,13 @@ novel_downloader.core.interfaces.exporter
|
|
6
6
|
Protocol defining the interface for exporting books to text, EPUB, and other formats.
|
7
7
|
"""
|
8
8
|
|
9
|
+
import types
|
9
10
|
from pathlib import Path
|
10
|
-
from typing import Protocol, runtime_checkable
|
11
|
+
from typing import Protocol, Self, runtime_checkable
|
11
12
|
|
12
13
|
|
13
14
|
@runtime_checkable
|
14
15
|
class ExporterProtocol(Protocol):
|
15
|
-
"""
|
16
|
-
A exporter must implement a method to persist a completed book as plain text.
|
17
|
-
|
18
|
-
It may also optionally implement an EPUB (or other format) exporter.
|
19
|
-
"""
|
20
|
-
|
21
16
|
def export(self, book_id: str) -> dict[str, Path]:
|
22
17
|
"""
|
23
18
|
Export the book in the formats specified in config.
|
@@ -58,3 +53,20 @@ class ExporterProtocol(Protocol):
|
|
58
53
|
:param book_id: The book identifier.
|
59
54
|
"""
|
60
55
|
...
|
56
|
+
|
57
|
+
def close(self) -> None:
|
58
|
+
"""
|
59
|
+
Shutdown and clean up the exporter.
|
60
|
+
"""
|
61
|
+
...
|
62
|
+
|
63
|
+
def __enter__(self) -> Self:
|
64
|
+
...
|
65
|
+
|
66
|
+
def __exit__(
|
67
|
+
self,
|
68
|
+
exc_type: type[BaseException] | None,
|
69
|
+
exc_val: BaseException | None,
|
70
|
+
tb: types.TracebackType | None,
|
71
|
+
) -> None:
|
72
|
+
...
|
@@ -16,11 +16,29 @@ from novel_downloader.models import LoginField
|
|
16
16
|
class FetcherProtocol(Protocol):
|
17
17
|
"""
|
18
18
|
An async requester must be able to fetch raw HTML/data for:
|
19
|
-
|
20
|
-
|
19
|
+
* a book's info page,
|
20
|
+
* a specific chapter page,
|
21
21
|
and manage login/shutdown asynchronously.
|
22
22
|
"""
|
23
23
|
|
24
|
+
async def init(
|
25
|
+
self,
|
26
|
+
**kwargs: Any,
|
27
|
+
) -> None:
|
28
|
+
"""
|
29
|
+
Perform async initialization, such as creating a session.
|
30
|
+
|
31
|
+
This should be called before using any other method
|
32
|
+
if initialization is required.
|
33
|
+
"""
|
34
|
+
...
|
35
|
+
|
36
|
+
async def close(self) -> None:
|
37
|
+
"""
|
38
|
+
Shutdown and clean up any resources.
|
39
|
+
"""
|
40
|
+
...
|
41
|
+
|
24
42
|
async def login(
|
25
43
|
self,
|
26
44
|
username: str = "",
|
@@ -64,50 +82,6 @@ class FetcherProtocol(Protocol):
|
|
64
82
|
"""
|
65
83
|
...
|
66
84
|
|
67
|
-
async def get_bookcase(
|
68
|
-
self,
|
69
|
-
**kwargs: Any,
|
70
|
-
) -> list[str]:
|
71
|
-
"""
|
72
|
-
Optional: Retrieve the HTML content of the authenticated
|
73
|
-
user's bookcase page asynchronously.
|
74
|
-
|
75
|
-
:return: The HTML markup of the bookcase page.
|
76
|
-
"""
|
77
|
-
...
|
78
|
-
|
79
|
-
async def fetch(
|
80
|
-
self,
|
81
|
-
url: str,
|
82
|
-
**kwargs: Any,
|
83
|
-
) -> str:
|
84
|
-
"""
|
85
|
-
Perform a generic HTTP request and return the response body as text.
|
86
|
-
|
87
|
-
:param url: The URL to request.
|
88
|
-
:return: The response content as a string (HTML or JSON or plain text).
|
89
|
-
"""
|
90
|
-
...
|
91
|
-
|
92
|
-
async def init(
|
93
|
-
self,
|
94
|
-
**kwargs: Any,
|
95
|
-
) -> None:
|
96
|
-
"""
|
97
|
-
Perform async initialization, such as
|
98
|
-
launching a browser or creating a session.
|
99
|
-
|
100
|
-
This should be called before using any other method
|
101
|
-
if initialization is required.
|
102
|
-
"""
|
103
|
-
...
|
104
|
-
|
105
|
-
async def close(self) -> None:
|
106
|
-
"""
|
107
|
-
Shutdown and clean up any resources.
|
108
|
-
"""
|
109
|
-
...
|
110
|
-
|
111
85
|
async def load_state(self) -> bool:
|
112
86
|
"""
|
113
87
|
Restore session state from a persistent storage,
|
@@ -137,6 +111,9 @@ class FetcherProtocol(Protocol):
|
|
137
111
|
def login_fields(self) -> list[LoginField]:
|
138
112
|
...
|
139
113
|
|
114
|
+
async def __aenter__(self) -> Self:
|
115
|
+
...
|
116
|
+
|
140
117
|
async def __aexit__(
|
141
118
|
self,
|
142
119
|
exc_type: type[BaseException] | None,
|
@@ -144,6 +121,3 @@ class FetcherProtocol(Protocol):
|
|
144
121
|
tb: types.TracebackType | None,
|
145
122
|
) -> None:
|
146
123
|
...
|
147
|
-
|
148
|
-
async def __aenter__(self) -> Self:
|
149
|
-
...
|
@@ -15,8 +15,8 @@ from novel_downloader.models import BookInfoDict, ChapterDict
|
|
15
15
|
class ParserProtocol(Protocol):
|
16
16
|
"""
|
17
17
|
A parser must be able to:
|
18
|
-
|
19
|
-
|
18
|
+
* extract book metadata from an HTML string,
|
19
|
+
* extract a single chapter's text from an HTML string
|
20
20
|
"""
|
21
21
|
|
22
22
|
def parse_book_info(
|
@@ -13,7 +13,6 @@ __all__ = [
|
|
13
13
|
"BiqugeParser",
|
14
14
|
"BiquyueduParser",
|
15
15
|
"DxmwxParser",
|
16
|
-
"EightnovelParser",
|
17
16
|
"EsjzoneParser",
|
18
17
|
"GuidayeParser",
|
19
18
|
"HetushuParser",
|
@@ -22,10 +21,12 @@ __all__ = [
|
|
22
21
|
"Jpxs123Parser",
|
23
22
|
"LewennParser",
|
24
23
|
"LinovelibParser",
|
24
|
+
"N8novelParser",
|
25
25
|
"PiaotiaParser",
|
26
26
|
"QbtrParser",
|
27
27
|
"QianbiParser",
|
28
28
|
"QidianParser",
|
29
|
+
"QqbookParser",
|
29
30
|
"Quanben5Parser",
|
30
31
|
"SfacgParser",
|
31
32
|
"ShencouParser",
|
@@ -45,7 +46,6 @@ from .aaatxt import AaatxtParser
|
|
45
46
|
from .b520 import BiqugeParser
|
46
47
|
from .biquyuedu import BiquyueduParser
|
47
48
|
from .dxmwx import DxmwxParser
|
48
|
-
from .eightnovel import EightnovelParser
|
49
49
|
from .esjzone import EsjzoneParser
|
50
50
|
from .guidaye import GuidayeParser
|
51
51
|
from .hetushu import HetushuParser
|
@@ -54,10 +54,12 @@ from .ixdzs8 import Ixdzs8Parser
|
|
54
54
|
from .jpxs123 import Jpxs123Parser
|
55
55
|
from .lewenn import LewennParser
|
56
56
|
from .linovelib import LinovelibParser
|
57
|
+
from .n8novel import N8novelParser
|
57
58
|
from .piaotia import PiaotiaParser
|
58
59
|
from .qbtr import QbtrParser
|
59
60
|
from .qianbi import QianbiParser
|
60
61
|
from .qidian import QidianParser
|
62
|
+
from .qqbook import QqbookParser
|
61
63
|
from .quanben5 import Quanben5Parser
|
62
64
|
from .registry import get_parser
|
63
65
|
from .sfacg import SfacgParser
|