novel-downloader 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +11 -8
- novel_downloader/cli/export.py +17 -17
- novel_downloader/cli/ui.py +28 -1
- novel_downloader/config/adapter.py +27 -1
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +7 -33
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +21 -47
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +4 -39
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/core/parsers/{qidian/main_parser.py → qidian.py} +147 -266
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +3 -4
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/constants.py +6 -0
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/fontocr/core.py +2 -0
- novel_downloader/utils/fontocr/loader.py +10 -8
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +1 -1
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +4 -1
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/RECORD +91 -94
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -11
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -20,28 +19,16 @@ class WanbengoSession(BaseSession):
|
|
20
19
|
A session class for interacting with the 完本神站 (www.wanbengo.com) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "wanbengo"
|
23
|
+
|
23
24
|
BOOK_INFO_URL = "https://www.wanbengo.com/{book_id}/"
|
24
25
|
CHAPTER_URL = "https://www.wanbengo.com/{book_id}/{chapter_id}.html"
|
25
26
|
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
cookies: dict[str, str] | None = None,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("wanbengo", config, cookies, **kwargs)
|
33
|
-
|
34
27
|
async def get_book_info(
|
35
28
|
self,
|
36
29
|
book_id: str,
|
37
30
|
**kwargs: Any,
|
38
31
|
) -> list[str]:
|
39
|
-
"""
|
40
|
-
Fetch the raw HTML of the book info page asynchronously.
|
41
|
-
|
42
|
-
:param book_id: The book identifier.
|
43
|
-
:return: The page content as string list.
|
44
|
-
"""
|
45
32
|
url = self.book_info_url(book_id=book_id)
|
46
33
|
return [await self.fetch(url, **kwargs)]
|
47
34
|
|
@@ -51,13 +38,6 @@ class WanbengoSession(BaseSession):
|
|
51
38
|
chapter_id: str,
|
52
39
|
**kwargs: Any,
|
53
40
|
) -> list[str]:
|
54
|
-
"""
|
55
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
56
|
-
|
57
|
-
:param book_id: The book identifier.
|
58
|
-
:param chapter_id: The chapter identifier.
|
59
|
-
:return: The page content as string list.
|
60
|
-
"""
|
61
41
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
62
42
|
return [await self.fetch(url, **kwargs)]
|
63
43
|
|
@@ -10,7 +10,6 @@ from typing import Any
|
|
10
10
|
|
11
11
|
from novel_downloader.core.fetchers.base import BaseSession
|
12
12
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
-
from novel_downloader.models import FetcherConfig
|
14
13
|
|
15
14
|
|
16
15
|
@register_fetcher(
|
@@ -21,18 +20,12 @@ class XiaoshuowuSession(BaseSession):
|
|
21
20
|
A session class for interacting with the 小说屋 (www.xiaoshuoge.info) novel.
|
22
21
|
"""
|
23
22
|
|
23
|
+
site_name: str = "xiaoshuowu"
|
24
|
+
|
24
25
|
BOOK_INFO_URL = "http://www.xiaoshuoge.info/book/{book_id}/"
|
25
26
|
BOOK_CATALOG_URL = "http://www.xiaoshuoge.info/html/{book_id}/"
|
26
27
|
CHAPTER_URL = "http://www.xiaoshuoge.info/html/{book_id}/{chapter_id}.html"
|
27
28
|
|
28
|
-
def __init__(
|
29
|
-
self,
|
30
|
-
config: FetcherConfig,
|
31
|
-
cookies: dict[str, str] | None = None,
|
32
|
-
**kwargs: Any,
|
33
|
-
) -> None:
|
34
|
-
super().__init__("xiaoshuowu", config, cookies, **kwargs)
|
35
|
-
|
36
29
|
async def get_book_info(
|
37
30
|
self,
|
38
31
|
book_id: str,
|
@@ -62,13 +55,6 @@ class XiaoshuowuSession(BaseSession):
|
|
62
55
|
chapter_id: str,
|
63
56
|
**kwargs: Any,
|
64
57
|
) -> list[str]:
|
65
|
-
"""
|
66
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
67
|
-
|
68
|
-
:param book_id: The book identifier.
|
69
|
-
:param chapter_id: The chapter identifier.
|
70
|
-
:return: The page content as string list.
|
71
|
-
"""
|
72
58
|
book_id = book_id.replace("-", "/")
|
73
59
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
74
60
|
return [await self.fetch(url, ssl=False, **kwargs)]
|
@@ -9,8 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
|
-
from novel_downloader.utils import async_jitter_sleep
|
14
12
|
|
15
13
|
|
16
14
|
@register_fetcher(
|
@@ -21,19 +19,13 @@ class XiguashuwuSession(BaseSession):
|
|
21
19
|
A session class for interacting with the 西瓜书屋 (www.xiguashuwu.com) novel.
|
22
20
|
"""
|
23
21
|
|
22
|
+
site_name: str = "xiguashuwu"
|
23
|
+
|
24
24
|
BASE_URL = "https://www.xiguashuwu.com"
|
25
25
|
BOOK_INFO_URL = "https://www.xiguashuwu.com/book/{book_id}/iszip/0/"
|
26
26
|
BOOK_CATALOG_URL = "https://www.xiguashuwu.com/book/{book_id}/catalog/"
|
27
27
|
CHAPTER_URL = "https://www.xiguashuwu.com/book/{book_id}/{chapter_id}.html"
|
28
28
|
|
29
|
-
def __init__(
|
30
|
-
self,
|
31
|
-
config: FetcherConfig,
|
32
|
-
cookies: dict[str, str] | None = None,
|
33
|
-
**kwargs: Any,
|
34
|
-
) -> None:
|
35
|
-
super().__init__("xiguashuwu", config, cookies, **kwargs)
|
36
|
-
|
37
29
|
async def get_book_info(
|
38
30
|
self,
|
39
31
|
book_id: str,
|
@@ -85,11 +77,7 @@ class XiguashuwuSession(BaseSession):
|
|
85
77
|
if not any(pat in html for pat in next_patterns):
|
86
78
|
break
|
87
79
|
|
88
|
-
await async_jitter_sleep(
|
89
|
-
self.request_interval,
|
90
|
-
mul_spread=1.1,
|
91
|
-
max_sleep=self.request_interval + 2,
|
92
|
-
)
|
80
|
+
await self._sleep()
|
93
81
|
return [info_html, *catalog_pages]
|
94
82
|
|
95
83
|
async def get_book_chapter(
|
@@ -130,11 +118,7 @@ class XiguashuwuSession(BaseSession):
|
|
130
118
|
|
131
119
|
html_pages.append(html)
|
132
120
|
idx += 1
|
133
|
-
await async_jitter_sleep(
|
134
|
-
self.request_interval,
|
135
|
-
mul_spread=1.1,
|
136
|
-
max_sleep=self.request_interval + 2,
|
137
|
-
)
|
121
|
+
await self._sleep()
|
138
122
|
|
139
123
|
return html_pages
|
140
124
|
|
@@ -12,8 +12,6 @@ from typing import Any
|
|
12
12
|
|
13
13
|
from novel_downloader.core.fetchers.base import BaseSession
|
14
14
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
15
|
-
from novel_downloader.models import FetcherConfig
|
16
|
-
from novel_downloader.utils import async_jitter_sleep
|
17
15
|
|
18
16
|
|
19
17
|
@register_fetcher(
|
@@ -24,6 +22,8 @@ class Xs63bSession(BaseSession):
|
|
24
22
|
A session class for interacting with the 小说路上 (m.xs63b.com) novel website.
|
25
23
|
"""
|
26
24
|
|
25
|
+
site_name: str = "xs63b"
|
26
|
+
|
27
27
|
BOOK_INFO_URL = "https://m.xs63b.com/{book_id}/"
|
28
28
|
BOOK_CATALOG_URL = "https://www.xs63b.com/{book_id}/"
|
29
29
|
CHAPTER_URL = "https://m.xs63b.com/{book_id}/{chapter_id}.html"
|
@@ -31,14 +31,6 @@ class Xs63bSession(BaseSession):
|
|
31
31
|
_JSARR_PATTERN = re.compile(r"var\s+jsarr\s*=\s*\[([^\]]+)\]")
|
32
32
|
_JSSTR_PATTERN = re.compile(r"var\s+jsstr\s*=\s*\"([^\"]+)\";")
|
33
33
|
|
34
|
-
def __init__(
|
35
|
-
self,
|
36
|
-
config: FetcherConfig,
|
37
|
-
cookies: dict[str, str] | None = None,
|
38
|
-
**kwargs: Any,
|
39
|
-
) -> None:
|
40
|
-
super().__init__("xs63b", config, cookies, **kwargs)
|
41
|
-
|
42
34
|
async def get_book_info(
|
43
35
|
self,
|
44
36
|
book_id: str,
|
@@ -100,11 +92,7 @@ class Xs63bSession(BaseSession):
|
|
100
92
|
jsstr = self._parse_jsstr(html)
|
101
93
|
chapter_url = self._build_chapter_url(book_id, jsarr, jsstr)
|
102
94
|
|
103
|
-
await async_jitter_sleep(
|
104
|
-
self.request_interval,
|
105
|
-
mul_spread=1.1,
|
106
|
-
max_sleep=self.request_interval + 2,
|
107
|
-
)
|
95
|
+
await self._sleep()
|
108
96
|
|
109
97
|
return html_pages
|
110
98
|
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -20,28 +19,16 @@ class XshbookSession(BaseSession):
|
|
20
19
|
A session class for interacting with the 小说虎 (www.xshbook.com) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "xshbook"
|
23
|
+
|
23
24
|
BOOK_INFO_URL = "https://www.xshbook.com/{book_id}/"
|
24
25
|
CHAPTER_URL = "https://www.xshbook.com/{book_id}/{chapter_id}.html"
|
25
26
|
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
cookies: dict[str, str] | None = None,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("xshbook", config, cookies, **kwargs)
|
33
|
-
|
34
27
|
async def get_book_info(
|
35
28
|
self,
|
36
29
|
book_id: str,
|
37
30
|
**kwargs: Any,
|
38
31
|
) -> list[str]:
|
39
|
-
"""
|
40
|
-
Fetch the raw HTML of the book info page asynchronously.
|
41
|
-
|
42
|
-
:param book_id: The book identifier.
|
43
|
-
:return: The page content as string list.
|
44
|
-
"""
|
45
32
|
book_id = book_id.replace("-", "/")
|
46
33
|
url = self.book_info_url(book_id=book_id)
|
47
34
|
return [await self.fetch(url, **kwargs)]
|
@@ -52,13 +39,6 @@ class XshbookSession(BaseSession):
|
|
52
39
|
chapter_id: str,
|
53
40
|
**kwargs: Any,
|
54
41
|
) -> list[str]:
|
55
|
-
"""
|
56
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
57
|
-
|
58
|
-
:param book_id: The book identifier.
|
59
|
-
:param chapter_id: The chapter identifier.
|
60
|
-
:return: The page content as string list.
|
61
|
-
"""
|
62
42
|
book_id = book_id.replace("-", "/")
|
63
43
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
64
44
|
return [await self.fetch(url, **kwargs)]
|
@@ -12,8 +12,7 @@ from lxml import html
|
|
12
12
|
|
13
13
|
from novel_downloader.core.fetchers.base import BaseSession
|
14
14
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
15
|
-
from novel_downloader.models import FetcherConfig, LoginField
|
16
|
-
from novel_downloader.utils import async_jitter_sleep
|
15
|
+
from novel_downloader.models import LoginField
|
17
16
|
|
18
17
|
|
19
18
|
@register_fetcher(
|
@@ -24,6 +23,8 @@ class YamiboSession(BaseSession):
|
|
24
23
|
A session class for interacting with the 百合会 (www.yamibo.com) novel website.
|
25
24
|
"""
|
26
25
|
|
26
|
+
site_name: str = "yamibo"
|
27
|
+
|
27
28
|
BASE_URL = "https://www.yamibo.com"
|
28
29
|
BOOKCASE_URL = "https://www.yamibo.com/my/fav"
|
29
30
|
BOOK_INFO_URL = "https://www.yamibo.com/novel/{book_id}"
|
@@ -31,14 +32,6 @@ class YamiboSession(BaseSession):
|
|
31
32
|
|
32
33
|
LOGIN_URL = "https://www.yamibo.com/user/login"
|
33
34
|
|
34
|
-
def __init__(
|
35
|
-
self,
|
36
|
-
config: FetcherConfig,
|
37
|
-
cookies: dict[str, str] | None = None,
|
38
|
-
**kwargs: Any,
|
39
|
-
) -> None:
|
40
|
-
super().__init__("yamibo", config, cookies, **kwargs)
|
41
|
-
|
42
35
|
async def login(
|
43
36
|
self,
|
44
37
|
username: str = "",
|
@@ -69,11 +62,7 @@ class YamiboSession(BaseSession):
|
|
69
62
|
):
|
70
63
|
self._is_logged_in = True
|
71
64
|
return True
|
72
|
-
await async_jitter_sleep(
|
73
|
-
self.backoff_factor,
|
74
|
-
mul_spread=1.1,
|
75
|
-
max_sleep=self.backoff_factor + 2,
|
76
|
-
)
|
65
|
+
await self._sleep()
|
77
66
|
|
78
67
|
self._is_logged_in = False
|
79
68
|
return False
|
@@ -83,12 +72,6 @@ class YamiboSession(BaseSession):
|
|
83
72
|
book_id: str,
|
84
73
|
**kwargs: Any,
|
85
74
|
) -> list[str]:
|
86
|
-
"""
|
87
|
-
Fetch the raw HTML of the book info page asynchronously.
|
88
|
-
|
89
|
-
:param book_id: The book identifier.
|
90
|
-
:return: The page content as string list.
|
91
|
-
"""
|
92
75
|
url = self.book_info_url(book_id=book_id)
|
93
76
|
return [await self.fetch(url, **kwargs)]
|
94
77
|
|
@@ -98,13 +81,6 @@ class YamiboSession(BaseSession):
|
|
98
81
|
chapter_id: str,
|
99
82
|
**kwargs: Any,
|
100
83
|
) -> list[str]:
|
101
|
-
"""
|
102
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
103
|
-
|
104
|
-
:param book_id: The book identifier.
|
105
|
-
:param chapter_id: The chapter identifier.
|
106
|
-
:return: The page content as string list.
|
107
|
-
"""
|
108
84
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
109
85
|
return [await self.fetch(url, **kwargs)]
|
110
86
|
|
@@ -10,7 +10,6 @@ from typing import Any
|
|
10
10
|
|
11
11
|
from novel_downloader.core.fetchers.base import BaseSession
|
12
12
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
-
from novel_downloader.models import FetcherConfig
|
14
13
|
|
15
14
|
|
16
15
|
@register_fetcher(
|
@@ -21,25 +20,20 @@ class YibigeSession(BaseSession):
|
|
21
20
|
A session class for interacting with the 一笔阁 (www.yibige.org) novel website.
|
22
21
|
"""
|
23
22
|
|
23
|
+
site_name: str = "yibige"
|
24
|
+
BASE_URL_MAP: dict[str, str] = {
|
25
|
+
"simplified": "www.yibige.org", # 主站
|
26
|
+
"traditional": "tw.yibige.org",
|
27
|
+
"singapore": "sg.yibige.org", # 新加坡
|
28
|
+
"taiwan": "tw.yibige.org", # 臺灣正體
|
29
|
+
"hongkong": "hk.yibige.org", # 香港繁體
|
30
|
+
}
|
31
|
+
DEFAULT_BASE_URL: str = "www.yibige.org"
|
32
|
+
|
24
33
|
BOOK_INFO_URL = "https://{base_url}/{book_id}/"
|
25
34
|
BOOK_CATALOG_URL = "https://{base_url}/{book_id}/index.html"
|
26
35
|
CHAPTER_URL = "https://{base_url}/{book_id}/{chapter_id}.html"
|
27
36
|
|
28
|
-
def __init__(
|
29
|
-
self,
|
30
|
-
config: FetcherConfig,
|
31
|
-
cookies: dict[str, str] | None = None,
|
32
|
-
**kwargs: Any,
|
33
|
-
) -> None:
|
34
|
-
super().__init__("yibige", config, cookies, **kwargs)
|
35
|
-
self.base_url = (
|
36
|
-
"www.yibige.org" if config.locale_style == "simplified" else "tw.yibige.org"
|
37
|
-
)
|
38
|
-
# 主站: www.yibige.org
|
39
|
-
# 新加坡: sg.yibige.org
|
40
|
-
# 臺灣正體: tw.yibige.org
|
41
|
-
# 香港繁體: hk.yibige.org
|
42
|
-
|
43
37
|
async def get_book_info(
|
44
38
|
self,
|
45
39
|
book_id: str,
|
@@ -53,8 +47,8 @@ class YibigeSession(BaseSession):
|
|
53
47
|
:param book_id: The book identifier.
|
54
48
|
:return: The page content as string list.
|
55
49
|
"""
|
56
|
-
info_url = self.book_info_url(base_url=self.base_url, book_id=book_id)
|
57
|
-
catalog_url = self.book_catalog_url(base_url=self.base_url, book_id=book_id)
|
50
|
+
info_url = self.book_info_url(base_url=self._base_url, book_id=book_id)
|
51
|
+
catalog_url = self.book_catalog_url(base_url=self._base_url, book_id=book_id)
|
58
52
|
|
59
53
|
info_html, catalog_html = await asyncio.gather(
|
60
54
|
self.fetch(info_url, **kwargs),
|
@@ -68,15 +62,8 @@ class YibigeSession(BaseSession):
|
|
68
62
|
chapter_id: str,
|
69
63
|
**kwargs: Any,
|
70
64
|
) -> list[str]:
|
71
|
-
"""
|
72
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
73
|
-
|
74
|
-
:param book_id: The book identifier.
|
75
|
-
:param chapter_id: The chapter identifier.
|
76
|
-
:return: The page content as string list.
|
77
|
-
"""
|
78
65
|
url = self.chapter_url(
|
79
|
-
base_url=self.base_url, book_id=book_id, chapter_id=chapter_id
|
66
|
+
base_url=self._base_url, book_id=book_id, chapter_id=chapter_id
|
80
67
|
)
|
81
68
|
return [await self.fetch(url, **kwargs)]
|
82
69
|
|
@@ -6,18 +6,13 @@ novel_downloader.core.interfaces.exporter
|
|
6
6
|
Protocol defining the interface for exporting books to text, EPUB, and other formats.
|
7
7
|
"""
|
8
8
|
|
9
|
+
import types
|
9
10
|
from pathlib import Path
|
10
|
-
from typing import Protocol, runtime_checkable
|
11
|
+
from typing import Protocol, Self, runtime_checkable
|
11
12
|
|
12
13
|
|
13
14
|
@runtime_checkable
|
14
15
|
class ExporterProtocol(Protocol):
|
15
|
-
"""
|
16
|
-
A exporter must implement a method to persist a completed book as plain text.
|
17
|
-
|
18
|
-
It may also optionally implement an EPUB (or other format) exporter.
|
19
|
-
"""
|
20
|
-
|
21
16
|
def export(self, book_id: str) -> dict[str, Path]:
|
22
17
|
"""
|
23
18
|
Export the book in the formats specified in config.
|
@@ -58,3 +53,20 @@ class ExporterProtocol(Protocol):
|
|
58
53
|
:param book_id: The book identifier.
|
59
54
|
"""
|
60
55
|
...
|
56
|
+
|
57
|
+
def close(self) -> None:
|
58
|
+
"""
|
59
|
+
Shutdown and clean up the exporter.
|
60
|
+
"""
|
61
|
+
...
|
62
|
+
|
63
|
+
def __enter__(self) -> Self:
|
64
|
+
...
|
65
|
+
|
66
|
+
def __exit__(
|
67
|
+
self,
|
68
|
+
exc_type: type[BaseException] | None,
|
69
|
+
exc_val: BaseException | None,
|
70
|
+
tb: types.TracebackType | None,
|
71
|
+
) -> None:
|
72
|
+
...
|
@@ -21,6 +21,24 @@ class FetcherProtocol(Protocol):
|
|
21
21
|
and manage login/shutdown asynchronously.
|
22
22
|
"""
|
23
23
|
|
24
|
+
async def init(
|
25
|
+
self,
|
26
|
+
**kwargs: Any,
|
27
|
+
) -> None:
|
28
|
+
"""
|
29
|
+
Perform async initialization, such as creating a session.
|
30
|
+
|
31
|
+
This should be called before using any other method
|
32
|
+
if initialization is required.
|
33
|
+
"""
|
34
|
+
...
|
35
|
+
|
36
|
+
async def close(self) -> None:
|
37
|
+
"""
|
38
|
+
Shutdown and clean up any resources.
|
39
|
+
"""
|
40
|
+
...
|
41
|
+
|
24
42
|
async def login(
|
25
43
|
self,
|
26
44
|
username: str = "",
|
@@ -64,50 +82,6 @@ class FetcherProtocol(Protocol):
|
|
64
82
|
"""
|
65
83
|
...
|
66
84
|
|
67
|
-
async def get_bookcase(
|
68
|
-
self,
|
69
|
-
**kwargs: Any,
|
70
|
-
) -> list[str]:
|
71
|
-
"""
|
72
|
-
Optional: Retrieve the HTML content of the authenticated
|
73
|
-
user's bookcase page asynchronously.
|
74
|
-
|
75
|
-
:return: The HTML markup of the bookcase page.
|
76
|
-
"""
|
77
|
-
...
|
78
|
-
|
79
|
-
async def fetch(
|
80
|
-
self,
|
81
|
-
url: str,
|
82
|
-
**kwargs: Any,
|
83
|
-
) -> str:
|
84
|
-
"""
|
85
|
-
Perform a generic HTTP request and return the response body as text.
|
86
|
-
|
87
|
-
:param url: The URL to request.
|
88
|
-
:return: The response content as a string (HTML or JSON or plain text).
|
89
|
-
"""
|
90
|
-
...
|
91
|
-
|
92
|
-
async def init(
|
93
|
-
self,
|
94
|
-
**kwargs: Any,
|
95
|
-
) -> None:
|
96
|
-
"""
|
97
|
-
Perform async initialization, such as
|
98
|
-
launching a browser or creating a session.
|
99
|
-
|
100
|
-
This should be called before using any other method
|
101
|
-
if initialization is required.
|
102
|
-
"""
|
103
|
-
...
|
104
|
-
|
105
|
-
async def close(self) -> None:
|
106
|
-
"""
|
107
|
-
Shutdown and clean up any resources.
|
108
|
-
"""
|
109
|
-
...
|
110
|
-
|
111
85
|
async def load_state(self) -> bool:
|
112
86
|
"""
|
113
87
|
Restore session state from a persistent storage,
|
@@ -137,6 +111,9 @@ class FetcherProtocol(Protocol):
|
|
137
111
|
def login_fields(self) -> list[LoginField]:
|
138
112
|
...
|
139
113
|
|
114
|
+
async def __aenter__(self) -> Self:
|
115
|
+
...
|
116
|
+
|
140
117
|
async def __aexit__(
|
141
118
|
self,
|
142
119
|
exc_type: type[BaseException] | None,
|
@@ -144,6 +121,3 @@ class FetcherProtocol(Protocol):
|
|
144
121
|
tb: types.TracebackType | None,
|
145
122
|
) -> None:
|
146
123
|
...
|
147
|
-
|
148
|
-
async def __aenter__(self) -> Self:
|
149
|
-
...
|
@@ -13,7 +13,6 @@ __all__ = [
|
|
13
13
|
"BiqugeParser",
|
14
14
|
"BiquyueduParser",
|
15
15
|
"DxmwxParser",
|
16
|
-
"EightnovelParser",
|
17
16
|
"EsjzoneParser",
|
18
17
|
"GuidayeParser",
|
19
18
|
"HetushuParser",
|
@@ -22,10 +21,12 @@ __all__ = [
|
|
22
21
|
"Jpxs123Parser",
|
23
22
|
"LewennParser",
|
24
23
|
"LinovelibParser",
|
24
|
+
"N8novelParser",
|
25
25
|
"PiaotiaParser",
|
26
26
|
"QbtrParser",
|
27
27
|
"QianbiParser",
|
28
28
|
"QidianParser",
|
29
|
+
"QqbookParser",
|
29
30
|
"Quanben5Parser",
|
30
31
|
"SfacgParser",
|
31
32
|
"ShencouParser",
|
@@ -45,7 +46,6 @@ from .aaatxt import AaatxtParser
|
|
45
46
|
from .b520 import BiqugeParser
|
46
47
|
from .biquyuedu import BiquyueduParser
|
47
48
|
from .dxmwx import DxmwxParser
|
48
|
-
from .eightnovel import EightnovelParser
|
49
49
|
from .esjzone import EsjzoneParser
|
50
50
|
from .guidaye import GuidayeParser
|
51
51
|
from .hetushu import HetushuParser
|
@@ -54,10 +54,12 @@ from .ixdzs8 import Ixdzs8Parser
|
|
54
54
|
from .jpxs123 import Jpxs123Parser
|
55
55
|
from .lewenn import LewennParser
|
56
56
|
from .linovelib import LinovelibParser
|
57
|
+
from .n8novel import N8novelParser
|
57
58
|
from .piaotia import PiaotiaParser
|
58
59
|
from .qbtr import QbtrParser
|
59
60
|
from .qianbi import QianbiParser
|
60
61
|
from .qidian import QidianParser
|
62
|
+
from .qqbook import QqbookParser
|
61
63
|
from .quanben5 import Quanben5Parser
|
62
64
|
from .registry import get_parser
|
63
65
|
from .sfacg import SfacgParser
|
@@ -20,7 +20,7 @@ from novel_downloader.models import (
|
|
20
20
|
|
21
21
|
|
22
22
|
@register_parser(
|
23
|
-
site_keys=["biquge", "
|
23
|
+
site_keys=["biquge", "b520"],
|
24
24
|
)
|
25
25
|
class BiqugeParser(BaseParser):
|
26
26
|
"""
|
@@ -112,5 +112,5 @@ class BiqugeParser(BaseParser):
|
|
112
112
|
"id": chapter_id,
|
113
113
|
"title": title,
|
114
114
|
"content": content,
|
115
|
-
"extra": {"site": "
|
115
|
+
"extra": {"site": "b520"},
|
116
116
|
}
|
@@ -12,18 +12,14 @@ from collections.abc import Iterable
|
|
12
12
|
from pathlib import Path
|
13
13
|
from typing import Any
|
14
14
|
|
15
|
-
from novel_downloader.core.interfaces import ParserProtocol
|
16
15
|
from novel_downloader.models import BookInfoDict, ChapterDict, ParserConfig
|
17
16
|
|
18
17
|
|
19
|
-
class BaseParser(ParserProtocol, abc.ABC):
|
18
|
+
class BaseParser(abc.ABC):
|
20
19
|
"""
|
21
20
|
BaseParser defines the interface for extracting book metadata and chapter content
|
22
21
|
from raw HTML.
|
23
22
|
|
24
|
-
This base class manages internal book state (e.g. current book ID) and supports
|
25
|
-
configuration-driven behavior such as content cleaning or formatting.
|
26
|
-
|
27
23
|
Subclasses must implement actual parsing logic for specific sites.
|
28
24
|
"""
|
29
25
|
|
@@ -31,23 +27,20 @@ class BaseParser(ParserProtocol, abc.ABC):
|
|
31
27
|
|
32
28
|
_SPACE_RE = re.compile(r"\s+")
|
33
29
|
|
34
|
-
def __init__(
|
35
|
-
self,
|
36
|
-
config: ParserConfig,
|
37
|
-
):
|
30
|
+
def __init__(self, config: ParserConfig):
|
38
31
|
"""
|
39
32
|
Initialize the parser with a configuration object.
|
40
33
|
|
41
34
|
:param config: ParserConfig object controlling parsing behavior.
|
42
35
|
"""
|
43
|
-
self._config = config
|
44
36
|
self._book_id: str | None = None
|
45
37
|
|
38
|
+
self._fontocr_cfg = config.fontocr_cfg
|
46
39
|
self._save_font_debug = config.save_font_debug
|
47
40
|
self._decode_font: bool = config.decode_font
|
41
|
+
self._batch_size = config.batch_size
|
48
42
|
self._use_truncation = config.use_truncation
|
49
43
|
self._base_cache_dir = Path(config.cache_dir)
|
50
|
-
self._cache_dir = self._base_cache_dir
|
51
44
|
|
52
45
|
self._ad_pattern = self._compile_ads_pattern()
|
53
46
|
|
@@ -81,34 +74,6 @@ class BaseParser(ParserProtocol, abc.ABC):
|
|
81
74
|
"""
|
82
75
|
...
|
83
76
|
|
84
|
-
@property
|
85
|
-
def book_id(self) -> str | None:
|
86
|
-
"""
|
87
|
-
Current book ID in context.
|
88
|
-
|
89
|
-
:return: The current book identifier.
|
90
|
-
"""
|
91
|
-
return self._book_id
|
92
|
-
|
93
|
-
@book_id.setter
|
94
|
-
def book_id(self, value: str) -> None:
|
95
|
-
"""
|
96
|
-
Set current book ID and update debug paths if needed.
|
97
|
-
|
98
|
-
:param value: Book identifier.
|
99
|
-
"""
|
100
|
-
self._book_id = value
|
101
|
-
self._cache_dir = self._base_cache_dir / value
|
102
|
-
self._on_book_id_set()
|
103
|
-
|
104
|
-
def _on_book_id_set(self) -> None:
|
105
|
-
"""
|
106
|
-
Hook called when a new book ID is set.
|
107
|
-
Subclasses can override this to initialize
|
108
|
-
book-related folders or states.
|
109
|
-
"""
|
110
|
-
pass
|
111
|
-
|
112
77
|
def _compile_ads_pattern(self) -> re.Pattern[str] | None:
|
113
78
|
"""
|
114
79
|
Compile a regex pattern from the ADS list, or return None if no ADS.
|