novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +14 -11
- novel_downloader/cli/export.py +19 -19
- novel_downloader/cli/ui.py +35 -8
- novel_downloader/config/adapter.py +216 -153
- novel_downloader/core/__init__.py +5 -6
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +27 -68
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +23 -49
- novel_downloader/core/interfaces/parser.py +2 -2
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +5 -39
- novel_downloader/core/parsers/esjzone.py +3 -3
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
- novel_downloader/core/parsers/qidian.py +717 -0
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +8 -15
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/locales/en.json +3 -3
- novel_downloader/locales/zh.json +3 -3
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/__init__.py +0 -2
- novel_downloader/utils/chapter_storage.py +2 -3
- novel_downloader/utils/constants.py +7 -3
- novel_downloader/utils/cookies.py +32 -17
- novel_downloader/utils/crypto_utils/__init__.py +0 -6
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/crypto_utils/rc4.py +40 -50
- novel_downloader/utils/epub/__init__.py +2 -3
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/epub/documents.py +7 -7
- novel_downloader/utils/epub/models.py +8 -8
- novel_downloader/utils/epub/utils.py +10 -10
- novel_downloader/utils/file_utils/io.py +48 -73
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -11
- novel_downloader/utils/fontocr/__init__.py +13 -0
- novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
- novel_downloader/utils/fontocr/loader.py +52 -0
- novel_downloader/utils/logger.py +80 -56
- novel_downloader/utils/network.py +16 -40
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/sleep_utils.py +53 -43
- novel_downloader/web/main.py +1 -1
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +4 -4
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
- novel_downloader-2.0.2.dist-info/RECORD +203 -0
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/main_parser.py +0 -101
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- novel_downloader-2.0.0.dist-info/RECORD +0 -210
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -8,25 +8,54 @@ Exporter implementation for Qidian novels, supporting plain and encrypted source
|
|
8
8
|
|
9
9
|
__all__ = ["QidianExporter"]
|
10
10
|
|
11
|
-
from
|
12
|
-
from novel_downloader.models import ExporterConfig
|
11
|
+
from typing import Any, ClassVar
|
13
12
|
|
14
|
-
from .common import CommonExporter
|
13
|
+
from novel_downloader.core.exporters.common import CommonExporter
|
14
|
+
from novel_downloader.core.exporters.registry import register_exporter
|
15
15
|
|
16
16
|
|
17
17
|
@register_exporter(site_keys=["qidian", "qd"])
|
18
18
|
class QidianExporter(CommonExporter):
|
19
|
-
"""
|
19
|
+
"""
|
20
|
+
Exporter for Qidian (起点) novels.
|
21
|
+
"""
|
20
22
|
|
21
|
-
DEFAULT_SOURCE_ID = 0
|
22
|
-
ENCRYPTED_SOURCE_ID = 1
|
23
|
-
PRIORITIES_MAP = {
|
23
|
+
DEFAULT_SOURCE_ID: ClassVar[int] = 0
|
24
|
+
ENCRYPTED_SOURCE_ID: ClassVar[int] = 1
|
25
|
+
PRIORITIES_MAP: ClassVar[dict[int, int]] = {
|
24
26
|
DEFAULT_SOURCE_ID: 0,
|
25
27
|
ENCRYPTED_SOURCE_ID: 1,
|
26
28
|
}
|
27
29
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
def _render_txt_extras(self, extras: dict[str, Any]) -> str:
|
31
|
+
"""
|
32
|
+
render "作者说" for TXT:
|
33
|
+
* Clean content
|
34
|
+
* Strip leading/trailing blanks
|
35
|
+
* Drop multiple blank lines (keep only non-empty lines)
|
36
|
+
"""
|
37
|
+
note = self._cleaner.clean_content(extras.get("author_say") or "").strip()
|
38
|
+
if not note:
|
39
|
+
return ""
|
40
|
+
|
41
|
+
# collapse blank lines
|
42
|
+
body = "\n".join(s for line in note.splitlines() if (s := line.strip()))
|
43
|
+
return f"作者说\n\n{body}"
|
44
|
+
|
45
|
+
def _render_epub_extras(self, extras: dict[str, Any]) -> str:
|
46
|
+
"""
|
47
|
+
render "作者说" for EPUB:
|
48
|
+
* Clean content
|
49
|
+
* Keep as HTML-safe via _render_html_block
|
50
|
+
* Wrap with `<hr/>` + `<h3>作者说</h3>`
|
51
|
+
"""
|
52
|
+
note = self._cleaner.clean_content(extras.get("author_say") or "").strip()
|
53
|
+
if not note:
|
54
|
+
return ""
|
55
|
+
|
56
|
+
parts = [
|
57
|
+
"<hr />",
|
58
|
+
"<h3>作者说</h3>",
|
59
|
+
self._render_html_block(note),
|
60
|
+
]
|
61
|
+
return "\n".join(parts)
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.exporters.qqbook
|
4
|
+
--------------------------------------
|
5
|
+
|
6
|
+
Exporter implementation for QQ book novels, supporting plain and encrypted sources.
|
7
|
+
"""
|
8
|
+
|
9
|
+
__all__ = ["QqbookExporter"]
|
10
|
+
|
11
|
+
from typing import ClassVar
|
12
|
+
|
13
|
+
from novel_downloader.core.exporters.common import CommonExporter
|
14
|
+
from novel_downloader.core.exporters.registry import register_exporter
|
15
|
+
|
16
|
+
|
17
|
+
@register_exporter(site_keys=["qqbook", "qq"])
|
18
|
+
class QqbookExporter(CommonExporter):
|
19
|
+
"""
|
20
|
+
Exporter for QQ 阅读 novels.
|
21
|
+
"""
|
22
|
+
|
23
|
+
DEFAULT_SOURCE_ID: ClassVar[int] = 0
|
24
|
+
ENCRYPTED_SOURCE_ID: ClassVar[int] = 1
|
25
|
+
PRIORITIES_MAP: ClassVar[dict[int, int]] = {
|
26
|
+
DEFAULT_SOURCE_ID: 0,
|
27
|
+
ENCRYPTED_SOURCE_ID: 1,
|
28
|
+
}
|
@@ -15,7 +15,7 @@ from novel_downloader.core.exporters.common import CommonExporter
|
|
15
15
|
from novel_downloader.core.interfaces import ExporterProtocol
|
16
16
|
from novel_downloader.models import ExporterConfig
|
17
17
|
|
18
|
-
ExporterBuilder = Callable[[ExporterConfig], ExporterProtocol]
|
18
|
+
ExporterBuilder = Callable[[ExporterConfig, str], ExporterProtocol]
|
19
19
|
|
20
20
|
E = TypeVar("E", bound=ExporterProtocol)
|
21
21
|
_EXPORTER_MAP: dict[str, ExporterBuilder] = {}
|
@@ -52,4 +52,4 @@ def get_exporter(site: str, config: ExporterConfig) -> ExporterProtocol:
|
|
52
52
|
exporter_cls = _EXPORTER_MAP[site_key]
|
53
53
|
except KeyError:
|
54
54
|
return CommonExporter(config, site_key)
|
55
|
-
return exporter_cls(config)
|
55
|
+
return exporter_cls(config, site_key)
|
@@ -12,7 +12,6 @@ __all__ = [
|
|
12
12
|
"BiqugeSession",
|
13
13
|
"BiquyueduSession",
|
14
14
|
"DxmwxSession",
|
15
|
-
"EightnovelSession",
|
16
15
|
"EsjzoneSession",
|
17
16
|
"GuidayeSession",
|
18
17
|
"HetushuSession",
|
@@ -21,10 +20,12 @@ __all__ = [
|
|
21
20
|
"Jpxs123Session",
|
22
21
|
"LewennSession",
|
23
22
|
"LinovelibSession",
|
23
|
+
"N8novelSession",
|
24
24
|
"PiaotiaSession",
|
25
25
|
"QbtrSession",
|
26
26
|
"QianbiSession",
|
27
27
|
"QidianSession",
|
28
|
+
"QqbookSession",
|
28
29
|
"Quanben5Session",
|
29
30
|
"SfacgSession",
|
30
31
|
"ShencouSession",
|
@@ -44,7 +45,6 @@ from .aaatxt import AaatxtSession
|
|
44
45
|
from .b520 import BiqugeSession
|
45
46
|
from .biquyuedu import BiquyueduSession
|
46
47
|
from .dxmwx import DxmwxSession
|
47
|
-
from .eightnovel import EightnovelSession
|
48
48
|
from .esjzone import EsjzoneSession
|
49
49
|
from .guidaye import GuidayeSession
|
50
50
|
from .hetushu import HetushuSession
|
@@ -53,10 +53,12 @@ from .ixdzs8 import Ixdzs8Session
|
|
53
53
|
from .jpxs123 import Jpxs123Session
|
54
54
|
from .lewenn import LewennSession
|
55
55
|
from .linovelib import LinovelibSession
|
56
|
+
from .n8novel import N8novelSession
|
56
57
|
from .piaotia import PiaotiaSession
|
57
58
|
from .qbtr import QbtrSession
|
58
59
|
from .qianbi import QianbiSession
|
59
60
|
from .qidian import QidianSession
|
61
|
+
from .qqbook import QqbookSession
|
60
62
|
from .quanben5 import Quanben5Session
|
61
63
|
from .registry import get_fetcher
|
62
64
|
from .sfacg import SfacgSession
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -20,28 +19,16 @@ class AaatxtSession(BaseSession):
|
|
20
19
|
A session class for interacting with the 3A电子书 (www.aaatxt.com) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "aaatxt"
|
23
|
+
|
23
24
|
BOOK_INFO_URL = "http://www.aaatxt.com/shu/{book_id}.html"
|
24
25
|
CHAPTER_URL = "http://www.aaatxt.com/yuedu/{chapter_id}.html"
|
25
26
|
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
cookies: dict[str, str] | None = None,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("aaatxt", config, cookies, **kwargs)
|
33
|
-
|
34
27
|
async def get_book_info(
|
35
28
|
self,
|
36
29
|
book_id: str,
|
37
30
|
**kwargs: Any,
|
38
31
|
) -> list[str]:
|
39
|
-
"""
|
40
|
-
Fetch the raw HTML of the book info page asynchronously.
|
41
|
-
|
42
|
-
:param book_id: The book identifier.
|
43
|
-
:return: The page content as string list.
|
44
|
-
"""
|
45
32
|
url = self.book_info_url(book_id=book_id)
|
46
33
|
return [await self.fetch(url, **kwargs)]
|
47
34
|
|
@@ -51,13 +38,6 @@ class AaatxtSession(BaseSession):
|
|
51
38
|
chapter_id: str,
|
52
39
|
**kwargs: Any,
|
53
40
|
) -> list[str]:
|
54
|
-
"""
|
55
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
56
|
-
|
57
|
-
:param book_id: The book identifier.
|
58
|
-
:param chapter_id: The chapter identifier.
|
59
|
-
:return: The page content as string list.
|
60
|
-
"""
|
61
41
|
url = self.chapter_url(chapter_id=chapter_id)
|
62
42
|
return [await self.fetch(url, encoding="gb2312", **kwargs)]
|
63
43
|
|
@@ -9,39 +9,26 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
16
|
-
site_keys=["biquge", "
|
15
|
+
site_keys=["biquge", "b520"],
|
17
16
|
)
|
18
17
|
class BiqugeSession(BaseSession):
|
19
18
|
"""
|
20
19
|
A session class for interacting with the 笔趣阁 (www.b520.cc) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "b520"
|
23
|
+
|
23
24
|
BOOK_INFO_URL = "http://www.b520.cc/{book_id}/"
|
24
25
|
CHAPTER_URL = "http://www.b520.cc/{book_id}/{chapter_id}.html"
|
25
26
|
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
cookies: dict[str, str] | None = None,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("biquge", config, cookies, **kwargs)
|
33
|
-
|
34
27
|
async def get_book_info(
|
35
28
|
self,
|
36
29
|
book_id: str,
|
37
30
|
**kwargs: Any,
|
38
31
|
) -> list[str]:
|
39
|
-
"""
|
40
|
-
Fetch the raw HTML of the book info page asynchronously.
|
41
|
-
|
42
|
-
:param book_id: The book identifier.
|
43
|
-
:return: The page content as string list.
|
44
|
-
"""
|
45
32
|
url = self.book_info_url(book_id=book_id)
|
46
33
|
return [await self.fetch(url, **kwargs)]
|
47
34
|
|
@@ -51,13 +38,6 @@ class BiqugeSession(BaseSession):
|
|
51
38
|
chapter_id: str,
|
52
39
|
**kwargs: Any,
|
53
40
|
) -> list[str]:
|
54
|
-
"""
|
55
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
56
|
-
|
57
|
-
:param book_id: The book identifier.
|
58
|
-
:param chapter_id: The chapter identifier.
|
59
|
-
:return: The page content as string list.
|
60
|
-
"""
|
61
41
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
62
42
|
return [await self.fetch(url, encoding="gbk", **kwargs)]
|
63
43
|
|
@@ -16,27 +16,24 @@ from typing import Any, Self
|
|
16
16
|
import aiohttp
|
17
17
|
from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
|
18
18
|
|
19
|
-
from novel_downloader.core.interfaces import FetcherProtocol
|
20
19
|
from novel_downloader.models import FetcherConfig, LoginField
|
21
|
-
from novel_downloader.utils import
|
22
|
-
|
23
|
-
)
|
24
|
-
from novel_downloader.utils.constants import (
|
25
|
-
DATA_DIR,
|
26
|
-
DEFAULT_USER_HEADERS,
|
27
|
-
)
|
20
|
+
from novel_downloader.utils import async_jitter_sleep
|
21
|
+
from novel_downloader.utils.constants import DATA_DIR, DEFAULT_USER_HEADERS
|
28
22
|
|
29
23
|
from .rate_limiter import TokenBucketRateLimiter
|
30
24
|
|
31
25
|
|
32
|
-
class BaseSession(
|
26
|
+
class BaseSession(abc.ABC):
|
33
27
|
"""
|
34
28
|
BaseSession wraps basic HTTP operations using aiohttp.ClientSession.
|
35
29
|
"""
|
36
30
|
|
31
|
+
site_name: str
|
32
|
+
BASE_URL_MAP: dict[str, str] = {}
|
33
|
+
DEFAULT_BASE_URL: str = ""
|
34
|
+
|
37
35
|
def __init__(
|
38
36
|
self,
|
39
|
-
site: str,
|
40
37
|
config: FetcherConfig,
|
41
38
|
cookies: dict[str, str] | None = None,
|
42
39
|
**kwargs: Any,
|
@@ -47,26 +44,59 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
47
44
|
:param config: Configuration object for session behavior
|
48
45
|
:param cookies: Optional initial cookies to set on the session.
|
49
46
|
"""
|
50
|
-
self.
|
51
|
-
self.
|
47
|
+
self._base_url = self._resolve_base_url(config.locale_style)
|
48
|
+
self._backoff_factor = config.backoff_factor
|
49
|
+
self._request_interval = config.request_interval
|
50
|
+
self._retry_times = config.retry_times
|
51
|
+
self._timeout = config.timeout
|
52
|
+
self._max_connections = config.max_connections
|
53
|
+
self._verify_ssl = config.verify_ssl
|
54
|
+
self._init_cookies = cookies or {}
|
55
|
+
self._is_logged_in = False
|
52
56
|
|
53
|
-
self._state_file = DATA_DIR /
|
54
|
-
self._state_file.parent.mkdir(parents=True, exist_ok=True)
|
57
|
+
self._state_file = DATA_DIR / self.site_name / "session_state.cookies"
|
55
58
|
|
56
|
-
self._is_logged_in = False
|
57
59
|
self._headers = (
|
58
|
-
config.headers.copy()
|
60
|
+
config.headers.copy()
|
61
|
+
if config.headers is not None
|
62
|
+
else DEFAULT_USER_HEADERS.copy()
|
59
63
|
)
|
60
64
|
if config.user_agent:
|
61
65
|
self._headers["User-Agent"] = config.user_agent
|
62
|
-
|
66
|
+
|
63
67
|
self._session: ClientSession | None = None
|
64
|
-
self._rate_limiter: TokenBucketRateLimiter | None =
|
68
|
+
self._rate_limiter: TokenBucketRateLimiter | None = (
|
69
|
+
TokenBucketRateLimiter(config.max_rps) if config.max_rps > 0 else None
|
70
|
+
)
|
65
71
|
|
66
|
-
|
67
|
-
self._rate_limiter = TokenBucketRateLimiter(config.max_rps)
|
72
|
+
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
68
73
|
|
69
|
-
|
74
|
+
async def init(
|
75
|
+
self,
|
76
|
+
**kwargs: Any,
|
77
|
+
) -> None:
|
78
|
+
"""
|
79
|
+
Set up the aiohttp.ClientSession with timeout, connector, headers.
|
80
|
+
"""
|
81
|
+
timeout = ClientTimeout(total=self._timeout)
|
82
|
+
connector = TCPConnector(
|
83
|
+
ssl=self._verify_ssl,
|
84
|
+
limit_per_host=self._max_connections,
|
85
|
+
)
|
86
|
+
self._session = ClientSession(
|
87
|
+
timeout=timeout,
|
88
|
+
connector=connector,
|
89
|
+
headers=self._headers,
|
90
|
+
cookies=self._init_cookies,
|
91
|
+
)
|
92
|
+
|
93
|
+
async def close(self) -> None:
|
94
|
+
"""
|
95
|
+
Shutdown and clean up any resources.
|
96
|
+
"""
|
97
|
+
if self._session and not self._session.closed:
|
98
|
+
await self._session.close()
|
99
|
+
self._session = None
|
70
100
|
|
71
101
|
async def login(
|
72
102
|
self,
|
@@ -113,47 +143,16 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
113
143
|
"""
|
114
144
|
...
|
115
145
|
|
116
|
-
|
117
|
-
|
118
|
-
**kwargs: Any,
|
119
|
-
) -> list[str]:
|
120
|
-
"""
|
121
|
-
Optional: Retrieve the HTML content of the authenticated user's bookcase page.
|
122
|
-
Subclasses that support user login/bookcase should override this.
|
123
|
-
|
124
|
-
:return: The HTML of the bookcase page.
|
125
|
-
"""
|
126
|
-
raise NotImplementedError(
|
127
|
-
"Bookcase fetching is not supported by this session type. "
|
128
|
-
"Override get_bookcase() in your subclass to enable it."
|
129
|
-
)
|
130
|
-
|
131
|
-
async def init(
|
132
|
-
self,
|
133
|
-
**kwargs: Any,
|
134
|
-
) -> None:
|
146
|
+
@property
|
147
|
+
def is_logged_in(self) -> bool:
|
135
148
|
"""
|
136
|
-
|
149
|
+
Indicates whether the requester is currently authenticated.
|
137
150
|
"""
|
138
|
-
|
139
|
-
connector = TCPConnector(
|
140
|
-
ssl=self._config.verify_ssl,
|
141
|
-
limit_per_host=self.max_connections,
|
142
|
-
)
|
143
|
-
self._session = ClientSession(
|
144
|
-
timeout=timeout,
|
145
|
-
connector=connector,
|
146
|
-
headers=self._headers,
|
147
|
-
cookies=self._cookies,
|
148
|
-
)
|
151
|
+
return self._is_logged_in
|
149
152
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
"""
|
154
|
-
if self._session and not self._session.closed:
|
155
|
-
await self._session.close()
|
156
|
-
self._session = None
|
153
|
+
@property
|
154
|
+
def login_fields(self) -> list[LoginField]:
|
155
|
+
return []
|
157
156
|
|
158
157
|
async def fetch(
|
159
158
|
self,
|
@@ -172,17 +171,17 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
172
171
|
if self._rate_limiter:
|
173
172
|
await self._rate_limiter.wait()
|
174
173
|
|
175
|
-
for attempt in range(self.
|
174
|
+
for attempt in range(self._retry_times + 1):
|
176
175
|
try:
|
177
176
|
async with self.session.get(url, **kwargs) as resp:
|
178
177
|
resp.raise_for_status()
|
179
178
|
return await self._response_to_str(resp, encoding)
|
180
179
|
except aiohttp.ClientError:
|
181
|
-
if attempt < self.
|
180
|
+
if attempt < self._retry_times:
|
182
181
|
await async_jitter_sleep(
|
183
|
-
self.
|
182
|
+
self._backoff_factor,
|
184
183
|
mul_spread=1.1,
|
185
|
-
max_sleep=self.
|
184
|
+
max_sleep=self._backoff_factor + 2,
|
186
185
|
)
|
187
186
|
continue
|
188
187
|
raise
|
@@ -304,7 +303,6 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
304
303
|
|
305
304
|
:param cookies: A dictionary of cookie key-value pairs.
|
306
305
|
"""
|
307
|
-
self._cookies.update(cookies)
|
308
306
|
if self._session:
|
309
307
|
self._session.cookie_jar.update_cookies(cookies)
|
310
308
|
|
@@ -326,21 +324,6 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
326
324
|
"""
|
327
325
|
return False
|
328
326
|
|
329
|
-
@property
|
330
|
-
def site(self) -> str:
|
331
|
-
return self._site
|
332
|
-
|
333
|
-
@property
|
334
|
-
def is_logged_in(self) -> bool:
|
335
|
-
"""
|
336
|
-
Indicates whether the requester is currently authenticated.
|
337
|
-
"""
|
338
|
-
return self._is_logged_in
|
339
|
-
|
340
|
-
@property
|
341
|
-
def login_fields(self) -> list[LoginField]:
|
342
|
-
return []
|
343
|
-
|
344
327
|
@property
|
345
328
|
def session(self) -> ClientSession:
|
346
329
|
"""
|
@@ -352,25 +335,13 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
352
335
|
raise RuntimeError("Session is not initialized or has been shut down.")
|
353
336
|
return self._session
|
354
337
|
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
@property
|
364
|
-
def request_interval(self) -> float:
|
365
|
-
return self._config.request_interval
|
366
|
-
|
367
|
-
@property
|
368
|
-
def timeout(self) -> float:
|
369
|
-
return self._config.timeout
|
370
|
-
|
371
|
-
@property
|
372
|
-
def max_connections(self) -> int:
|
373
|
-
return self._config.max_connections
|
338
|
+
async def _sleep(self) -> None:
|
339
|
+
if self._request_interval > 0:
|
340
|
+
await async_jitter_sleep(
|
341
|
+
self._request_interval,
|
342
|
+
mul_spread=1.1,
|
343
|
+
max_sleep=self._request_interval + 2,
|
344
|
+
)
|
374
345
|
|
375
346
|
@property
|
376
347
|
def headers(self) -> dict[str, str]:
|
@@ -400,11 +371,12 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
400
371
|
encoding: str | None = None,
|
401
372
|
) -> str:
|
402
373
|
"""
|
403
|
-
Read the full body of resp as text.
|
404
|
-
|
374
|
+
Read the full body of resp as text. Try the provided encoding,
|
375
|
+
response charset, and common fallbacks. On failure, fall back
|
376
|
+
to utf-8 with errors ignored.
|
405
377
|
"""
|
406
378
|
data: bytes = await resp.read()
|
407
|
-
encodings = [
|
379
|
+
encodings: list[str | None] = [
|
408
380
|
encoding,
|
409
381
|
resp.charset,
|
410
382
|
"gb2312",
|
@@ -412,14 +384,17 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
412
384
|
"gbk",
|
413
385
|
"utf-8",
|
414
386
|
]
|
415
|
-
|
416
|
-
for enc in
|
387
|
+
|
388
|
+
for enc in (e for e in encodings if e is not None):
|
417
389
|
try:
|
418
390
|
return data.decode(enc)
|
419
391
|
except UnicodeDecodeError:
|
420
392
|
continue
|
421
|
-
|
422
|
-
|
393
|
+
return data.decode(encoding or "utf-8", errors="ignore")
|
394
|
+
|
395
|
+
def _resolve_base_url(self, locale_style: str) -> str:
|
396
|
+
key = locale_style.strip().lower()
|
397
|
+
return self.BASE_URL_MAP.get(key, self.DEFAULT_BASE_URL)
|
423
398
|
|
424
399
|
async def __aenter__(self) -> Self:
|
425
400
|
if self._session is None or self._session.closed:
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
11
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
-
from novel_downloader.models import FetcherConfig
|
13
12
|
|
14
13
|
|
15
14
|
@register_fetcher(
|
@@ -20,28 +19,16 @@ class BiquyueduSession(BaseSession):
|
|
20
19
|
A session class for interacting with the 精彩小说 (biquyuedu.com) novel website.
|
21
20
|
"""
|
22
21
|
|
22
|
+
site_name: str = "biquyuedu"
|
23
|
+
|
23
24
|
BOOK_INFO_URL = "https://biquyuedu.com/novel/{book_id}.html"
|
24
25
|
CHAPTER_URL = "https://biquyuedu.com/novel/{book_id}/{chapter_id}.html"
|
25
26
|
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
cookies: dict[str, str] | None = None,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("biquyuedu", config, cookies, **kwargs)
|
33
|
-
|
34
27
|
async def get_book_info(
|
35
28
|
self,
|
36
29
|
book_id: str,
|
37
30
|
**kwargs: Any,
|
38
31
|
) -> list[str]:
|
39
|
-
"""
|
40
|
-
Fetch the raw HTML of the book info page asynchronously.
|
41
|
-
|
42
|
-
:param book_id: The book identifier.
|
43
|
-
:return: The page content as string list.
|
44
|
-
"""
|
45
32
|
url = self.book_info_url(book_id=book_id)
|
46
33
|
return [await self.fetch(url, **kwargs)]
|
47
34
|
|
@@ -51,13 +38,6 @@ class BiquyueduSession(BaseSession):
|
|
51
38
|
chapter_id: str,
|
52
39
|
**kwargs: Any,
|
53
40
|
) -> list[str]:
|
54
|
-
"""
|
55
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
56
|
-
|
57
|
-
:param book_id: The book identifier.
|
58
|
-
:param chapter_id: The chapter identifier.
|
59
|
-
:return: The page content as string list.
|
60
|
-
"""
|
61
41
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
62
42
|
return [await self.fetch(url, **kwargs)]
|
63
43
|
|
@@ -10,7 +10,6 @@ from typing import Any
|
|
10
10
|
|
11
11
|
from novel_downloader.core.fetchers.base import BaseSession
|
12
12
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
-
from novel_downloader.models import FetcherConfig
|
14
13
|
|
15
14
|
|
16
15
|
@register_fetcher(
|
@@ -21,21 +20,17 @@ class DxmwxSession(BaseSession):
|
|
21
20
|
A session class for interacting with the 大熊猫文学网 (www.dxmwx.org) novel website.
|
22
21
|
"""
|
23
22
|
|
23
|
+
site_name: str = "dxmwx"
|
24
|
+
BASE_URL_MAP: dict[str, str] = {
|
25
|
+
"simplified": "www.dxmwx.org",
|
26
|
+
"traditional": "tw.dxmwx.org",
|
27
|
+
}
|
28
|
+
DEFAULT_BASE_URL: str = "www.dxmwx.org"
|
29
|
+
|
24
30
|
BOOK_INFO_URL = "https://{base_url}/book/{book_id}.html"
|
25
31
|
BOOK_CATALOG_URL = "https://{base_url}/chapter/{book_id}.html"
|
26
32
|
CHAPTER_URL = "https://{base_url}/read/{book_id}_{chapter_id}.html"
|
27
33
|
|
28
|
-
def __init__(
|
29
|
-
self,
|
30
|
-
config: FetcherConfig,
|
31
|
-
cookies: dict[str, str] | None = None,
|
32
|
-
**kwargs: Any,
|
33
|
-
) -> None:
|
34
|
-
super().__init__("dxmwx", config, cookies, **kwargs)
|
35
|
-
self.base_url = (
|
36
|
-
"www.dxmwx.org" if config.locale_style == "simplified" else "tw.dxmwx.org"
|
37
|
-
)
|
38
|
-
|
39
34
|
async def get_book_info(
|
40
35
|
self,
|
41
36
|
book_id: str,
|
@@ -49,8 +44,8 @@ class DxmwxSession(BaseSession):
|
|
49
44
|
:param book_id: The book identifier.
|
50
45
|
:return: The page content as string list.
|
51
46
|
"""
|
52
|
-
info_url = self.book_info_url(base_url=self.
|
53
|
-
catalog_url = self.book_catalog_url(base_url=self.
|
47
|
+
info_url = self.book_info_url(base_url=self._base_url, book_id=book_id)
|
48
|
+
catalog_url = self.book_catalog_url(base_url=self._base_url, book_id=book_id)
|
54
49
|
|
55
50
|
info_html, catalog_html = await asyncio.gather(
|
56
51
|
self.fetch(info_url, **kwargs),
|
@@ -64,15 +59,8 @@ class DxmwxSession(BaseSession):
|
|
64
59
|
chapter_id: str,
|
65
60
|
**kwargs: Any,
|
66
61
|
) -> list[str]:
|
67
|
-
"""
|
68
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
69
|
-
|
70
|
-
:param book_id: The book identifier.
|
71
|
-
:param chapter_id: The chapter identifier.
|
72
|
-
:return: The page content as string list.
|
73
|
-
"""
|
74
62
|
url = self.chapter_url(
|
75
|
-
base_url=self.
|
63
|
+
base_url=self._base_url, book_id=book_id, chapter_id=chapter_id
|
76
64
|
)
|
77
65
|
return [await self.fetch(url, **kwargs)]
|
78
66
|
|