novel-downloader 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -2
- novel_downloader/cli/__init__.py +0 -1
- novel_downloader/cli/clean.py +2 -10
- novel_downloader/cli/download.py +18 -22
- novel_downloader/cli/interactive.py +0 -1
- novel_downloader/cli/main.py +1 -3
- novel_downloader/cli/settings.py +8 -8
- novel_downloader/config/__init__.py +0 -1
- novel_downloader/config/adapter.py +48 -18
- novel_downloader/config/loader.py +116 -108
- novel_downloader/config/models.py +41 -32
- novel_downloader/config/site_rules.py +2 -4
- novel_downloader/core/__init__.py +0 -1
- novel_downloader/core/downloaders/__init__.py +4 -4
- novel_downloader/core/downloaders/base/__init__.py +14 -0
- novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
- novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
- novel_downloader/core/downloaders/biquge/__init__.py +12 -0
- novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
- novel_downloader/core/downloaders/common/__init__.py +14 -0
- novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
- novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +34 -23
- novel_downloader/core/downloaders/qidian/__init__.py +10 -0
- novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +80 -64
- novel_downloader/core/factory/__init__.py +4 -5
- novel_downloader/core/factory/{downloader_factory.py → downloader.py} +36 -35
- novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
- novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
- novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
- novel_downloader/core/interfaces/__init__.py +8 -9
- novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
- novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +26 -12
- novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
- novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
- novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
- novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +34 -17
- novel_downloader/core/parsers/__init__.py +5 -4
- novel_downloader/core/parsers/{base_parser.py → base.py} +20 -11
- novel_downloader/core/parsers/biquge/__init__.py +10 -0
- novel_downloader/core/parsers/biquge/main_parser.py +126 -0
- novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
- novel_downloader/core/parsers/{common_parser → common}/helper.py +20 -18
- novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +41 -49
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +16 -12
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +37 -45
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +16 -12
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +150 -0
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +9 -10
- novel_downloader/core/requesters/__init__.py +9 -5
- novel_downloader/core/requesters/base/__init__.py +16 -0
- novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +180 -73
- novel_downloader/core/requesters/base/browser.py +340 -0
- novel_downloader/core/requesters/base/session.py +364 -0
- novel_downloader/core/requesters/biquge/__init__.py +12 -0
- novel_downloader/core/requesters/biquge/session.py +90 -0
- novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
- novel_downloader/core/requesters/common/async_session.py +96 -0
- novel_downloader/core/requesters/common/session.py +113 -0
- novel_downloader/core/requesters/qidian/__init__.py +21 -0
- novel_downloader/core/requesters/qidian/broswer.py +306 -0
- novel_downloader/core/requesters/qidian/session.py +287 -0
- novel_downloader/core/savers/__init__.py +5 -3
- novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
- novel_downloader/core/savers/biquge.py +25 -0
- novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
- novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +24 -52
- novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
- novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
- novel_downloader/core/savers/epub_utils/__init__.py +0 -1
- novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
- novel_downloader/core/savers/epub_utils/initializer.py +4 -5
- novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
- novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
- novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
- novel_downloader/locales/en.json +12 -4
- novel_downloader/locales/zh.json +9 -1
- novel_downloader/resources/config/settings.toml +88 -0
- novel_downloader/utils/cache.py +2 -2
- novel_downloader/utils/chapter_storage.py +340 -0
- novel_downloader/utils/constants.py +8 -5
- novel_downloader/utils/crypto_utils.py +3 -3
- novel_downloader/utils/file_utils/__init__.py +0 -1
- novel_downloader/utils/file_utils/io.py +12 -17
- novel_downloader/utils/file_utils/normalize.py +1 -3
- novel_downloader/utils/file_utils/sanitize.py +2 -9
- novel_downloader/utils/fontocr/__init__.py +0 -1
- novel_downloader/utils/fontocr/ocr_v1.py +19 -22
- novel_downloader/utils/fontocr/ocr_v2.py +147 -60
- novel_downloader/utils/hash_store.py +19 -20
- novel_downloader/utils/hash_utils.py +0 -1
- novel_downloader/utils/i18n.py +3 -4
- novel_downloader/utils/logger.py +5 -6
- novel_downloader/utils/model_loader.py +5 -8
- novel_downloader/utils/network.py +9 -10
- novel_downloader/utils/state.py +6 -7
- novel_downloader/utils/text_utils/__init__.py +0 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
- novel_downloader/utils/text_utils/diff_display.py +0 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -4
- novel_downloader/utils/text_utils/text_cleaning.py +0 -1
- novel_downloader/utils/time_utils/__init__.py +0 -1
- novel_downloader/utils/time_utils/datetime_utils.py +9 -11
- novel_downloader/utils/time_utils/sleep_utils.py +27 -13
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/METADATA +14 -17
- novel_downloader-1.3.0.dist-info/RECORD +127 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/WHEEL +1 -1
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +0 -95
- novel_downloader/core/requesters/base_browser.py +0 -210
- novel_downloader/core/requesters/base_session.py +0 -243
- novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
- novel_downloader/core/requesters/common_requester/common_session.py +0 -126
- novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -377
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
- novel_downloader/resources/config/settings.yaml +0 -76
- novel_downloader-1.2.1.dist-info/RECORD +0 -115
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,150 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.parsers.qidian.shared.book_info_parser
|
4
|
+
------------------------------------------------------------
|
5
|
+
|
6
|
+
This module provides parsing of Qidian book info pages.
|
7
|
+
|
8
|
+
It extracts metadata such as title, author, cover URL, update
|
9
|
+
time, status, word count, summary, and volume-chapter structure.
|
10
|
+
"""
|
11
|
+
|
12
|
+
import logging
|
13
|
+
import re
|
14
|
+
from typing import Any
|
15
|
+
|
16
|
+
from bs4.element import Tag
|
17
|
+
|
18
|
+
from .helpers import html_to_soup
|
19
|
+
|
20
|
+
logger = logging.getLogger(__name__)
|
21
|
+
|
22
|
+
|
23
|
+
def _chapter_url_to_id(url: str) -> str:
    """
    Return the chapter id, i.e. the final path segment of a chapter URL.

    Trailing slashes are ignored, so ``".../123/456/"`` yields ``"456"``.

    :param url: Chapter URL (absolute, protocol-relative, or relative).
    :return: The text after the last remaining ``/``; the whole string
             if it contains no slash.
    """
    trimmed = url.rstrip("/")
    # Split once from the right: only the last segment is of interest.
    return trimmed.rsplit("/", 1)[-1]
|
28
|
+
|
29
|
+
|
30
|
+
def _get_volume_name(vol_div: Tag) -> str:
    """
    Extract the volume title from a <div class="volume"> element.

    The title is read from the first ``<h3>`` inside ``vol_div``. Any
    ``<a>`` elements nested in that heading are removed from the tree
    first (this mutates ``vol_div``), and only the text before the first
    middle dot (``chr(183)``) is kept.

    :param vol_div: The volume container element.
    :return: The volume name, or an empty string when there is no ``<h3>``.
    """
    heading = vol_div.select_one("h3")
    if not heading:
        return ""

    # Drop links inside the heading so their text is not part of the title.
    for link in heading.find_all("a"):
        link.decompose()

    raw_title: str = heading.get_text(strip=True)
    # Keep only what precedes the first middle dot.
    name, _, _ = raw_title.partition(chr(183))
    return name.strip()
|
41
|
+
|
42
|
+
|
43
|
+
def safe_select_text(
    soup: Tag,
    selector: str,
    *,
    separator: str = "",
    strip: bool = False,
    default: str = "",
) -> str:
    """
    Return the text of the first element matching a CSS selector.

    :param soup: A BeautifulSoup Tag or sub-tree to query.
    :param selector: A CSS selector string.
    :param separator: Passed to ``get_text`` as the string placed between
                      text fragments.
    :param strip: Passed to ``get_text``; whether fragments are stripped.
    :param default: Value to return when nothing matches the selector.
    :return: The matched element's text, or ``default`` if no element
             was found.
    """
    match = soup.select_one(selector)
    # Guard clause: anything that is not a Tag counts as "not found".
    if not isinstance(match, Tag):
        return default
    return match.get_text(separator=separator, strip=strip)
|
67
|
+
|
68
|
+
|
69
|
+
def safe_select_attr(
    soup: Tag,
    selector: str,
    attr: str,
    *,
    default: str = "",
) -> str:
    """
    Return one attribute of the first element matching a CSS selector.

    :param soup: A BeautifulSoup Tag or sub-tree to query.
    :param selector: A CSS selector string.
    :param attr: The attribute name to retrieve from the selected element.
    :param default: Value to return if no element matches, the attribute
                    is absent, or its value is neither a string nor a list.
    :return: The attribute value stripped of surrounding whitespace
             (list values are joined with single spaces first), or
             ``default``.
    """
    element = soup.select_one(selector)
    if not isinstance(element, Tag) or attr not in element.attrs:
        return default

    raw = element.attrs[attr]
    if isinstance(raw, str):
        return raw.strip()
    if isinstance(raw, list):
        # Multi-valued attributes arrive as a list of strings.
        return " ".join(raw).strip()
    return default
|
93
|
+
|
94
|
+
|
95
|
+
def parse_book_info(html_str: str) -> dict[str, Any]:
    """
    Parse a book info page into a metadata dictionary.

    Keys are filled in this order: ``book_name``, ``author``,
    ``cover_url``, ``update_time``, ``serial_status``, ``word_count``,
    ``summary`` and ``volumes`` (a list of
    ``{"volume_name": ..., "chapters": [...]}`` where each chapter has
    ``title``, ``url`` and ``chapterId``).

    Any exception raised while parsing is logged as a warning and
    swallowed; the keys collected up to that point are still returned.

    :param html_str: Raw HTML of the book info page.
    :return: A dict containing book metadata (possibly partial or empty).
    """
    result: dict[str, Any] = {}
    try:
        page = html_to_soup(html_str)

        result["book_name"] = safe_select_text(page, "em#bookName", strip=True)
        result["author"] = safe_select_text(page, "a.writer", strip=True)
        result["cover_url"] = safe_select_attr(page, "div.book-img img", "src")

        # Remove the label that prefixes the timestamp text.
        update_text = safe_select_text(page, "span.book-update-time", strip=True)
        result["update_time"] = update_text.replace("更新时间", "").strip()

        result["serial_status"] = safe_select_text(page, "span.blue", strip=True)

        # Word count is matched against the raw HTML rather than the soup.
        count_match = re.search(
            r"<em>([\d.]+)</em>\s*<cite>(.*?)字</cite>", html_str
        )
        if count_match:
            number, unit = count_match.groups()
            result["word_count"] = f"{number}{unit}字"
        else:
            result["word_count"] = "Unknown"

        result["summary"] = safe_select_text(
            page, "div.book-intro p", separator="\n", strip=True
        )

        # Volumes and the chapters listed under each of them.
        volumes = []
        for volume_tag in page.select("div.volume-wrap div.volume"):
            volume_name = _get_volume_name(volume_tag)
            chapters = []
            for item in volume_tag.select("li"):
                link = item.select_one("a")
                # Skip list items without a usable link.
                if not (isinstance(link, Tag) and "href" in link.attrs):
                    continue
                raw_href = link["href"]
                first = raw_href[0] if isinstance(raw_href, list) else str(raw_href)
                chapter_url = first.strip()
                chapters.append(
                    {
                        "title": link.get_text(strip=True),
                        "url": chapter_url,
                        "chapterId": _chapter_url_to_id(chapter_url),
                    }
                )
            volumes.append({"volume_name": volume_name, "chapters": chapters})
        result["volumes"] = volumes
    except Exception as exc:
        logger.warning("[Parser] Error parsing book info: %s", exc)
    return result
|
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.parsers.
|
5
|
-
|
3
|
+
novel_downloader.core.parsers.qidian.shared.helpers
|
4
|
+
---------------------------------------------------
|
6
5
|
|
7
6
|
Shared utility functions for parsing Qidian browser-rendered pages.
|
8
7
|
|
@@ -14,9 +13,9 @@ This module provides reusable helpers to:
|
|
14
13
|
|
15
14
|
import json
|
16
15
|
import logging
|
17
|
-
from typing import Any
|
16
|
+
from typing import Any
|
18
17
|
|
19
|
-
from bs4 import BeautifulSoup
|
18
|
+
from bs4 import BeautifulSoup, Tag
|
20
19
|
|
21
20
|
logger = logging.getLogger(__name__)
|
22
21
|
|
@@ -76,7 +75,7 @@ def can_view_chapter(soup: BeautifulSoup) -> bool:
|
|
76
75
|
return not (vip_status == 1 and is_buy == 0)
|
77
76
|
|
78
77
|
|
79
|
-
def is_encrypted(content:
|
78
|
+
def is_encrypted(content: str | BeautifulSoup) -> bool:
|
80
79
|
"""
|
81
80
|
Return True if content is encrypted.
|
82
81
|
|
@@ -97,21 +96,21 @@ def is_encrypted(content: Union[str, BeautifulSoup]) -> bool:
|
|
97
96
|
return int(chapter_info.get("cES", 0)) == 2
|
98
97
|
|
99
98
|
|
100
|
-
def find_ssr_page_context(soup: BeautifulSoup) ->
|
99
|
+
def find_ssr_page_context(soup: BeautifulSoup) -> dict[str, Any]:
|
101
100
|
"""
|
102
101
|
Extract SSR JSON from <script id="vite-plugin-ssr_pageContext">.
|
103
102
|
"""
|
104
103
|
try:
|
105
104
|
tag = soup.find("script", id="vite-plugin-ssr_pageContext")
|
106
|
-
if tag and tag.string:
|
107
|
-
data:
|
105
|
+
if isinstance(tag, Tag) and tag.string:
|
106
|
+
data: dict[str, Any] = json.loads(tag.string.strip())
|
108
107
|
return data
|
109
108
|
except Exception as e:
|
110
109
|
logger.warning("[Parser] SSR JSON parse error: %s", e)
|
111
110
|
return {}
|
112
111
|
|
113
112
|
|
114
|
-
def extract_chapter_info(ssr_data:
|
113
|
+
def extract_chapter_info(ssr_data: dict[str, Any]) -> dict[str, Any]:
|
115
114
|
"""
|
116
115
|
Extract the 'chapterInfo' dictionary from the SSR page context.
|
117
116
|
|
@@ -1,5 +1,4 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
3
|
novel_downloader.core.requesters
|
5
4
|
--------------------------------
|
@@ -10,20 +9,25 @@ to perform network interactions, such as logging in, sending requests,
|
|
10
9
|
or interacting with browser/session-based sources.
|
11
10
|
|
12
11
|
Subpackages:
|
13
|
-
-
|
14
|
-
-
|
12
|
+
- common
|
13
|
+
- biquge
|
14
|
+
- qidian
|
15
15
|
"""
|
16
16
|
|
17
|
-
from .
|
17
|
+
from .biquge import (
|
18
|
+
BiqugeSession,
|
19
|
+
)
|
20
|
+
from .common import (
|
18
21
|
CommonAsyncSession,
|
19
22
|
CommonSession,
|
20
23
|
)
|
21
|
-
from .
|
24
|
+
from .qidian import (
|
22
25
|
QidianBrowser,
|
23
26
|
QidianSession,
|
24
27
|
)
|
25
28
|
|
26
29
|
__all__ = [
|
30
|
+
"BiqugeSession",
|
27
31
|
"CommonAsyncSession",
|
28
32
|
"CommonSession",
|
29
33
|
"QidianBrowser",
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.requesters.base
|
4
|
+
-------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from .async_session import BaseAsyncSession
|
9
|
+
from .browser import BaseBrowser
|
10
|
+
from .session import BaseSession
|
11
|
+
|
12
|
+
__all__ = [
|
13
|
+
"BaseAsyncSession",
|
14
|
+
"BaseBrowser",
|
15
|
+
"BaseSession",
|
16
|
+
]
|
@@ -1,7 +1,6 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.requesters.
|
3
|
+
novel_downloader.core.requesters.base.async_session
|
5
4
|
---------------------------------------------------
|
6
5
|
|
7
6
|
This module defines the BaseAsyncSession class, which provides asynchronous
|
@@ -12,8 +11,11 @@ cookie handling, and defines abstract methods for subclasses.
|
|
12
11
|
|
13
12
|
import abc
|
14
13
|
import asyncio
|
14
|
+
import logging
|
15
|
+
import random
|
15
16
|
import time
|
16
|
-
|
17
|
+
import types
|
18
|
+
from typing import Any, Literal, Self
|
17
19
|
|
18
20
|
import aiohttp
|
19
21
|
from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
|
@@ -25,7 +27,8 @@ from novel_downloader.utils.constants import DEFAULT_USER_HEADERS
|
|
25
27
|
|
26
28
|
class RateLimiter:
|
27
29
|
"""
|
28
|
-
Simple async token-bucket rate limiter:
|
30
|
+
Simple async token-bucket rate limiter:
|
31
|
+
ensures no more than rate_per_sec
|
29
32
|
requests are started per second, across all coroutines.
|
30
33
|
"""
|
31
34
|
|
@@ -40,7 +43,8 @@ class RateLimiter:
|
|
40
43
|
elapsed = now - self._last
|
41
44
|
delay = self._interval - elapsed
|
42
45
|
if delay > 0:
|
43
|
-
|
46
|
+
jitter = random.uniform(0, 0.3)
|
47
|
+
await asyncio.sleep(delay + jitter)
|
44
48
|
self._last = time.monotonic()
|
45
49
|
|
46
50
|
|
@@ -58,10 +62,13 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
58
62
|
_cookies (Dict[str, str]): Optional cookie jar for the session.
|
59
63
|
"""
|
60
64
|
|
61
|
-
def
|
65
|
+
def is_async(self) -> Literal[True]:
    """
    Report that this requester is asynchronous.

    :return: Always ``True``.
    """
    return True
|
67
|
+
|
68
|
+
def __init__(
|
62
69
|
self,
|
63
70
|
config: RequesterConfig,
|
64
|
-
cookies:
|
71
|
+
cookies: dict[str, str] | None = None,
|
65
72
|
) -> None:
|
66
73
|
"""
|
67
74
|
Initialize the async session with configuration.
|
@@ -71,26 +78,30 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
71
78
|
:param cookies: Optional initial cookies to set on the session.
|
72
79
|
"""
|
73
80
|
self._config = config
|
74
|
-
self._timeout = config.timeout
|
75
81
|
self._retry_times = config.retry_times
|
76
|
-
self._retry_interval = config.
|
82
|
+
self._retry_interval = config.backoff_factor
|
83
|
+
self._timeout = config.timeout
|
84
|
+
self._max_rps = config.max_rps
|
85
|
+
self._max_connections = config.max_connections
|
86
|
+
|
77
87
|
self._cookies = cookies or {}
|
78
88
|
self._headers = DEFAULT_USER_HEADERS.copy()
|
79
|
-
self._session:
|
80
|
-
self._rate_limiter:
|
89
|
+
self._session: ClientSession | None = None
|
90
|
+
self._rate_limiter: RateLimiter | None = None
|
91
|
+
|
92
|
+
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
93
|
+
|
94
|
+
self._init_session()
|
81
95
|
|
82
|
-
|
96
|
+
def _init_session(self) -> None:
|
83
97
|
"""
|
84
98
|
Set up the aiohttp.ClientSession with timeout, connector, headers, and cookies.
|
85
99
|
"""
|
86
|
-
|
87
|
-
|
88
|
-
self._rate_limiter = RateLimiter(max_rps)
|
100
|
+
if self._max_rps is not None:
|
101
|
+
self._rate_limiter = RateLimiter(self._max_rps)
|
89
102
|
|
90
103
|
timeout = ClientTimeout(total=self._timeout)
|
91
|
-
connector = TCPConnector(
|
92
|
-
limit_per_host=getattr(self._config, "max_connections", 10)
|
93
|
-
)
|
104
|
+
connector = TCPConnector(limit_per_host=self._max_connections)
|
94
105
|
self._session = ClientSession(
|
95
106
|
timeout=timeout,
|
96
107
|
connector=connector,
|
@@ -98,7 +109,13 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
98
109
|
cookies=self._cookies,
|
99
110
|
)
|
100
111
|
|
101
|
-
async def login(
|
112
|
+
async def login(
|
113
|
+
self,
|
114
|
+
username: str = "",
|
115
|
+
password: str = "",
|
116
|
+
manual_login: bool = False,
|
117
|
+
**kwargs: Any,
|
118
|
+
) -> bool:
|
102
119
|
"""
|
103
120
|
Attempt to log in asynchronously.
|
104
121
|
Override in subclasses that require authentication.
|
@@ -112,7 +129,9 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
112
129
|
|
113
130
|
@abc.abstractmethod
|
114
131
|
async def get_book_info(
|
115
|
-
self,
|
132
|
+
self,
|
133
|
+
book_id: str,
|
134
|
+
**kwargs: Any,
|
116
135
|
) -> str:
|
117
136
|
"""
|
118
137
|
Fetch the raw HTML (or JSON) of the book info page asynchronously.
|
@@ -125,7 +144,10 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
125
144
|
|
126
145
|
@abc.abstractmethod
|
127
146
|
async def get_book_chapter(
|
128
|
-
self,
|
147
|
+
self,
|
148
|
+
book_id: str,
|
149
|
+
chapter_id: str,
|
150
|
+
**kwargs: Any,
|
129
151
|
) -> str:
|
130
152
|
"""
|
131
153
|
Fetch the raw HTML (or JSON) of a single chapter asynchronously.
|
@@ -137,7 +159,11 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
137
159
|
"""
|
138
160
|
...
|
139
161
|
|
140
|
-
async def get_bookcase(
|
162
|
+
async def get_bookcase(
|
163
|
+
self,
|
164
|
+
page: int = 1,
|
165
|
+
**kwargs: Any,
|
166
|
+
) -> str:
|
141
167
|
"""
|
142
168
|
Optional: Retrieve the HTML content of the authenticated user's bookcase page.
|
143
169
|
Subclasses that support user login/bookcase should override this.
|
@@ -159,17 +185,12 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
159
185
|
:return: The response body as text.
|
160
186
|
:raises: aiohttp.ClientError on final failure.
|
161
187
|
"""
|
162
|
-
if self._session is None:
|
163
|
-
await self._setup()
|
164
|
-
if self._session is None:
|
165
|
-
raise RuntimeError("Session not initialized after setup")
|
166
|
-
|
167
188
|
if self._rate_limiter:
|
168
189
|
await self._rate_limiter.wait()
|
169
190
|
|
170
191
|
for attempt in range(self._retry_times + 1):
|
171
192
|
try:
|
172
|
-
async with self.
|
193
|
+
async with self.session.get(url, **kwargs) as resp:
|
173
194
|
resp.raise_for_status()
|
174
195
|
text: str = await resp.text()
|
175
196
|
return text
|
@@ -182,7 +203,10 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
182
203
|
raise RuntimeError("Unreachable code reached in fetch()")
|
183
204
|
|
184
205
|
async def get(
|
185
|
-
self,
|
206
|
+
self,
|
207
|
+
url: str,
|
208
|
+
params: dict[str, Any] | None = None,
|
209
|
+
**kwargs: Any,
|
186
210
|
) -> ClientResponse:
|
187
211
|
"""
|
188
212
|
Send an HTTP GET request asynchronously.
|
@@ -193,20 +217,13 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
193
217
|
:return: aiohttp.ClientResponse object.
|
194
218
|
:raises RuntimeError: If the session is not initialized.
|
195
219
|
"""
|
196
|
-
|
197
|
-
await self._setup()
|
198
|
-
if self._session is None:
|
199
|
-
raise RuntimeError("Session not initialized after setup")
|
200
|
-
|
201
|
-
if self._rate_limiter:
|
202
|
-
await self._rate_limiter.wait()
|
203
|
-
return await self._session.get(url, params=params, **kwargs)
|
220
|
+
return await self._request("GET", url, params=params, **kwargs)
|
204
221
|
|
205
222
|
async def post(
|
206
223
|
self,
|
207
224
|
url: str,
|
208
|
-
data:
|
209
|
-
json:
|
225
|
+
data: dict[str, Any] | bytes | None = None,
|
226
|
+
json: dict[str, Any] | None = None,
|
210
227
|
**kwargs: Any,
|
211
228
|
) -> ClientResponse:
|
212
229
|
"""
|
@@ -219,14 +236,7 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
219
236
|
:return: aiohttp.ClientResponse object.
|
220
237
|
:raises RuntimeError: If the session is not initialized.
|
221
238
|
"""
|
222
|
-
|
223
|
-
await self._setup()
|
224
|
-
if self._session is None:
|
225
|
-
raise RuntimeError("Session not initialized after setup")
|
226
|
-
|
227
|
-
if self._rate_limiter:
|
228
|
-
await self._rate_limiter.wait()
|
229
|
-
return await self._session.post(url, data=data, json=json, **kwargs)
|
239
|
+
return await self._request("POST", url, data=data, json=json, **kwargs)
|
230
240
|
|
231
241
|
@property
|
232
242
|
def session(self) -> ClientSession:
|
@@ -240,41 +250,106 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
240
250
|
return self._session
|
241
251
|
|
242
252
|
@property
|
243
|
-
def
|
244
|
-
"""
|
245
|
-
|
253
|
+
def cookies(self) -> dict[str, str]:
|
254
|
+
"""
|
255
|
+
Get the current session cookies.
|
246
256
|
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
257
|
+
:return: A dict mapping cookie names to their values.
|
258
|
+
"""
|
259
|
+
if self._session:
|
260
|
+
return {c.key: c.value for c in self._session.cookie_jar}
|
261
|
+
else:
|
262
|
+
return self._cookies
|
251
263
|
|
252
264
|
@property
|
253
|
-
def
|
254
|
-
"""
|
255
|
-
|
265
|
+
def headers(self) -> dict[str, str]:
|
266
|
+
"""
|
267
|
+
Get the current session headers.
|
256
268
|
|
257
|
-
|
258
|
-
|
259
|
-
|
269
|
+
:return: A dict mapping header names to their values.
|
270
|
+
"""
|
271
|
+
if self._session:
|
272
|
+
return dict(self._session.headers)
|
273
|
+
else:
|
274
|
+
return self._headers
|
275
|
+
|
276
|
+
def get_header(self, key: str, default: Any = None) -> Any:
|
260
277
|
"""
|
261
|
-
|
278
|
+
Retrieve a specific header value by name.
|
262
279
|
|
263
|
-
:param
|
264
|
-
:param
|
280
|
+
:param key: The header name to look up.
|
281
|
+
:param default: The value to return if the header is not present.
|
282
|
+
:return: The header value if present, else default.
|
265
283
|
"""
|
266
|
-
|
267
|
-
|
268
|
-
self._cookies.update({str(k): str(v) for k, v in cookies.items()})
|
284
|
+
if self._session:
|
285
|
+
return self._session.headers.get(key, default)
|
269
286
|
else:
|
270
|
-
|
271
|
-
|
287
|
+
return self._headers.get(key, default)
|
288
|
+
|
289
|
+
def update_header(self, key: str, value: str) -> None:
    """
    Set a single header, adding it if it does not exist yet.

    The value is stored in the locally kept header dict and, when a
    session object is present, also written to that session's headers.

    :param key: The name of the header.
    :param value: The value of the header.
    """
    self._headers[key] = value
    live_session = self._session
    if live_session:
        live_session.headers[key] = value
|
299
|
+
|
300
|
+
def update_headers(self, headers: dict[str, str]) -> None:
    """
    Merge several headers into the session at once.

    The locally kept header dict is updated first; when a session object
    is present its headers receive the same update.

    :param headers: A dictionary of header key-value pairs.
    """
    self._headers.update(headers)
    live_session = self._session
    if live_session:
        live_session.headers.update(headers)
|
309
|
+
|
310
|
+
def update_cookie(self, key: str, value: str) -> None:
    """
    Set a single cookie, adding it if it does not exist yet.

    The value is stored in the locally kept cookie dict and, when a
    session object is present, also pushed into its cookie jar.

    :param key: The name of the cookie.
    :param value: The value of the cookie.
    """
    self._cookies[key] = value
    live_session = self._session
    if live_session:
        live_session.cookie_jar.update_cookies({key: value})
|
320
|
+
|
321
|
+
def update_cookies(
    self,
    cookies: dict[str, str],
) -> None:
    """
    Merge several cookies into the session at once.

    The locally kept cookie dict is updated first; when a session object
    is present the same mapping is pushed into its cookie jar.

    :param cookies: A dictionary of cookie key-value pairs.
    """
    self._cookies.update(cookies)
    live_session = self._session
    if live_session:
        live_session.cookie_jar.update_cookies(cookies)
|
272
333
|
|
273
|
-
|
334
|
+
def clear_cookies(self) -> None:
|
335
|
+
"""
|
336
|
+
Clear cookies from the session.
|
337
|
+
"""
|
338
|
+
self._cookies = {}
|
274
339
|
if self._session:
|
275
|
-
self._session.cookie_jar.
|
340
|
+
self._session.cookie_jar.clear()
|
341
|
+
|
342
|
+
async def _request(
    self,
    method: str,
    url: str,
    **kwargs: Any,
) -> ClientResponse:
    """
    Send one HTTP request through the session.

    When a rate limiter is configured, its ``wait()`` is awaited before
    the request is issued.

    :param method: HTTP method name passed to the session.
    :param url: Target URL.
    :param kwargs: Extra keyword arguments forwarded to the session's
                   ``request`` call unchanged.
    :return: The response object returned by the session.
    """
    limiter = self._rate_limiter
    if limiter:
        await limiter.wait()
    response = await self.session.request(method, url, **kwargs)
    return response
|
276
351
|
|
277
|
-
async def
|
352
|
+
async def close(self) -> None:
|
278
353
|
"""
|
279
354
|
Shutdown and clean up the session. Closes connection pool.
|
280
355
|
"""
|
@@ -282,16 +357,48 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
282
357
|
await self._session.close()
|
283
358
|
self._session = None
|
284
359
|
|
285
|
-
def
|
360
|
+
def sync_close(self) -> None:
    """
    Sync wrapper for closing the aiohttp session
    when called from sync contexts.

    Does nothing when there is no session to close. If an event loop is
    already running in the current thread, ``close()`` is scheduled on it
    as a task and this method returns without waiting for it to finish.
    Otherwise ``close()`` is run to completion on a temporary event loop.

    :raises Exception: Whatever ``close()`` raises when it is run on the
                       temporary loop.
    """
    # ``__del__`` may call this on an instance whose ``__init__`` failed
    # before ``_session`` was assigned, so do not assume it exists.
    if not getattr(self, "_session", None):
        return

    try:
        running_loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread. ``asyncio.run`` creates a
        # temporary loop, always closes it (even if ``close()`` raises),
        # and does not leave a closed loop installed as the current one.
        asyncio.run(self.close())
    else:
        # Fire-and-forget: we cannot block inside a running loop.
        running_loop.create_task(self.close())
|
374
|
+
|
375
|
+
async def __aenter__(self) -> Self:
|
376
|
+
if self._session is None:
|
377
|
+
self._init_session()
|
378
|
+
return self
|
379
|
+
|
380
|
+
async def __aexit__(
|
381
|
+
self,
|
382
|
+
exc_type: type[BaseException] | None,
|
383
|
+
exc_val: BaseException | None,
|
384
|
+
tb: types.TracebackType | None,
|
385
|
+
) -> None:
|
386
|
+
await self.close()
|
387
|
+
|
388
|
+
def __del__(self) -> None:
    """
    Finalizer: hand off to ``sync_close()`` to close the session.
    """
    self.sync_close()
|
390
|
+
|
391
|
+
def __getstate__(self) -> dict[str, Any]:
    """
    Build the state used for serialization.

    The session is closed through ``sync_close()`` first. The returned
    dict is a shallow copy of the instance attributes without the
    ``_session`` and ``_rate_limiter`` entries.

    :return: Attribute dict with the session and rate limiter left out.
    """
    self.sync_close()
    excluded = ("_session", "_rate_limiter")
    return {
        name: value
        for name, value in self.__dict__.items()
        if name not in excluded
    }
|
293
400
|
|
294
|
-
def __setstate__(self, state:
|
401
|
+
def __setstate__(self, state: dict[str, Any]) -> None:
|
295
402
|
"""
|
296
403
|
Restore object state. Session will be lazily reinitialized on next request.
|
297
404
|
"""
|