novel-downloader 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -2
- novel_downloader/cli/__init__.py +0 -1
- novel_downloader/cli/clean.py +2 -10
- novel_downloader/cli/download.py +18 -22
- novel_downloader/cli/interactive.py +0 -1
- novel_downloader/cli/main.py +1 -3
- novel_downloader/cli/settings.py +8 -8
- novel_downloader/config/__init__.py +0 -1
- novel_downloader/config/adapter.py +48 -18
- novel_downloader/config/loader.py +116 -108
- novel_downloader/config/models.py +41 -32
- novel_downloader/config/site_rules.py +2 -4
- novel_downloader/core/__init__.py +0 -1
- novel_downloader/core/downloaders/__init__.py +4 -4
- novel_downloader/core/downloaders/base/__init__.py +14 -0
- novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
- novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
- novel_downloader/core/downloaders/biquge/__init__.py +12 -0
- novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
- novel_downloader/core/downloaders/common/__init__.py +14 -0
- novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
- novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +34 -23
- novel_downloader/core/downloaders/qidian/__init__.py +10 -0
- novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +80 -64
- novel_downloader/core/factory/__init__.py +4 -5
- novel_downloader/core/factory/{downloader_factory.py → downloader.py} +36 -35
- novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
- novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
- novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
- novel_downloader/core/interfaces/__init__.py +8 -9
- novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
- novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +26 -12
- novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
- novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
- novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
- novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +34 -17
- novel_downloader/core/parsers/__init__.py +5 -4
- novel_downloader/core/parsers/{base_parser.py → base.py} +20 -11
- novel_downloader/core/parsers/biquge/__init__.py +10 -0
- novel_downloader/core/parsers/biquge/main_parser.py +126 -0
- novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
- novel_downloader/core/parsers/{common_parser → common}/helper.py +20 -18
- novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +41 -49
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +16 -12
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +37 -45
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +16 -12
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +150 -0
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +9 -10
- novel_downloader/core/requesters/__init__.py +9 -5
- novel_downloader/core/requesters/base/__init__.py +16 -0
- novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +180 -73
- novel_downloader/core/requesters/base/browser.py +340 -0
- novel_downloader/core/requesters/base/session.py +364 -0
- novel_downloader/core/requesters/biquge/__init__.py +12 -0
- novel_downloader/core/requesters/biquge/session.py +90 -0
- novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
- novel_downloader/core/requesters/common/async_session.py +96 -0
- novel_downloader/core/requesters/common/session.py +113 -0
- novel_downloader/core/requesters/qidian/__init__.py +21 -0
- novel_downloader/core/requesters/qidian/broswer.py +306 -0
- novel_downloader/core/requesters/qidian/session.py +287 -0
- novel_downloader/core/savers/__init__.py +5 -3
- novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
- novel_downloader/core/savers/biquge.py +25 -0
- novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
- novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +24 -52
- novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
- novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
- novel_downloader/core/savers/epub_utils/__init__.py +0 -1
- novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
- novel_downloader/core/savers/epub_utils/initializer.py +4 -5
- novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
- novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
- novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
- novel_downloader/locales/en.json +12 -4
- novel_downloader/locales/zh.json +9 -1
- novel_downloader/resources/config/settings.toml +88 -0
- novel_downloader/utils/cache.py +2 -2
- novel_downloader/utils/chapter_storage.py +340 -0
- novel_downloader/utils/constants.py +8 -5
- novel_downloader/utils/crypto_utils.py +3 -3
- novel_downloader/utils/file_utils/__init__.py +0 -1
- novel_downloader/utils/file_utils/io.py +12 -17
- novel_downloader/utils/file_utils/normalize.py +1 -3
- novel_downloader/utils/file_utils/sanitize.py +2 -9
- novel_downloader/utils/fontocr/__init__.py +0 -1
- novel_downloader/utils/fontocr/ocr_v1.py +19 -22
- novel_downloader/utils/fontocr/ocr_v2.py +147 -60
- novel_downloader/utils/hash_store.py +19 -20
- novel_downloader/utils/hash_utils.py +0 -1
- novel_downloader/utils/i18n.py +3 -4
- novel_downloader/utils/logger.py +5 -6
- novel_downloader/utils/model_loader.py +5 -8
- novel_downloader/utils/network.py +9 -10
- novel_downloader/utils/state.py +6 -7
- novel_downloader/utils/text_utils/__init__.py +0 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
- novel_downloader/utils/text_utils/diff_display.py +0 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -4
- novel_downloader/utils/text_utils/text_cleaning.py +0 -1
- novel_downloader/utils/time_utils/__init__.py +0 -1
- novel_downloader/utils/time_utils/datetime_utils.py +9 -11
- novel_downloader/utils/time_utils/sleep_utils.py +27 -13
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/METADATA +14 -17
- novel_downloader-1.3.0.dist-info/RECORD +127 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/WHEEL +1 -1
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +0 -95
- novel_downloader/core/requesters/base_browser.py +0 -210
- novel_downloader/core/requesters/base_session.py +0 -243
- novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
- novel_downloader/core/requesters/common_requester/common_session.py +0 -126
- novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -377
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
- novel_downloader/resources/config/settings.yaml +0 -76
- novel_downloader-1.2.1.dist-info/RECORD +0 -115
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/top_level.txt +0 -0
@@ -1,243 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
"""
|
4
|
-
novel_downloader.core.requesters.base_session
|
5
|
-
---------------------------------------------
|
6
|
-
|
7
|
-
This module defines the BaseSession class, which provides basic HTTP
|
8
|
-
request capabilities using the requests library. It maintains a
|
9
|
-
persistent session and supports retries, headers, and timeout configurations.
|
10
|
-
"""
|
11
|
-
|
12
|
-
import abc
|
13
|
-
from typing import Any, Dict, Optional, Union
|
14
|
-
|
15
|
-
import requests
|
16
|
-
from requests import Response, Session
|
17
|
-
from requests.adapters import HTTPAdapter, Retry
|
18
|
-
|
19
|
-
from novel_downloader.config.models import RequesterConfig
|
20
|
-
from novel_downloader.core.interfaces import RequesterProtocol
|
21
|
-
from novel_downloader.utils.constants import DEFAULT_USER_HEADERS
|
22
|
-
|
23
|
-
|
24
|
-
class BaseSession(RequesterProtocol, abc.ABC):
    """
    Abstract base class for synchronous requesters built on ``requests.Session``.

    It owns one persistent session that is mounted with a retrying
    ``HTTPAdapter`` and pre-populated with default headers and optional
    cookies. Subclasses must implement :meth:`get_book_info` and
    :meth:`get_book_chapter`; :meth:`login` and :meth:`get_bookcase` are
    optional and raise ``NotImplementedError`` unless overridden.

    Attributes:
        _session (requests.Session | None): The persistent HTTP session, or
            ``None`` after :meth:`shutdown`.
        _timeout (float): Default timeout for each request, taken from the config.
        _cookies (dict): Cached cookies, re-applied on every :meth:`_setup`.
    """

    def _init_session(
        self, config: RequesterConfig, cookies: Optional[Dict[str, str]] = None
    ) -> None:
        """
        Store the configuration and create the underlying session.

        :param config: Configuration object for session behavior
                       (timeouts, retries, headers, etc.)
        :param cookies: Optional cookies to preload into the session.
        """
        self._config = config
        self._timeout = config.timeout
        self._retry_times = config.retry_times
        self._retry_interval = config.retry_interval
        # Copy the caller's mapping: update_cookies() mutates self._cookies and
        # must not modify a dict that the caller still holds.
        self._cookies = dict(cookies) if cookies else {}
        # Copy so that this instance never aliases the shared module-level
        # constant (assumes DEFAULT_USER_HEADERS is a mapping or an iterable
        # of key/value pairs, as required by ``headers.update`` below).
        self._headers = dict(DEFAULT_USER_HEADERS)
        self._session: Optional[Session] = None

        self._setup()

    def _setup(self) -> None:
        """
        Set up the session with retry strategy and apply default headers.

        A new ``requests.Session`` is created, a retrying adapter is mounted
        for both ``http://`` and ``https://``, and the cached headers and
        cookies are applied. Also called from :meth:`__setstate__`.
        """
        self._session = requests.Session()

        retry_strategy = Retry(
            total=self._config.retry_times,
            backoff_factor=self._config.retry_interval,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["HEAD", "GET", "OPTIONS"],
        )

        adapter = HTTPAdapter(max_retries=retry_strategy)
        self._session.mount("http://", adapter)
        self._session.mount("https://", adapter)
        self._session.headers.update(self._headers)

        if self._cookies:
            self._session.cookies.update(self._cookies)

    def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
        """
        Attempt to log in.

        The base implementation does not support login.

        :param max_retries: Unused by the base implementation.
        :param manual_login: Unused by the base implementation.
        :raises NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            "Login is not supported by this session type. "
            "Override login() in your subclass to enable it."
        )

    @abc.abstractmethod
    def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
        """
        Fetch the raw HTML (or JSON) of the book info page.

        :param book_id: The book identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The page content as a string.
        """
        ...

    @abc.abstractmethod
    def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
    ) -> str:
        """
        Fetch the raw HTML (or JSON) of a single chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The chapter content as a string.
        """
        ...

    def get_bookcase(self, wait_time: Optional[float] = None) -> str:
        """
        Optional: Retrieve the HTML content of the authenticated user's bookcase page.

        Subclasses that support user login and bookcase retrieval should override this.

        :param wait_time: Base number of seconds to wait before returning content.
        :return: The HTML markup of the bookcase page.
        :raises NotImplementedError: If the subclass does not implement.
        """
        raise NotImplementedError(
            "Bookcase fetching is not supported by this session type. "
            "Override get_bookcase() in your subclass to enable it."
        )

    def get(
        self, url: str, params: Optional[Dict[str, Any]] = None, **kwargs: Any
    ) -> Response:
        """
        Send a GET request.

        :param url: The target URL.
        :param params: Query parameters to include in the request.
        :param kwargs: Additional arguments passed to requests. A ``timeout``
                       given here overrides the configured default.
        :return: Response object from the GET request.
        :raises RuntimeError: If the session is not initialized.
        """
        if self._session is None:
            raise RuntimeError("Session is not initialized or has been shut down.")
        # setdefault (rather than an explicit timeout= argument) lets callers
        # pass their own timeout without a duplicate-keyword TypeError.
        kwargs.setdefault("timeout", self._timeout)
        return self._session.get(url, params=params, **kwargs)

    def post(
        self,
        url: str,
        data: Optional[Union[Dict[str, Any], bytes]] = None,
        json: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Response:
        """
        Send a POST request.

        :param url: The target URL.
        :param data: Form data to include in the request body.
        :param json: JSON body to include in the request.
        :param kwargs: Additional arguments passed to requests. A ``timeout``
                       given here overrides the configured default.
        :return: Response object from the POST request.
        :raises RuntimeError: If the session is not initialized.
        """
        if self._session is None:
            raise RuntimeError("Session is not initialized or has been shut down.")
        # Same override rule as get(): caller-supplied timeout wins.
        kwargs.setdefault("timeout", self._timeout)
        return self._session.post(url, data=data, json=json, **kwargs)

    @property
    def session(self) -> Session:
        """
        Return the active requests.Session.

        :raises RuntimeError: If the session is uninitialized or has been shut down.
        """
        if self._session is None:
            raise RuntimeError("Session is not initialized or has been shut down.")
        return self._session

    @property
    def timeout(self) -> float:
        """Return the default timeout setting."""
        return self._timeout

    @property
    def retry_times(self) -> int:
        """Return the maximum number of retry attempts."""
        return self._retry_times

    @property
    def retry_interval(self) -> float:
        """Return the base interval (in seconds) between retries."""
        return self._retry_interval

    @property
    def headers(self) -> Dict[str, str]:
        """
        Return the session's current string-valued headers.

        An empty dict is returned when the session has been shut down.
        """
        if self._session is None:
            return {}
        return {k: v for k, v in self._session.headers.items() if isinstance(v, str)}

    def update_cookies(self, cookies: Dict[str, str], overwrite: bool = True) -> None:
        """
        Update cookies for the current session (if initialized) as well as for the
        internal cache kept in ``self._cookies`` so that subsequent ``_setup`` calls
        also see the latest values.

        :param cookies: Cookies to merge; keys and values are coerced to ``str``.
                        An empty mapping is a no-op.
        :param overwrite: When ``True`` existing cached keys are replaced;
                          when ``False`` only missing keys are added.
        """
        if not cookies:
            return

        if overwrite:
            for k, v in cookies.items():
                self._cookies[str(k)] = str(v)
        else:
            for k, v in cookies.items():
                self._cookies.setdefault(str(k), str(v))

        if self._session is not None:
            self._session.cookies.update(self._cookies)

    def shutdown(self) -> None:
        """
        Shutdown and clean up the session.

        This closes the underlying session and drops the reference to it, so
        later calls to :meth:`get`, :meth:`post` or :attr:`session` raise
        ``RuntimeError``.
        """
        if self._session is not None:
            self._session.close()
            self._session = None

    def __getstate__(self) -> Dict[str, Any]:
        """
        Prepare object state for serialization.

        The live session object is left out of the state; it is rebuilt in
        :meth:`__setstate__`.

        :return: Serializable dict of the object state.
        """
        state = self.__dict__.copy()
        state.pop("_session", None)
        return state

    def __setstate__(self, state: Dict[str, Any]) -> None:
        """
        Restore object state and reinitialize session.

        :param state: Saved state dictionary.
        """
        self.__dict__.update(state)
        # Recreate the session from the restored config, headers and cookies.
        self._setup()
|
@@ -1,98 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
"""
|
4
|
-
novel_downloader.core.requesters.common_requester.common_async_session
|
5
|
-
----------------------------------------------------------------------
|
6
|
-
|
7
|
-
This module defines a `CommonAsyncSession` class for handling HTTP requests
|
8
|
-
to common novel sites **asynchronously**. It provides methods to retrieve
|
9
|
-
raw book info pages and chapter contents using a flexible URL templating
|
10
|
-
system defined by a site profile, with retry logic and random delays.
|
11
|
-
"""
|
12
|
-
|
13
|
-
import asyncio
|
14
|
-
import random
|
15
|
-
from typing import Dict, Optional
|
16
|
-
|
17
|
-
from novel_downloader.config import RequesterConfig, SiteProfile
|
18
|
-
from novel_downloader.core.requesters.base_async_session import BaseAsyncSession
|
19
|
-
|
20
|
-
|
21
|
-
class CommonAsyncSession(BaseAsyncSession):
    """
    Asynchronous requester for sites described by a URL-template profile.

    :ivar _site: Identifier or name of the target site.
    :ivar _profile: Site metadata; read here for the ``book_info_url`` and
                    ``chapter_url`` templates.
    """

    def __init__(
        self,
        config: RequesterConfig,
        site: str,
        profile: SiteProfile,
        cookies: Optional[Dict[str, str]] = None,
    ) -> None:
        """
        Create the session and remember the site and its profile.

        :param config: Requester settings, forwarded to ``_init_session``.
        :param site: Identifier or domain of the target site.
        :param profile: Site metadata and URL templates.
        :param cookies: Optional cookies forwarded to ``_init_session``.
        """
        self._init_session(config=config, cookies=cookies)
        self._site, self._profile = site, profile

    async def get_book_info(
        self, book_id: str, wait_time: Optional[float] = None
    ) -> str:
        """
        Download the book info page and then pause for a jittered delay.

        The page is retrieved through ``self.fetch``; afterwards the coroutine
        sleeps for the base delay multiplied by a random factor in [0.5, 1.5].

        :param book_id: The book identifier substituted into the URL template.
        :param wait_time: Base delay in seconds; the configured ``wait_time``
                          is used when this is ``None``.
        :return: The content returned by ``self.fetch``.
        """
        page = await self.fetch(self.book_info_url.format(book_id=book_id))

        if wait_time is None:
            delay_base = self._config.wait_time
        else:
            delay_base = wait_time

        jitter = random.uniform(0.5, 1.5)
        await asyncio.sleep(delay_base * jitter)
        return page

    async def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
    ) -> str:
        """
        Download a single chapter page and then pause for a jittered delay.

        The page is retrieved through ``self.fetch``; afterwards the coroutine
        sleeps for the base delay multiplied by a random factor in [0.5, 1.5].

        :param book_id: The book identifier substituted into the URL template.
        :param chapter_id: The chapter identifier substituted into the URL template.
        :param wait_time: Base delay in seconds; the configured ``wait_time``
                          is used when this is ``None``.
        :return: The content returned by ``self.fetch``.
        """
        target = self.chapter_url.format(book_id=book_id, chapter_id=chapter_id)
        page = await self.fetch(target)

        if wait_time is None:
            delay_base = self._config.wait_time
        else:
            delay_base = wait_time

        jitter = random.uniform(0.5, 1.5)
        await asyncio.sleep(delay_base * jitter)
        return page

    @property
    def site(self) -> str:
        """Name of the site this session targets."""
        return self._site

    @property
    def book_info_url(self) -> str:
        """URL template used to fetch the book info page."""
        template = self._profile["book_info_url"]
        return template

    @property
    def chapter_url(self) -> str:
        """URL template used to fetch a chapter page."""
        template = self._profile["chapter_url"]
        return template
|
@@ -1,126 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
"""
|
4
|
-
novel_downloader.core.requesters.common_requester.common_session
|
5
|
-
------------------------------------------------------------------
|
6
|
-
|
7
|
-
This module defines a `CommonSession` class for handling HTTP requests
|
8
|
-
to common novel sites. It provides methods to retrieve raw book
|
9
|
-
information pages and chapter contents using a flexible URL templating
|
10
|
-
system defined by a site profile.
|
11
|
-
"""
|
12
|
-
|
13
|
-
import time
|
14
|
-
from typing import Dict, Optional
|
15
|
-
|
16
|
-
from novel_downloader.config import RequesterConfig, SiteProfile
|
17
|
-
from novel_downloader.utils.time_utils import sleep_with_random_delay
|
18
|
-
|
19
|
-
from ..base_session import BaseSession
|
20
|
-
|
21
|
-
|
22
|
-
class CommonSession(BaseSession):
    """
    A common session for handling site-specific HTTP requests.

    :ivar _site: The unique identifier or name of the site.
    :ivar _profile: Metadata and URL templates related to the site.
    :ivar session: The HTTP session used to make requests.
    """

    def __init__(
        self,
        config: RequesterConfig,
        site: str,
        profile: SiteProfile,
        cookies: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize a CommonSession instance.

        :param config: The RequesterConfig instance containing settings.
        :param site: The identifier or domain of the target site.
        :param profile: The site's metadata and URL templates.
        :param cookies: Optional cookies to preload into the session.
        """
        self._init_session(config=config, cookies=cookies)
        self._site = site
        self._profile = profile

    def _fetch_text(self, url: str, wait_time: Optional[float], caller: str) -> str:
        """
        GET ``url`` with retries and return the response text.

        At least one attempt is always made, even when ``retry_times`` is
        zero or negative. After a failed attempt the method sleeps for
        ``retry_interval`` seconds before trying again; the exception from the
        final attempt is re-raised unchanged. After a successful fetch,
        ``sleep_with_random_delay`` is called with the base wait time.

        :param url: Fully formatted URL to request.
        :param wait_time: Base wait time in seconds; the configured
                          ``wait_time`` is used when this is ``None``.
        :param caller: Name of the public method, used in the fallback error.
        :return: The response body as text.
        """
        base = wait_time if wait_time is not None else self._config.wait_time
        # Guard against retry_times <= 0, which would otherwise skip the loop
        # and fail without ever sending a request.
        attempts = max(1, self.retry_times)

        for attempt in range(1, attempts + 1):
            try:
                with self.session.get(url, timeout=self.timeout) as response:
                    response.raise_for_status()
                    content = response.text
            except Exception:
                # Deliberately broad: any failure while fetching is retried.
                if attempt == attempts:
                    # The last attempt failed as well; propagate the error.
                    raise
                time.sleep(self.retry_interval)
                continue
            # Kept outside the try block so that an error raised by the delay
            # helper cannot trigger a second download of the same page.
            sleep_with_random_delay(base)
            return content

        # Unreachable in practice; kept so every path returns or raises.
        raise RuntimeError(f"Unexpected error: {caller} failed without returning")

    def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
        """
        Fetch the raw HTML (or JSON) of the book info page.

        :param book_id: The book identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The page content as a string.
        :raises requests.HTTPError: If the request returns an unsuccessful status code.
        """
        url = self.book_info_url.format(book_id=book_id)
        return self._fetch_text(url, wait_time, "get_book_info")

    def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
    ) -> str:
        """
        Fetch the raw HTML (or JSON) of a single chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The chapter content as a string.
        :raises requests.HTTPError: If the request returns an unsuccessful status code.
        """
        url = self.chapter_url.format(book_id=book_id, chapter_id=chapter_id)
        return self._fetch_text(url, wait_time, "get_book_chapter")

    @property
    def site(self) -> str:
        """Return the site name."""
        return self._site

    @property
    def book_info_url(self) -> str:
        """
        Return the URL template for fetching book information.
        """
        return self._profile["book_info_url"]

    @property
    def chapter_url(self) -> str:
        """
        Return the URL template for fetching chapter information.
        """
        return self._profile["chapter_url"]
|
@@ -1,22 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
"""
|
4
|
-
novel_downloader.core.requesters.qidian_requester
|
5
|
-
-------------------------------------------------
|
6
|
-
|
7
|
-
This package provides the implementation of the Qidian-specific requester logic.
|
8
|
-
It contains modules for interacting with Qidian's website, including login,
|
9
|
-
page navigation, and data retrieval using a browser-based automation approach.
|
10
|
-
|
11
|
-
Modules:
|
12
|
-
- qidian_broswer: Implements the QidianBrowser class for automated browser control.
|
13
|
-
- qidian_session: Implements the QidianSession class.
|
14
|
-
"""
|
15
|
-
|
16
|
-
from .qidian_broswer import QidianBrowser
|
17
|
-
from .qidian_session import QidianSession
|
18
|
-
|
19
|
-
__all__ = [
|
20
|
-
"QidianBrowser",
|
21
|
-
"QidianSession",
|
22
|
-
]
|