novel-downloader 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -2
- novel_downloader/cli/__init__.py +0 -1
- novel_downloader/cli/clean.py +2 -10
- novel_downloader/cli/download.py +16 -22
- novel_downloader/cli/interactive.py +0 -1
- novel_downloader/cli/main.py +1 -3
- novel_downloader/cli/settings.py +8 -8
- novel_downloader/config/__init__.py +0 -1
- novel_downloader/config/adapter.py +32 -27
- novel_downloader/config/loader.py +116 -108
- novel_downloader/config/models.py +35 -29
- novel_downloader/config/site_rules.py +2 -4
- novel_downloader/core/__init__.py +0 -1
- novel_downloader/core/downloaders/__init__.py +4 -4
- novel_downloader/core/downloaders/base/__init__.py +14 -0
- novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
- novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
- novel_downloader/core/downloaders/biquge/__init__.py +12 -0
- novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
- novel_downloader/core/downloaders/common/__init__.py +14 -0
- novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
- novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +33 -21
- novel_downloader/core/downloaders/qidian/__init__.py +10 -0
- novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +79 -62
- novel_downloader/core/factory/__init__.py +4 -5
- novel_downloader/core/factory/{downloader_factory.py → downloader.py} +25 -26
- novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
- novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
- novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
- novel_downloader/core/interfaces/__init__.py +8 -9
- novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
- novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +23 -12
- novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
- novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
- novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
- novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +31 -17
- novel_downloader/core/parsers/__init__.py +5 -4
- novel_downloader/core/parsers/{base_parser.py → base.py} +18 -9
- novel_downloader/core/parsers/biquge/__init__.py +10 -0
- novel_downloader/core/parsers/biquge/main_parser.py +126 -0
- novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
- novel_downloader/core/parsers/{common_parser → common}/helper.py +13 -13
- novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +40 -48
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +14 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +36 -44
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +14 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/book_info_parser.py +5 -6
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +7 -8
- novel_downloader/core/requesters/__init__.py +9 -5
- novel_downloader/core/requesters/base/__init__.py +16 -0
- novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +177 -73
- novel_downloader/core/requesters/base/browser.py +340 -0
- novel_downloader/core/requesters/base/session.py +364 -0
- novel_downloader/core/requesters/biquge/__init__.py +12 -0
- novel_downloader/core/requesters/biquge/session.py +90 -0
- novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
- novel_downloader/core/requesters/common/async_session.py +96 -0
- novel_downloader/core/requesters/common/session.py +113 -0
- novel_downloader/core/requesters/qidian/__init__.py +21 -0
- novel_downloader/core/requesters/qidian/broswer.py +306 -0
- novel_downloader/core/requesters/qidian/session.py +287 -0
- novel_downloader/core/savers/__init__.py +5 -3
- novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
- novel_downloader/core/savers/biquge.py +25 -0
- novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
- novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +23 -51
- novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
- novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
- novel_downloader/core/savers/epub_utils/__init__.py +0 -1
- novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
- novel_downloader/core/savers/epub_utils/initializer.py +4 -5
- novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
- novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
- novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
- novel_downloader/locales/en.json +8 -4
- novel_downloader/locales/zh.json +5 -1
- novel_downloader/resources/config/settings.toml +88 -0
- novel_downloader/utils/cache.py +2 -2
- novel_downloader/utils/chapter_storage.py +340 -0
- novel_downloader/utils/constants.py +6 -4
- novel_downloader/utils/crypto_utils.py +3 -3
- novel_downloader/utils/file_utils/__init__.py +0 -1
- novel_downloader/utils/file_utils/io.py +12 -17
- novel_downloader/utils/file_utils/normalize.py +1 -3
- novel_downloader/utils/file_utils/sanitize.py +2 -9
- novel_downloader/utils/fontocr/__init__.py +0 -1
- novel_downloader/utils/fontocr/ocr_v1.py +19 -22
- novel_downloader/utils/fontocr/ocr_v2.py +147 -60
- novel_downloader/utils/hash_store.py +19 -20
- novel_downloader/utils/hash_utils.py +0 -1
- novel_downloader/utils/i18n.py +3 -4
- novel_downloader/utils/logger.py +5 -6
- novel_downloader/utils/model_loader.py +5 -8
- novel_downloader/utils/network.py +9 -10
- novel_downloader/utils/state.py +6 -7
- novel_downloader/utils/text_utils/__init__.py +0 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
- novel_downloader/utils/text_utils/diff_display.py +0 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -4
- novel_downloader/utils/text_utils/text_cleaning.py +0 -1
- novel_downloader/utils/time_utils/__init__.py +0 -1
- novel_downloader/utils/time_utils/datetime_utils.py +8 -10
- novel_downloader/utils/time_utils/sleep_utils.py +1 -3
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/METADATA +14 -17
- novel_downloader-1.3.0.dist-info/RECORD +127 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/WHEEL +1 -1
- novel_downloader/core/requesters/base_browser.py +0 -214
- novel_downloader/core/requesters/base_session.py +0 -246
- novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
- novel_downloader/core/requesters/common_requester/common_session.py +0 -126
- novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -396
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
- novel_downloader/resources/config/settings.yaml +0 -76
- novel_downloader-1.2.2.dist-info/RECORD +0 -115
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,340 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.requesters.base.browser
|
4
|
+
---------------------------------------------
|
5
|
+
|
6
|
+
This module defines the BaseBrowser class, which provides common functionalities
|
7
|
+
for browser operations. Derived classes can extend these methods for
|
8
|
+
specialized purposes.
|
9
|
+
"""
|
10
|
+
|
11
|
+
import abc
|
12
|
+
import logging
|
13
|
+
import random
|
14
|
+
import time
|
15
|
+
import types
|
16
|
+
from typing import Any, Literal, Self, cast
|
17
|
+
|
18
|
+
from DrissionPage import Chromium, ChromiumOptions
|
19
|
+
from DrissionPage._elements.chromium_element import ChromiumElement
|
20
|
+
from DrissionPage._pages.chromium_frame import ChromiumFrame
|
21
|
+
from DrissionPage._pages.mix_tab import MixTab
|
22
|
+
from DrissionPage.common import Keys
|
23
|
+
|
24
|
+
from novel_downloader.config.models import RequesterConfig
|
25
|
+
from novel_downloader.core.interfaces import SyncRequesterProtocol
|
26
|
+
from novel_downloader.utils.constants import (
|
27
|
+
DEFAULT_USER_AGENT,
|
28
|
+
DEFAULT_USER_DATA_DIR,
|
29
|
+
DEFAULT_USER_PROFILE_NAME,
|
30
|
+
)
|
31
|
+
|
32
|
+
|
33
|
+
class BaseBrowser(SyncRequesterProtocol, abc.ABC):
    """
    BaseBrowser wraps basic browser operations using DrissionPage,
    with full control over browser configuration, session profile,
    retry and timeout behavior.

    Attributes:
        _options (ChromiumOptions): Configuration object for Chromium.
        _browser (Chromium | None): Chromium instance, ``None`` once shut down.
        _page (MixTab | None): The active browser tab, ``None`` once shut down.
    """

    def is_async(self) -> Literal[False]:
        """
        Report whether this requester is asynchronous.

        :return: Always ``False``.
        """
        return False

    def __init__(
        self,
        config: RequesterConfig,
    ) -> None:
        """
        Initialize the Requester with a browser configuration.

        The Chromium options are built from ``config`` and the browser is
        started immediately (see ``_init_browser``).

        :param config: The RequesterConfig instance containing browser settings.
        """
        super().__init__()
        self._config = config
        self._options = ChromiumOptions()
        self._browser: Chromium | None = None
        self._page: MixTab | None = None
        self._headless: bool = config.headless

        # Fall back to the packaged defaults when the configured value is
        # empty or whitespace-only.
        user_data_path = (
            config.user_data_folder
            if self._is_valid(config.user_data_folder)
            else DEFAULT_USER_DATA_DIR
        )
        self._options.set_user_data_path(user_data_path)

        profile_name = (
            config.profile_name
            if self._is_valid(config.profile_name)
            else DEFAULT_USER_PROFILE_NAME
        )
        self._options.set_user(profile_name)

        self._options.headless(config.headless)
        self._options.set_user_agent(DEFAULT_USER_AGENT)
        self._options.set_timeouts(base=config.timeout)
        self._options.set_retry(
            times=config.retry_times, interval=config.backoff_factor
        )

        self._disable_images_orig = config.disable_images
        if config.disable_images:
            self._options.no_imgs(True)
        if config.mute_audio:
            self._options.mute(True)

        # self._options.set_argument('--disable-blink-features', 'AutomationControlled')
        # self._options.set_argument('--log-level', '3')
        # self._options.set_argument('--disable-gpu')
        # self._options.set_argument('no-sandbox')

        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

        self._init_browser()

    def _init_browser(self) -> None:
        """
        Set up the browser instance and open the default tab.

        Existing browser/page objects are reused, so calling this repeatedly
        is harmless.
        """
        if not self._browser:
            self._browser = Chromium(self._options)
        if not self._page:
            self._page = cast(MixTab, self._browser.get_tab())

    def login(
        self,
        username: str = "",
        password: str = "",
        manual_login: bool = False,
        **kwargs: Any,
    ) -> bool:
        """
        Attempt to log in.

        The base implementation does not support login; subclasses that do
        should override this method.

        :param username: Account name (unused here).
        :param password: Account password (unused here).
        :param manual_login: Whether the user logs in by hand (unused here).
        :param kwargs: Implementation-specific options (unused here).
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(
            "Login is not supported by this browser type. "
            "Override login() in your subclass to enable it."
        )

    @abc.abstractmethod
    def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Fetch the raw HTML (or JSON) of the book info page.

        :param book_id: The book identifier.
        :param kwargs: Implementation-specific options, e.g. a ``wait_time``
            (base number of seconds to wait before returning content) where
            the subclass supports it.
        :return: The page content as a string.
        """
        ...

    @abc.abstractmethod
    def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Fetch the raw HTML (or JSON) of a single chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Implementation-specific options, e.g. a ``wait_time``
            (base number of seconds to wait before returning content) where
            the subclass supports it.
        :return: The chapter content as a string.
        """
        ...

    def get_bookcase(
        self,
        page: int = 1,
        **kwargs: Any,
    ) -> str:
        """
        Optional: Retrieve the HTML content of the authenticated user's bookcase page.

        Subclasses that support login+bookcase retrieval should override this.

        :param page: Page index of the bookcase (unused here).
        :param kwargs: Implementation-specific options, e.g. a ``wait_time``
            where the subclass supports it.
        :return: The HTML markup of the bookcase page.
        :raises NotImplementedError: If bookcase fetching is not supported.
        """
        raise NotImplementedError(
            "Bookcase fetching is not supported by this browser type. "
            "Override get_bookcase() in your subclass to enable it."
        )

    def scroll_page(
        self,
        presses: int,
        pause: float = 0.5,
        jitter: float = 0.1,
    ) -> None:
        """
        Scroll down by sending DOWN key presses to the page.

        A failed key press is logged at debug level and does not abort the
        remaining presses.

        :param presses: Number of DOWN key presses.
        :param pause: Seconds to wait between each press.
        :param jitter: Maximum random deviation (in seconds) added to or
            subtracted from ``pause``; the resulting wait is never negative.
        """
        for _ in range(int(presses)):
            try:
                self.page.actions.key_down(Keys.DOWN)
            except Exception as e:
                self.logger.debug("[page] Scroll press failed: %s", e)
            actual_pause = pause + random.uniform(-jitter, jitter)
            actual_pause = max(0, actual_pause)
            time.sleep(actual_pause)

    def click_button(
        self,
        locator: str | tuple[str, str] | ChromiumElement,
        timeout: float = 5.0,
        page: MixTab | ChromiumFrame | None = None,
    ) -> bool:
        """
        Attempt to locate and click a button on the page.

        Any exception raised while locating or clicking is logged at debug
        level and reported as a ``False`` result.

        :param locator: The target element to click.
        :param timeout: Maximum time (in seconds) to wait.
        :param page: Optional page or frame to search in; defaults to the
            current tab.
        :return: True if the element was located and clicked; False otherwise.
        """
        try:
            page = page or self.page
            btn = page.ele(locator, timeout=timeout)
            if isinstance(btn, ChromiumElement):
                btn.click()
                return True
        except Exception as e:
            self.logger.debug("[browser] Exception clicking button: %s", e)
        return False

    def get_frame(
        self,
        loc_ind_ele: str | int | ChromiumFrame | ChromiumElement,
        timeout: float = 5.0,
        page: MixTab | ChromiumFrame | None = None,
    ) -> ChromiumFrame | None:
        """
        Attempt to locate and return a frame from the page.

        :param loc_ind_ele: The frame to locate.
        :param timeout: Maximum time (in seconds) to wait.
        :param page: Optional page or frame to search in; defaults to the
            current tab.
        :return: The located ChromiumFrame if found; otherwise, None.
        """
        try:
            page = page or self.page
            return page.get_frame(loc_ind_ele, timeout=timeout)
        except Exception as e:
            self.logger.debug(
                "[browser] Exception occurred while getting frame [%s]: %s",
                loc_ind_ele,
                e,
            )
        return None

    def restart_browser(
        self,
        headless: bool | None = None,
    ) -> None:
        """
        Shutdown the current browser and restart it with the given headless setting.

        :param headless: Whether to run the browser in headless mode.
            ``None`` keeps the current mode.
        """
        if self._browser:
            self._browser.quit()
            self._browser = None
            self._page = None

        # Apply new headless setting and reinitialize
        if headless is not None:
            self._options.headless(headless)
            self._headless = headless
        self._init_browser()
        # Log the effective mode: the argument is None when the caller keeps
        # the current setting, which would be misleading in the log.
        self.logger.debug(
            "[browser] Browser restarted (headless=%s).", self._headless
        )

    @property
    def page(self) -> MixTab:
        """
        Return the current browser tab.

        :return: MixTab instance of the current tab.
        :raises RuntimeError: If the page is uninitialized or has been shut down.
        """
        if self._page is None:
            raise RuntimeError("Page is not initialized or has been shut down.")
        return self._page

    @property
    def browser(self) -> Chromium:
        """
        Return the Chromium browser instance.

        :return: Chromium instance used by this browser.
        :raises RuntimeError: If the browser is uninitialized or has been shut down.
        """
        if self._browser is None:
            raise RuntimeError("Browser is not initialized or has been shut down.")
        return self._browser

    @staticmethod
    def _is_valid(value: str) -> bool:
        """Return True when ``value`` is a non-empty, non-whitespace string."""
        return bool(value and value.strip())

    def close(self) -> None:
        """
        Shutdown the browser session and release resources.

        When the configuration's ``auto_close`` flag is set, this quits the
        Chromium instance and clears references to browser and page.
        Otherwise the browser is left running and the references are kept.
        """
        if self._browser and self._config.auto_close:
            self._browser.quit()
            self._browser = None
            self._page = None

    def __enter__(self) -> Self:
        """Ensure the browser is running and return this instance."""
        self._init_browser()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        tb: types.TracebackType | None,
    ) -> None:
        """Close the browser on leaving the ``with`` block (see ``close``)."""
        self.close()

    def __del__(self) -> None:
        """Close the browser when the object is garbage-collected."""
        self.close()

    def __getstate__(self) -> dict[str, Any]:
        """
        Prepare object state for serialization (e.g., pickling).

        Removes browser-related fields that cannot be pickled.

        :return: A dict representing the serializable object state.
        """
        state = self.__dict__.copy()
        state.pop("_browser", None)
        state.pop("_page", None)
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        """
        Restore object state after deserialization.

        Automatically reinitializes the browser setup.

        :param state: The saved state dictionary.
        """
        self.__dict__.update(state)
        # __getstate__ drops these keys; restore them as None first, because
        # _init_browser() (and close(), via __del__) read both attributes and
        # would otherwise raise AttributeError on an unpickled instance.
        self.__dict__.setdefault("_browser", None)
        self.__dict__.setdefault("_page", None)
        self._init_browser()
|
@@ -0,0 +1,364 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.requesters.base.session
|
4
|
+
---------------------------------------------
|
5
|
+
|
6
|
+
This module defines the BaseSession class, which provides basic HTTP
|
7
|
+
request capabilities using the requests library. It maintains a
|
8
|
+
persistent session and supports retries, headers, and timeout configurations.
|
9
|
+
"""
|
10
|
+
|
11
|
+
import abc
|
12
|
+
import logging
|
13
|
+
import types
|
14
|
+
from collections.abc import Mapping
|
15
|
+
from typing import Any, Literal, Self
|
16
|
+
|
17
|
+
import requests
|
18
|
+
from requests import Response, Session
|
19
|
+
from requests.adapters import HTTPAdapter, Retry
|
20
|
+
|
21
|
+
from novel_downloader.config.models import RequesterConfig
|
22
|
+
from novel_downloader.core.interfaces import SyncRequesterProtocol
|
23
|
+
from novel_downloader.utils.constants import DEFAULT_USER_HEADERS
|
24
|
+
|
25
|
+
|
26
|
+
class BaseSession(SyncRequesterProtocol, abc.ABC):
    """
    BaseSession wraps basic HTTP operations using requests.Session,
    supporting retry logic, timeout, and persistent connections.

    Attributes:
        _session (requests.Session | None): The persistent HTTP session,
            ``None`` once closed.
    """

    def is_async(self) -> Literal[False]:
        """
        Report whether this requester is asynchronous.

        :return: Always ``False``.
        """
        return False

    def __init__(
        self,
        config: RequesterConfig,
        cookies: dict[str, str] | None = None,
    ) -> None:
        """
        Initialize a Session instance.

        The underlying ``requests.Session`` is created immediately.

        :param config: The RequesterConfig instance containing settings.
        :param cookies: Optional cookies to preload into the session.
        """
        super().__init__()
        self._config = config
        self._cookies = cookies or {}
        self._headers = DEFAULT_USER_HEADERS.copy()
        self._session: Session | None = None

        retry_strategy = Retry(
            total=config.retry_times,
            backoff_factor=config.backoff_factor,
            status_forcelist=[408, 429, 500, 502, 503, 504],
            allowed_methods=["HEAD", "GET", "OPTIONS"],
        )

        self._adapter = HTTPAdapter(max_retries=retry_strategy)
        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

        self._init_session()

    def _init_session(self) -> None:
        """
        Set up the session with retry strategy and apply default headers.

        Does nothing when a session already exists.
        """
        if self._session:
            return

        self._session = requests.Session()
        self._session.mount("http://", self._adapter)
        self._session.mount("https://", self._adapter)
        self._session.headers.update(self._headers)

        if self._cookies:
            self._session.cookies.update(self._cookies)

    def _apply_default_timeout(self, kwargs: dict[str, Any]) -> dict[str, Any]:
        """
        Fill in the configured timeout unless the caller supplied one.

        Without a timeout, ``requests`` waits on an unresponsive server
        indefinitely. An explicit ``timeout=`` keyword always wins, and a
        falsy configured value (``None``/``0``) leaves the call untouched.

        :param kwargs: Keyword arguments about to be passed to ``requests``.
        :return: The same dict, possibly with ``timeout`` added.
        """
        timeout = self._config.timeout
        if timeout and "timeout" not in kwargs:
            kwargs["timeout"] = timeout
        return kwargs

    def login(
        self,
        username: str = "",
        password: str = "",
        manual_login: bool = False,
        **kwargs: Any,
    ) -> bool:
        """
        Attempt to log in.

        The base implementation does not support login; subclasses that do
        should override this method.

        :param username: Account name (unused here).
        :param password: Account password (unused here).
        :param manual_login: Whether the user logs in by hand (unused here).
        :param kwargs: Implementation-specific options (unused here).
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(
            "Login is not supported by this session type. "
            "Override login() in your subclass to enable it."
        )

    @abc.abstractmethod
    def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Fetch the raw HTML (or JSON) of the book info page.

        :param book_id: The book identifier.
        :param kwargs: Implementation-specific options.
        :return: The page content as a string.
        """
        ...

    @abc.abstractmethod
    def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Fetch the raw HTML (or JSON) of a single chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Implementation-specific options.
        :return: The chapter content as a string.
        """
        ...

    def get_bookcase(
        self,
        page: int = 1,
        **kwargs: Any,
    ) -> str:
        """
        Optional: Retrieve the HTML content of the authenticated user's bookcase page.

        Subclasses that support user login and bookcase retrieval should override this.

        :param page: Page idx
        :param kwargs: Implementation-specific options.
        :return: The HTML markup of the bookcase page.
        :raises NotImplementedError: If the subclass does not implement.
        """
        raise NotImplementedError(
            "Bookcase fetching is not supported by this session type. "
            "Override get_bookcase() in your subclass to enable it."
        )

    def get(
        self,
        url: str,
        params: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Response:
        """
        Send a GET request.

        :param url: The target URL.
        :param params: Query parameters to include in the request.
        :param kwargs: Additional arguments passed to requests. ``timeout``
            defaults to the configured timeout when omitted.
        :return: Response object from the GET request.
        :raises RuntimeError: If the session is not initialized.
        """
        kwargs = self._apply_default_timeout(kwargs)
        return self.session.get(url, params=params, **kwargs)

    def post(
        self,
        url: str,
        data: dict[str, Any] | bytes | None = None,
        json: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Response:
        """
        Send a POST request.

        :param url: The target URL.
        :param data: Form data to include in the request body.
        :param json: JSON body to include in the request.
        :param kwargs: Additional arguments passed to requests. ``timeout``
            defaults to the configured timeout when omitted.
        :return: Response object from the POST request.
        :raises RuntimeError: If the session is not initialized.
        """
        kwargs = self._apply_default_timeout(kwargs)
        return self.session.post(url, data=data, json=json, **kwargs)

    def put(
        self,
        url: str,
        data: dict[str, Any] | bytes | None = None,
        json: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Response:
        """
        Send a PUT request.

        Note: the retry policy mounted on the session lists only
        HEAD/GET/OPTIONS in ``allowed_methods``; PUT is not among them.

        :param url: The target URL.
        :param data: Form data to include in the request body.
        :param json: JSON body to include in the request.
        :param kwargs: Additional arguments passed to requests. ``timeout``
            defaults to the configured timeout when omitted.
        :return: Response object from the PUT request.
        :raises RuntimeError: If the session is not initialized.
        """
        kwargs = self._apply_default_timeout(kwargs)
        return self.session.put(url, data=data, json=json, **kwargs)

    def patch(
        self,
        url: str,
        data: dict[str, Any] | bytes | None = None,
        json: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Response:
        """
        Send a PATCH request.

        Note: the retry policy mounted on the session lists only
        HEAD/GET/OPTIONS in ``allowed_methods``; PATCH is not among them.

        :param url: The target URL.
        :param data: Form data to include in the request body.
        :param json: JSON body to include in the request.
        :param kwargs: Additional arguments passed to requests. ``timeout``
            defaults to the configured timeout when omitted.
        :return: Response object from the PATCH request.
        :raises RuntimeError: If the session is not initialized.
        """
        kwargs = self._apply_default_timeout(kwargs)
        return self.session.patch(url, data=data, json=json, **kwargs)

    def delete(
        self,
        url: str,
        **kwargs: Any,
    ) -> Response:
        """
        Send a DELETE request.

        Note: the retry policy mounted on the session lists only
        HEAD/GET/OPTIONS in ``allowed_methods``; DELETE is not among them.

        :param url: The target URL.
        :param kwargs: Additional arguments passed to requests. ``timeout``
            defaults to the configured timeout when omitted.
        :return: Response object from the DELETE request.
        :raises RuntimeError: If the session is not initialized.
        """
        kwargs = self._apply_default_timeout(kwargs)
        return self.session.delete(url, **kwargs)

    @property
    def session(self) -> Session:
        """
        Return the active requests.Session.

        :raises RuntimeError: If the session is uninitialized or has been shut down.
        """
        if self._session is None:
            # self._init_session()
            raise RuntimeError("Session is not initialized or has been shut down.")
        return self._session

    @property
    def cookies(self) -> dict[str, str]:
        """
        Get the current session cookies.

        Falls back to the locally tracked cookies when no session is open.

        :return: A dict mapping cookie names to their values.
        """
        if self._session:
            return self._session.cookies.get_dict()
        else:
            return self._cookies

    @property
    def headers(self) -> Mapping[str, str | bytes]:
        """
        Get the current session headers.

        Falls back to the locally tracked headers when no session is open.

        :return: A mapping of header names to their values.
        """
        if self._session:
            return self._session.headers
        else:
            return self._headers

    def get_header(self, key: str, default: Any = None) -> Any:
        """
        Retrieve a specific header value by name.

        :param key: The header name to look up.
        :param default: The value to return if the header is not present.
        :return: The header value if present, else default.
        """
        if self._session:
            return self._session.headers.get(key, default)
        else:
            return self._headers.get(key, default)

    def update_header(self, key: str, value: str) -> None:
        """
        Update or add a single header in the session.

        The value is also recorded locally so that it is re-applied when the
        session is recreated.

        :param key: The name of the header.
        :param value: The value of the header.
        """
        self._headers[key] = value
        if self._session:
            self._session.headers[key] = value

    def update_headers(self, headers: dict[str, str]) -> None:
        """
        Update or add multiple headers in the session.

        :param headers: A dictionary of header key-value pairs.
        """
        self._headers.update(headers)
        if self._session:
            self._session.headers.update(headers)

    def update_cookie(self, key: str, value: str) -> None:
        """
        Update or add a single cookie in the session.

        :param key: The name of the cookie.
        :param value: The value of the cookie.
        """
        self._cookies[key] = value
        if self._session:
            self._session.cookies.set(key, value)

    def update_cookies(
        self,
        cookies: dict[str, str],
    ) -> None:
        """
        Update or add multiple cookies in the session.

        :param cookies: A dictionary of cookie key-value pairs.
        """
        self._cookies.update(cookies)
        if self._session:
            self._session.cookies.update(cookies)

    def clear_cookies(self) -> None:
        """
        Clear cookies from the session and from the locally tracked copy.
        """
        self._cookies = {}
        if self._session:
            self._session.cookies.clear()

    def close(self) -> None:
        """
        Shutdown and clean up the session.

        This closes the underlying session and drops the reference to it.
        Calling it again is a no-op.
        """
        if self._session:
            self._session.close()
            self._session = None

    def __enter__(self) -> Self:
        """Ensure a session exists and return this instance."""
        if self._session is None:
            self._init_session()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        tb: types.TracebackType | None,
    ) -> None:
        """Close the session on leaving the ``with`` block."""
        self.close()

    def __del__(self) -> None:
        """Close the session when the object is garbage-collected."""
        self.close()

    def __getstate__(self) -> dict[str, Any]:
        """
        Prepare object state for serialization.

        Removes the unpickleable session object from the returned state.
        The live session of this instance is left open: serializing (or
        copying) an object must not make the original unusable.

        :return: Serializable dict of the object state.
        """
        state = self.__dict__.copy()
        state.pop("_session", None)
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        """
        Restore object state and reinitialize session.

        :param state: Saved state dictionary.
        """
        self.__dict__.update(state)
        # __getstate__ drops "_session"; restore it as None first, because
        # _init_session() (and close(), via __del__) read the attribute and
        # would otherwise raise AttributeError on an unpickled instance.
        self.__dict__.setdefault("_session", None)
        self._init_session()
|