novel-downloader 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +14 -0
- novel_downloader/cli/__init__.py +14 -0
- novel_downloader/cli/clean.py +134 -0
- novel_downloader/cli/download.py +132 -0
- novel_downloader/cli/interactive.py +67 -0
- novel_downloader/cli/main.py +45 -0
- novel_downloader/cli/settings.py +177 -0
- novel_downloader/config/__init__.py +52 -0
- novel_downloader/config/adapter.py +153 -0
- novel_downloader/config/loader.py +177 -0
- novel_downloader/config/models.py +173 -0
- novel_downloader/config/site_rules.py +97 -0
- novel_downloader/core/__init__.py +25 -0
- novel_downloader/core/downloaders/__init__.py +22 -0
- novel_downloader/core/downloaders/base_async_downloader.py +157 -0
- novel_downloader/core/downloaders/base_downloader.py +187 -0
- novel_downloader/core/downloaders/common_asynb_downloader.py +207 -0
- novel_downloader/core/downloaders/common_downloader.py +191 -0
- novel_downloader/core/downloaders/qidian_downloader.py +208 -0
- novel_downloader/core/factory/__init__.py +33 -0
- novel_downloader/core/factory/downloader_factory.py +149 -0
- novel_downloader/core/factory/parser_factory.py +62 -0
- novel_downloader/core/factory/requester_factory.py +106 -0
- novel_downloader/core/factory/saver_factory.py +49 -0
- novel_downloader/core/interfaces/__init__.py +32 -0
- novel_downloader/core/interfaces/async_downloader_protocol.py +37 -0
- novel_downloader/core/interfaces/async_requester_protocol.py +68 -0
- novel_downloader/core/interfaces/downloader_protocol.py +37 -0
- novel_downloader/core/interfaces/parser_protocol.py +40 -0
- novel_downloader/core/interfaces/requester_protocol.py +65 -0
- novel_downloader/core/interfaces/saver_protocol.py +61 -0
- novel_downloader/core/parsers/__init__.py +28 -0
- novel_downloader/core/parsers/base_parser.py +96 -0
- novel_downloader/core/parsers/common_parser/__init__.py +14 -0
- novel_downloader/core/parsers/common_parser/helper.py +321 -0
- novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
- novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
- novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
- novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
- novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
- novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
- novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
- novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
- novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
- novel_downloader/core/requesters/__init__.py +31 -0
- novel_downloader/core/requesters/base_async_session.py +297 -0
- novel_downloader/core/requesters/base_browser.py +210 -0
- novel_downloader/core/requesters/base_session.py +243 -0
- novel_downloader/core/requesters/common_requester/__init__.py +18 -0
- novel_downloader/core/requesters/common_requester/common_async_session.py +96 -0
- novel_downloader/core/requesters/common_requester/common_session.py +126 -0
- novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
- novel_downloader/core/savers/__init__.py +20 -0
- novel_downloader/core/savers/base_saver.py +169 -0
- novel_downloader/core/savers/common_saver/__init__.py +13 -0
- novel_downloader/core/savers/common_saver/common_epub.py +232 -0
- novel_downloader/core/savers/common_saver/common_txt.py +176 -0
- novel_downloader/core/savers/common_saver/main_saver.py +86 -0
- novel_downloader/core/savers/epub_utils/__init__.py +27 -0
- novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
- novel_downloader/core/savers/epub_utils/initializer.py +98 -0
- novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
- novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
- novel_downloader/core/savers/qidian_saver.py +22 -0
- novel_downloader/locales/en.json +91 -0
- novel_downloader/locales/zh.json +91 -0
- novel_downloader/resources/config/rules.toml +196 -0
- novel_downloader/resources/config/settings.yaml +73 -0
- novel_downloader/resources/css_styles/main.css +104 -0
- novel_downloader/resources/css_styles/volume-intro.css +56 -0
- novel_downloader/resources/images/volume_border.png +0 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
- novel_downloader/resources/json/replace_word_map.json +4 -0
- novel_downloader/resources/text/blacklist.txt +22 -0
- novel_downloader/utils/__init__.py +0 -0
- novel_downloader/utils/cache.py +24 -0
- novel_downloader/utils/constants.py +158 -0
- novel_downloader/utils/crypto_utils.py +144 -0
- novel_downloader/utils/file_utils/__init__.py +43 -0
- novel_downloader/utils/file_utils/io.py +252 -0
- novel_downloader/utils/file_utils/normalize.py +68 -0
- novel_downloader/utils/file_utils/sanitize.py +77 -0
- novel_downloader/utils/fontocr/__init__.py +23 -0
- novel_downloader/utils/fontocr/ocr_v1.py +304 -0
- novel_downloader/utils/fontocr/ocr_v2.py +658 -0
- novel_downloader/utils/hash_store.py +288 -0
- novel_downloader/utils/hash_utils.py +103 -0
- novel_downloader/utils/i18n.py +41 -0
- novel_downloader/utils/logger.py +104 -0
- novel_downloader/utils/model_loader.py +72 -0
- novel_downloader/utils/network.py +287 -0
- novel_downloader/utils/state.py +156 -0
- novel_downloader/utils/text_utils/__init__.py +27 -0
- novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
- novel_downloader/utils/text_utils/diff_display.py +75 -0
- novel_downloader/utils/text_utils/font_mapping.py +31 -0
- novel_downloader/utils/text_utils/text_cleaning.py +57 -0
- novel_downloader/utils/time_utils/__init__.py +22 -0
- novel_downloader/utils/time_utils/datetime_utils.py +146 -0
- novel_downloader/utils/time_utils/sleep_utils.py +49 -0
- novel_downloader-1.1.0.dist-info/METADATA +157 -0
- novel_downloader-1.1.0.dist-info/RECORD +115 -0
- novel_downloader-1.1.0.dist-info/WHEEL +5 -0
- novel_downloader-1.1.0.dist-info/entry_points.txt +2 -0
- novel_downloader-1.1.0.dist-info/licenses/LICENSE +21 -0
- novel_downloader-1.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,297 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.requesters.base_async_session
|
5
|
+
---------------------------------------------------
|
6
|
+
|
7
|
+
This module defines the BaseAsyncSession class, which provides asynchronous
|
8
|
+
HTTP request capabilities using aiohttp. It maintains a persistent
|
9
|
+
client session and supports retries, headers, timeout configurations,
|
10
|
+
cookie handling, and defines abstract methods for subclasses.
|
11
|
+
"""
|
12
|
+
|
13
|
+
import abc
|
14
|
+
import asyncio
|
15
|
+
import time
|
16
|
+
from typing import Any, Dict, Optional, Union
|
17
|
+
|
18
|
+
import aiohttp
|
19
|
+
from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
|
20
|
+
|
21
|
+
from novel_downloader.config.models import RequesterConfig
|
22
|
+
from novel_downloader.core.interfaces import AsyncRequesterProtocol
|
23
|
+
from novel_downloader.utils.constants import DEFAULT_USER_HEADERS
|
24
|
+
|
25
|
+
|
26
|
+
class RateLimiter:
|
27
|
+
"""
|
28
|
+
Simple async token-bucket rate limiter: ensures no more than rate_per_sec
|
29
|
+
requests are started per second, across all coroutines.
|
30
|
+
"""
|
31
|
+
|
32
|
+
def __init__(self, rate_per_sec: float):
|
33
|
+
self._interval = 1.0 / rate_per_sec
|
34
|
+
self._lock = asyncio.Lock()
|
35
|
+
self._last = time.monotonic()
|
36
|
+
|
37
|
+
async def wait(self) -> None:
|
38
|
+
async with self._lock:
|
39
|
+
now = time.monotonic()
|
40
|
+
elapsed = now - self._last
|
41
|
+
delay = self._interval - elapsed
|
42
|
+
if delay > 0:
|
43
|
+
await asyncio.sleep(delay)
|
44
|
+
self._last = time.monotonic()
|
45
|
+
|
46
|
+
|
47
|
+
class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
    """
    BaseAsyncSession wraps basic HTTP operations using aiohttp.ClientSession,
    supporting retry logic, timeout, persistent connections, and cookie management.

    The underlying ClientSession is created lazily on the first request
    (see ``_setup``) and released by ``shutdown``.

    Attributes:
        _session (Optional[ClientSession]): The persistent aiohttp client
            session, or None before setup / after shutdown.
        _rate_limiter (Optional[RateLimiter]): Request spacing helper, set
            during setup when the config provides a positive ``max_rps``.
        _timeout (int): Timeout for each request in seconds.
        _retry_times (int): Number of retry attempts on failure.
        _retry_interval (float): Delay (in seconds) between retries.
        _headers (Dict[str, str]): Default HTTP headers to send.
        _cookies (Dict[str, str]): Cached cookies applied to the session.
    """

    def _init_session(
        self,
        config: RequesterConfig,
        cookies: Optional[Dict[str, str]] = None,
    ) -> None:
        """
        Initialize the async session with configuration.

        Only stores settings; the network session itself is created later
        by ``_setup``.

        :param config: Configuration object for session behavior
                       (timeouts, retries, headers, etc.)
        :param cookies: Optional initial cookies to set on the session.
        """
        self._config = config
        self._timeout = config.timeout
        self._retry_times = config.retry_times
        self._retry_interval = config.retry_interval
        self._cookies = cookies or {}
        self._headers = DEFAULT_USER_HEADERS.copy()
        self._session: Optional[ClientSession] = None
        self._rate_limiter: Optional[RateLimiter] = None

    async def _setup(self) -> None:
        """
        Set up the aiohttp.ClientSession with timeout, connector, headers, and cookies.

        Also creates the rate limiter when the config exposes a positive
        ``max_rps`` value.
        """
        max_rps = getattr(self._config, "max_rps", None)
        # A rate of zero cannot be converted into an interval (division by
        # zero) and a negative one never throttles, so both mean "no limiter".
        if max_rps is not None and max_rps > 0:
            self._rate_limiter = RateLimiter(max_rps)

        timeout = ClientTimeout(total=self._timeout)
        connector = TCPConnector(
            limit_per_host=getattr(self._config, "max_connections", 10)
        )
        self._session = ClientSession(
            timeout=timeout,
            connector=connector,
            headers=self._headers,
            cookies=self._cookies,
        )

    async def _ensure_session(self) -> ClientSession:
        """
        Return the live session, running ``_setup`` first if none exists yet.

        :raises RuntimeError: If ``_setup`` did not produce a session.
        """
        if self._session is None:
            await self._setup()
        if self._session is None:
            raise RuntimeError("Session not initialized after setup")
        return self._session

    async def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
        """
        Attempt to log in asynchronously.
        Override in subclasses that require authentication.

        :param max_retries: Maximum number of login attempts.
        :param manual_login: Whether a manual login flow is requested.
        :returns: True if login succeeded, False otherwise.
        :raises NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError(
            "Login is not supported by this session type. "
            "Override login() in your subclass to enable it."
        )

    @abc.abstractmethod
    async def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
        """
        Fetch the raw HTML (or JSON) of the book info page asynchronously.

        :param book_id: The book identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The page content as a string.
        """
        ...

    @abc.abstractmethod
    async def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
    ) -> str:
        """
        Fetch the raw HTML (or JSON) of a single chapter asynchronously.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The chapter content as a string.
        """
        ...

    async def get_bookcase(self, wait_time: Optional[int] = None) -> str:
        """
        Optional: Retrieve the HTML content of the authenticated user's bookcase page.
        Subclasses that support user login/bookcase should override this.

        :param wait_time: Base number of seconds to wait before returning content.
        :return: The HTML of the bookcase page.
        :raises NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError(
            "Bookcase fetching is not supported by this session type. "
            "Override get_bookcase() in your subclass to enable it."
        )

    async def fetch(self, url: str, **kwargs: Any) -> str:
        """
        Fetch the content from the given URL asynchronously, with retry support.

        Every attempt (including retries) passes through the rate limiter
        when one is configured. Failed attempts are retried after
        ``retry_interval`` seconds, up to ``retry_times`` extra attempts.

        :param url: The target URL to fetch.
        :param kwargs: Additional keyword arguments to pass to `session.get`.
        :return: The response body as text.
        :raises aiohttp.ClientError: On final failure of the request.
        :raises asyncio.TimeoutError: If the final attempt timed out.
        :raises RuntimeError: If the session could not be initialized.
        """
        session = await self._ensure_session()
        # Clamp so that a negative retry setting still performs one attempt
        # instead of skipping the loop entirely.
        max_retries = max(0, self._retry_times)

        for attempt in range(max_retries + 1):
            # Retries are real requests too, so they are throttled as well.
            if self._rate_limiter:
                await self._rate_limiter.wait()
            try:
                async with session.get(url, **kwargs) as resp:
                    resp.raise_for_status()
                    text: str = await resp.text()
                    return text
            # Timeouts may surface as asyncio.TimeoutError, which is not
            # necessarily an aiohttp.ClientError; treat both as retryable.
            except (aiohttp.ClientError, asyncio.TimeoutError):
                if attempt < max_retries:
                    await asyncio.sleep(self._retry_interval)
                    continue
                raise

        # Kept only so static checkers see that every path returns or raises.
        raise RuntimeError("Unreachable code reached in fetch()")

    async def get(
        self, url: str, params: Optional[Dict[str, Any]] = None, **kwargs: Any
    ) -> ClientResponse:
        """
        Send an HTTP GET request asynchronously.

        No retry is performed and the status is not checked; the caller
        receives the response object as returned by the session.

        :param url: The target URL.
        :param params: Query parameters to include in the request.
        :param kwargs: Additional args passed to session.get().
        :return: aiohttp.ClientResponse object.
        :raises RuntimeError: If the session is not initialized.
        """
        session = await self._ensure_session()

        if self._rate_limiter:
            await self._rate_limiter.wait()
        return await session.get(url, params=params, **kwargs)

    async def post(
        self,
        url: str,
        data: Optional[Union[Dict[str, Any], bytes]] = None,
        json: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> ClientResponse:
        """
        Send an HTTP POST request asynchronously.

        No retry is performed and the status is not checked; the caller
        receives the response object as returned by the session.

        :param url: The target URL.
        :param data: Form data to include in the request body.
        :param json: JSON body to include in the request.
        :param kwargs: Additional args passed to session.post().
        :return: aiohttp.ClientResponse object.
        :raises RuntimeError: If the session is not initialized.
        """
        session = await self._ensure_session()

        if self._rate_limiter:
            await self._rate_limiter.wait()
        return await session.post(url, data=data, json=json, **kwargs)

    @property
    def session(self) -> ClientSession:
        """
        Return the active aiohttp.ClientSession.

        Unlike the request methods, this accessor never triggers setup.

        :raises RuntimeError: If the session is uninitialized.
        """
        if self._session is None:
            raise RuntimeError("Session is not initialized or has been shut down.")
        return self._session

    @property
    def timeout(self) -> int:
        """Return the default timeout setting."""
        return self._timeout

    @property
    def retry_times(self) -> int:
        """Return the maximum number of retry attempts."""
        return self._retry_times

    @property
    def retry_interval(self) -> float:
        """Return the base interval (in seconds) between retries."""
        return self._retry_interval

    async def update_cookies(
        self, cookies: Dict[str, str], overwrite: bool = True
    ) -> None:
        """
        Update cookies for the current session and internal cache.

        Keys and values are converted to ``str`` before being stored.

        :param cookies: New cookies to merge.
        :param overwrite: If True, replace existing; else, only set missing.
        """
        # update internal cache
        if overwrite:
            self._cookies.update({str(k): str(v) for k, v in cookies.items()})
        else:
            for k, v in cookies.items():
                self._cookies.setdefault(str(k), str(v))

        # apply to live session (the whole merged cache is pushed to the jar)
        if self._session:
            self._session.cookie_jar.update_cookies(self._cookies)

    async def shutdown(self) -> None:
        """
        Shutdown and clean up the session. Closes connection pool.

        Safe to call when no session exists; a later request will set up a
        fresh session.
        """
        if self._session:
            await self._session.close()
            self._session = None

    def __getstate__(self) -> Dict[str, Any]:
        """
        Prepare object state for serialization: remove unpickleable session.

        The rate limiter is dropped as well because it holds an asyncio lock.

        :return: A copy of the instance dict without the runtime-only fields.
        """
        state = self.__dict__.copy()
        state.pop("_session", None)
        state.pop("_rate_limiter", None)
        return state

    def __setstate__(self, state: Dict[str, Any]) -> None:
        """
        Restore object state. Session will be lazily reinitialized on next request.

        :param state: The saved state dictionary.
        """
        self.__dict__.update(state)
        self._session = None
        # __getstate__ removed this attribute; without restoring it the
        # request methods would fail with AttributeError whenever setup does
        # not create a limiter (i.e. when no max_rps is configured).
        self._rate_limiter = None
|
@@ -0,0 +1,210 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.requesters.base_browser
|
5
|
+
---------------------------------------------
|
6
|
+
|
7
|
+
This module defines the BaseBrowser class, which provides common functionalities
|
8
|
+
for browser operations. Derived classes can extend these methods for
|
9
|
+
specialized purposes.
|
10
|
+
"""
|
11
|
+
|
12
|
+
import abc
|
13
|
+
import logging
|
14
|
+
from typing import Any, Dict, Optional
|
15
|
+
|
16
|
+
from DrissionPage import Chromium, ChromiumOptions, ChromiumPage
|
17
|
+
|
18
|
+
from novel_downloader.config.models import RequesterConfig
|
19
|
+
from novel_downloader.core.interfaces import RequesterProtocol
|
20
|
+
from novel_downloader.utils.constants import (
|
21
|
+
DEFAULT_USER_AGENT,
|
22
|
+
DEFAULT_USER_DATA_DIR,
|
23
|
+
DEFAULT_USER_PROFILE_NAME,
|
24
|
+
)
|
25
|
+
|
26
|
+
logger = logging.getLogger(__name__)
|
27
|
+
|
28
|
+
|
29
|
+
def _is_valid(value: str) -> bool:
|
30
|
+
return bool(value and value.strip())
|
31
|
+
|
32
|
+
|
33
|
+
class BaseBrowser(RequesterProtocol, abc.ABC):
    """
    Common base for requesters that drive a Chromium browser via DrissionPage.

    It translates a RequesterConfig into ChromiumOptions (profile location,
    headless mode, user agent, timeouts, retries, image/audio switches),
    launches the browser and keeps a handle on one tab.

    Attributes:
        _options (ChromiumOptions): Options the browser is launched with.
        _browser (Chromium): The running Chromium instance (None after shutdown).
        _page (ChromiumPage): The tab obtained from the browser (None after shutdown).
    """

    def _init_browser(self, config: RequesterConfig) -> None:
        """
        Build the Chromium options from ``config`` and start the browser.

        :param config: Requester settings; the fields read here are
                       user_data_folder, profile_name, headless, wait_time,
                       retry_times, retry_interval, disable_images and
                       mute_audio.
        """
        self._config = config
        opts = ChromiumOptions()
        self._options = opts

        # Profile directory: a user-supplied folder wins over the default,
        # but sharing it with a running Chrome is risky, hence the warning.
        custom_folder = config.user_data_folder
        if _is_valid(custom_folder):
            logger.warning(
                "[browser] Using user_data_folder='%s'. "
                "This may interfere with an active Chrome session. "
                "Do NOT use this profile in both the browser and "
                "this script at the same time.",
                custom_folder,
            )
            data_dir = custom_folder
        else:
            data_dir = DEFAULT_USER_DATA_DIR
        opts.set_user_data_path(data_dir)

        # Profile name inside that directory, falling back to the default.
        profile = config.profile_name
        if not _is_valid(profile):
            profile = DEFAULT_USER_PROFILE_NAME
        opts.set_user(profile)

        opts.headless(config.headless)
        opts.set_user_agent(DEFAULT_USER_AGENT)
        opts.set_timeouts(base=config.wait_time)
        opts.set_retry(times=config.retry_times, interval=config.retry_interval)

        # Remember the configured image setting alongside applying it.
        block_images = config.disable_images
        self._disable_images_orig = block_images
        if block_images:
            opts.no_imgs(True)
        if config.mute_audio:
            opts.mute(True)

        # self._options.set_argument('--disable-blink-features', 'AutomationControlled')
        # self._options.set_argument('--log-level', '3')
        # self._options.set_argument('--disable-gpu')
        # self._options.set_argument('no-sandbox')

        self._setup()

    def _setup(self) -> None:
        """
        Launch Chromium with the stored options and grab a tab from it.
        """
        browser = Chromium(self._options)
        self._browser = browser
        self._page = browser.get_tab()

    def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
        """
        Attempt to log in.

        The base class has no login flow; subclasses that need one override
        this method.

        :param max_retries: Maximum number of login attempts.
        :param manual_login: Whether a manual login flow is requested.
        :raises NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError(
            "Login is not supported by this browser type. "
            "Override login() in your subclass to enable it."
        )

    @abc.abstractmethod
    def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
        """
        Retrieve the raw content (HTML or JSON) of a book's info page.

        :param book_id: The book identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The page content as a string.
        """
        ...

    @abc.abstractmethod
    def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
    ) -> str:
        """
        Retrieve the raw content (HTML or JSON) of one chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The chapter content as a string.
        """
        ...

    def get_bookcase(self, wait_time: Optional[int] = None) -> str:
        """
        Optional hook: return the HTML of the logged-in user's bookcase page.

        Subclasses that support login and bookcase retrieval override this.

        :param wait_time: Base number of seconds to wait before returning content.
        :return: The HTML markup of the bookcase page.
        :raises NotImplementedError: If bookcase fetching is not supported.
        """
        raise NotImplementedError(
            "Bookcase fetching is not supported by this browser type. "
            "Override get_bookcase() in your subclass to enable it."
        )

    @property
    def page(self) -> ChromiumPage:
        """
        The tab object currently held by this requester.

        :return: ChromiumPage instance of the current tab.
        """
        return self._page

    @property
    def browser(self) -> Chromium:
        """
        The Chromium instance currently held by this requester.

        :return: Chromium instance used by this browser.
        """
        return self._browser

    def _clear_browser_refs(self) -> None:
        """
        Drop the stored browser and tab handles without quitting the browser.
        """
        self._browser = self._page = None

    def shutdown(self) -> None:
        """
        Quit the Chromium instance, if any, and forget the browser/tab handles.
        """
        browser = self._browser
        if browser:
            browser.quit()
        self._clear_browser_refs()

    def __getstate__(self) -> Dict[str, Any]:
        """
        Build a picklable snapshot of the instance.

        The live browser and tab handles are left out because they cannot
        be pickled.

        :return: A dict representing the serializable object state.
        """
        runtime_only = ("_browser", "_page")
        return {
            key: value
            for key, value in self.__dict__.items()
            if key not in runtime_only
        }

    def __setstate__(self, state: Dict[str, Any]) -> None:
        """
        Rebuild the instance from a pickled snapshot.

        The browser is started again immediately from the restored options.

        :param state: The saved state dictionary.
        """
        self.__dict__.update(state)
        self._setup()
|