novel-downloader 1.3.1__py3-none-any.whl → 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +1 -1
- novel_downloader/config/adapter.py +3 -0
- novel_downloader/config/models.py +3 -0
- novel_downloader/core/downloaders/__init__.py +23 -1
- novel_downloader/core/downloaders/biquge/__init__.py +2 -0
- novel_downloader/core/downloaders/biquge/biquge_async.py +27 -0
- novel_downloader/core/downloaders/biquge/biquge_sync.py +5 -3
- novel_downloader/core/downloaders/common/common_async.py +5 -11
- novel_downloader/core/downloaders/common/common_sync.py +18 -18
- novel_downloader/core/downloaders/esjzone/__init__.py +14 -0
- novel_downloader/core/downloaders/esjzone/esjzone_async.py +27 -0
- novel_downloader/core/downloaders/esjzone/esjzone_sync.py +27 -0
- novel_downloader/core/downloaders/qianbi/__init__.py +14 -0
- novel_downloader/core/downloaders/qianbi/qianbi_async.py +27 -0
- novel_downloader/core/downloaders/qianbi/qianbi_sync.py +27 -0
- novel_downloader/core/downloaders/qidian/qidian_sync.py +9 -14
- novel_downloader/core/downloaders/sfacg/__init__.py +14 -0
- novel_downloader/core/downloaders/sfacg/sfacg_async.py +27 -0
- novel_downloader/core/downloaders/sfacg/sfacg_sync.py +27 -0
- novel_downloader/core/downloaders/yamibo/__init__.py +14 -0
- novel_downloader/core/downloaders/yamibo/yamibo_async.py +27 -0
- novel_downloader/core/downloaders/yamibo/yamibo_sync.py +27 -0
- novel_downloader/core/factory/downloader.py +35 -7
- novel_downloader/core/factory/parser.py +23 -2
- novel_downloader/core/factory/requester.py +32 -7
- novel_downloader/core/factory/saver.py +14 -2
- novel_downloader/core/interfaces/async_requester.py +3 -3
- novel_downloader/core/interfaces/parser.py +7 -2
- novel_downloader/core/interfaces/sync_requester.py +3 -3
- novel_downloader/core/parsers/__init__.py +15 -5
- novel_downloader/core/parsers/base.py +7 -2
- novel_downloader/core/parsers/biquge/main_parser.py +13 -4
- novel_downloader/core/parsers/common/main_parser.py +13 -4
- novel_downloader/core/parsers/esjzone/__init__.py +10 -0
- novel_downloader/core/parsers/esjzone/main_parser.py +220 -0
- novel_downloader/core/parsers/qianbi/__init__.py +10 -0
- novel_downloader/core/parsers/qianbi/main_parser.py +142 -0
- novel_downloader/core/parsers/qidian/browser/main_parser.py +13 -4
- novel_downloader/core/parsers/qidian/session/main_parser.py +13 -4
- novel_downloader/core/parsers/sfacg/__init__.py +10 -0
- novel_downloader/core/parsers/sfacg/main_parser.py +166 -0
- novel_downloader/core/parsers/yamibo/__init__.py +10 -0
- novel_downloader/core/parsers/yamibo/main_parser.py +194 -0
- novel_downloader/core/requesters/__init__.py +33 -3
- novel_downloader/core/requesters/base/async_session.py +14 -10
- novel_downloader/core/requesters/base/browser.py +4 -7
- novel_downloader/core/requesters/base/session.py +25 -11
- novel_downloader/core/requesters/biquge/__init__.py +2 -0
- novel_downloader/core/requesters/biquge/async_session.py +71 -0
- novel_downloader/core/requesters/biquge/session.py +6 -6
- novel_downloader/core/requesters/common/async_session.py +4 -4
- novel_downloader/core/requesters/common/session.py +6 -6
- novel_downloader/core/requesters/esjzone/__init__.py +13 -0
- novel_downloader/core/requesters/esjzone/async_session.py +211 -0
- novel_downloader/core/requesters/esjzone/session.py +235 -0
- novel_downloader/core/requesters/qianbi/__init__.py +13 -0
- novel_downloader/core/requesters/qianbi/async_session.py +96 -0
- novel_downloader/core/requesters/qianbi/session.py +125 -0
- novel_downloader/core/requesters/qidian/broswer.py +9 -9
- novel_downloader/core/requesters/qidian/session.py +14 -11
- novel_downloader/core/requesters/sfacg/__init__.py +13 -0
- novel_downloader/core/requesters/sfacg/async_session.py +204 -0
- novel_downloader/core/requesters/sfacg/session.py +242 -0
- novel_downloader/core/requesters/yamibo/__init__.py +13 -0
- novel_downloader/core/requesters/yamibo/async_session.py +211 -0
- novel_downloader/core/requesters/yamibo/session.py +237 -0
- novel_downloader/core/savers/__init__.py +15 -3
- novel_downloader/core/savers/base.py +3 -7
- novel_downloader/core/savers/common/epub.py +21 -33
- novel_downloader/core/savers/common/main_saver.py +3 -1
- novel_downloader/core/savers/common/txt.py +1 -2
- novel_downloader/core/savers/epub_utils/__init__.py +14 -5
- novel_downloader/core/savers/epub_utils/css_builder.py +1 -0
- novel_downloader/core/savers/epub_utils/image_loader.py +89 -0
- novel_downloader/core/savers/epub_utils/initializer.py +1 -0
- novel_downloader/core/savers/epub_utils/text_to_html.py +48 -1
- novel_downloader/core/savers/epub_utils/volume_intro.py +1 -0
- novel_downloader/core/savers/esjzone.py +25 -0
- novel_downloader/core/savers/qianbi.py +25 -0
- novel_downloader/core/savers/sfacg.py +25 -0
- novel_downloader/core/savers/yamibo.py +25 -0
- novel_downloader/locales/en.json +1 -0
- novel_downloader/locales/zh.json +1 -0
- novel_downloader/resources/config/settings.toml +40 -4
- novel_downloader/utils/constants.py +4 -0
- novel_downloader/utils/file_utils/io.py +1 -1
- novel_downloader/utils/network.py +51 -38
- novel_downloader/utils/time_utils/__init__.py +2 -1
- novel_downloader/utils/time_utils/datetime_utils.py +3 -1
- novel_downloader/utils/time_utils/sleep_utils.py +44 -2
- {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/METADATA +29 -24
- novel_downloader-1.3.3.dist-info/RECORD +166 -0
- novel_downloader-1.3.1.dist-info/RECORD +0 -127
- {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/WHEEL +0 -0
- {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,235 @@
|
|
1
|
+
"""
|
2
|
+
novel_downloader.core.requesters.esjzone.session
|
3
|
+
----------------------------------------------
|
4
|
+
|
5
|
+
"""
|
6
|
+
|
7
|
+
import re
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
from novel_downloader.config.models import RequesterConfig
|
11
|
+
from novel_downloader.core.requesters.base import BaseSession
|
12
|
+
from novel_downloader.utils.i18n import t
|
13
|
+
from novel_downloader.utils.state import state_mgr
|
14
|
+
from novel_downloader.utils.time_utils import sleep_with_random_delay
|
15
|
+
|
16
|
+
|
17
|
+
class EsjzoneSession(BaseSession):
    """
    A session class for interacting with the
    esjzone (www.esjzone.cc) novel website.

    Cookies are read from and written to the state manager under the
    "esjzone" key, and a two-step API login is used when credentials
    are available.
    """

    BOOKCASE_URL = "https://www.esjzone.cc/my/favorite"
    BOOK_INFO_URL = "https://www.esjzone.cc/detail/{book_id}.html"
    CHAPTER_URL = "https://www.esjzone.cc/forum/{book_id}/{chapter_id}.html"

    API_LOGIN_URL_1 = "https://www.esjzone.cc/my/login"
    API_LOGIN_URL_2 = "https://www.esjzone.cc/inc/mem_login.php"

    def __init__(
        self,
        config: RequesterConfig,
    ):
        """
        Initialise the session from the requester configuration.

        :param config: Requester configuration; ``backoff_factor``,
                       ``retry_times``, ``username`` and ``password``
                       are read from it.
        """
        super().__init__(config)
        self._logged_in: bool = False
        # The back-off factor doubles as the base delay between login attempts.
        self._request_interval = config.backoff_factor
        self._retry_times = config.retry_times
        self._username = config.username
        self._password = config.password

    def login(
        self,
        username: str = "",
        password: str = "",
        manual_login: bool = False,
        **kwargs: Any,
    ) -> bool:
        """
        Restore persisted cookies and, if necessary, log in with credentials.

        The cookies stored under "esjzone" are loaded first. Then, up to
        ``retry_times`` times, the login state is checked; when the user is
        not logged in and both a username and a password are available, an
        API login is attempted. Each unsuccessful round is followed by a
        randomised delay. A final status check decides the result.

        :param username: Account e-mail; falls back to the configured value.
        :param password: Account password; falls back to the configured value.
        :param manual_login: Accepted for interface compatibility; unused here.
        :param kwargs: Accepted for interface compatibility; unused here.
        :return: True if the session is logged in, False otherwise.
        """
        cookies: dict[str, str] = state_mgr.get_cookies("esjzone")
        username = username or self._username
        password = password or self._password

        self.update_cookies(cookies)
        for _ in range(self._retry_times):
            if self._check_login_status():
                self.logger.debug("[auth] Already logged in.")
                self._logged_in = True
                return True
            if username and password and not self._api_login(username, password):
                print(t("session_login_failed", site="esjzone"))
            sleep_with_random_delay(
                self._request_interval,
                mul_spread=1.1,
                max_sleep=self._request_interval + 2,
            )

        self._logged_in = self._check_login_status()
        return self._logged_in

    def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of the book info page.

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.get``.
        :return: A single-element list holding the page HTML,
                 or an empty list if the request failed.
        """
        url = self.book_info_url(book_id=book_id)
        try:
            resp = self.get(url, **kwargs)
            resp.raise_for_status()
            return [resp.text]
        except Exception as exc:
            # Best-effort fetch: failures are logged and reported as "no pages".
            self.logger.warning(
                "[session] get_book_info(%s) failed: %s",
                book_id,
                exc,
            )
        return []

    def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the HTML of a single chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.get``.
        :return: A single-element list holding the chapter HTML,
                 or an empty list if the request failed.
        """
        url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
        try:
            resp = self.get(url, **kwargs)
            resp.raise_for_status()
            return [resp.text]
        except Exception as exc:
            # Include the chapter id so the failing chapter can be identified.
            self.logger.warning(
                "[session] get_book_chapter(%s, %s) failed: %s",
                book_id,
                chapter_id,
                exc,
            )
        return []

    def get_bookcase(
        self,
        page: int = 1,
        **kwargs: Any,
    ) -> list[str]:
        """
        Retrieve the user's *bookcase* page.

        :param page: Not used by this implementation; the same URL is
                     always requested.
        :param kwargs: Extra keyword arguments forwarded to ``self.get``.
        :return: A single-element list holding the bookcase HTML,
                 or an empty list if the request failed.
        """
        url = self.bookcase_url()
        try:
            resp = self.get(url, **kwargs)
            resp.raise_for_status()
            return [resp.text]
        except Exception as exc:
            self.logger.warning(
                "[session] get_bookcase failed: %s",
                exc,
            )
        return []

    @classmethod
    def bookcase_url(cls) -> str:
        """
        Construct the URL for the user's bookcase page.

        :return: Fully qualified URL of the bookcase.
        """
        return cls.BOOKCASE_URL

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return cls.BOOK_INFO_URL.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)

    def _api_login(self, username: str, password: str) -> bool:
        """
        Login to the API using a 2-step token-based process.

        Step 1: Get auth token.
        Step 2: Use token and credentials to perform login.

        :param username: Account e-mail sent as the ``email`` form field.
        :param password: Account password sent as the ``pwd`` form field.
        :return: True if login succeeds, False otherwise (including when
                 no auth token could be obtained in step 1).
        """
        data_1 = {
            "plxf": "getAuthToken",
        }
        try:
            resp_1 = self.post(self.API_LOGIN_URL_1, data=data_1)
            resp_1.raise_for_status()
            # Example response: <JinJing>token_here</JinJing>
            token = self._extract_token(resp_1.text)
        except Exception as exc:
            self.logger.warning("[session] _api_login failed at step 1: %s", exc)
            return False

        if not token:
            # Without a token step 2 would send an empty Authorization
            # header, so stop here instead of issuing a doomed request.
            self.logger.warning(
                "[session] _api_login failed at step 1: %s",
                "auth token not found in response",
            )
            return False

        data_2 = {
            "email": username,
            "pwd": password,
            "remember_me": "on",
        }
        # Copy the headers so the token is only attached to this one request.
        temp_headers = dict(self.headers)
        temp_headers["Authorization"] = token
        try:
            resp_2 = self.post(self.API_LOGIN_URL_2, data=data_2, headers=temp_headers)
            resp_2.raise_for_status()
            # A missing "status" field is treated as a non-200 (failed) result.
            resp_code: int = resp_2.json().get("status", 301)
            return resp_code == 200
        except Exception as exc:
            self.logger.warning("[session] _api_login failed at step 2: %s", exc)
        return False

    def _check_login_status(self) -> bool:
        """
        Check whether the user is currently logged in by
        inspecting the bookcase page content.

        The user counts as logged out when the bookcase page cannot be
        fetched or contains a redirect to the login page.

        :return: True if the user is logged in, False otherwise.
        """
        keywords = [
            "window.location.href='/my/login'",
        ]
        resp_text = self.get_bookcase()
        if not resp_text:
            return False
        return not any(kw in resp_text[0] for kw in keywords)

    def _extract_token(self, text: str) -> str:
        """
        Extract the auth token wrapped in ``<JinJing>...</JinJing>``.

        :param text: Response body of the token request.
        :return: The token, or an empty string if no match is found.
        """
        match = re.search(r"<JinJing>(.+?)</JinJing>", text)
        return match.group(1) if match else ""

    def _on_close(self) -> None:
        """
        Save the current cookies to the state manager under "esjzone".

        NOTE(review): presumably invoked by the base session when it is
        closed -- confirm against BaseSession.
        """
        state_mgr.set_cookies("esjzone", self.cookies)
|
@@ -0,0 +1,96 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.requesters.qianbi.async_session
|
4
|
+
-----------------------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import asyncio
|
9
|
+
from typing import Any
|
10
|
+
|
11
|
+
from novel_downloader.core.requesters.base import BaseAsyncSession
|
12
|
+
|
13
|
+
|
14
|
+
class QianbiAsyncSession(BaseAsyncSession):
    """
    An async session class for interacting with the
    Qianbi (www.23qb.com) novel website.
    """

    BASE_URLS = ["www.23qb.com", "www.23qb.net"]

    BOOK_INFO_URL = "https://www.23qb.com/book/{book_id}/"
    BOOK_CATALOG_URL = "https://www.23qb.com/book/{book_id}/catalog"
    CHAPTER_URL = "https://www.23qb.com/book/{book_id}/{chapter_id}.html"

    async def get_book_info(self, book_id: str, **kwargs: Any) -> list[str]:
        """
        Fetch the book info page and the catalog page concurrently.

        Order: [info, catalog]

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.fetch``.
        :return: A two-element list with the info page result followed
                 by the catalog page result.
        """
        targets = (
            self.book_info_url(book_id=book_id),
            self.book_catalog_url(book_id=book_id),
        )
        # gather() yields results in the order the awaitables were passed in.
        fetched = await asyncio.gather(
            *(self.fetch(target, **kwargs) for target in targets)
        )
        return list(fetched)

    async def get_book_chapter(
        self, book_id: str, chapter_id: str, **kwargs: Any
    ) -> list[str]:
        """
        Fetch a single chapter page asynchronously.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.fetch``.
        :return: A single-element list with the chapter page result.
        """
        chapter_html = await self.fetch(
            self.chapter_url(book_id=book_id, chapter_id=chapter_id),
            **kwargs,
        )
        return [chapter_html]

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Build the URL of a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        template = cls.BOOK_INFO_URL
        return template.format(book_id=book_id)

    @classmethod
    def book_catalog_url(cls, book_id: str) -> str:
        """
        Build the URL of a book's catalog page.

        :param book_id: The identifier of the book.
        :return: Fully qualified catalog page URL.
        """
        template = cls.BOOK_CATALOG_URL
        return template.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Build the URL of a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        template = cls.CHAPTER_URL
        return template.format(book_id=book_id, chapter_id=chapter_id)
|
@@ -0,0 +1,125 @@
|
|
1
|
+
"""
|
2
|
+
novel_downloader.core.requesters.qianbi.session
|
3
|
+
-----------------------------------------------
|
4
|
+
|
5
|
+
"""
|
6
|
+
|
7
|
+
from typing import Any
|
8
|
+
|
9
|
+
from novel_downloader.core.requesters.base import BaseSession
|
10
|
+
|
11
|
+
|
12
|
+
class QianbiSession(BaseSession):
    """
    A session class for interacting with the
    Qianbi (www.23qb.com) novel website.
    """

    BASE_URLS = [
        "www.23qb.com",
        "www.23qb.net",
    ]

    BOOK_INFO_URL = "https://www.23qb.com/book/{book_id}/"
    BOOK_CATALOG_URL = "https://www.23qb.com/book/{book_id}/catalog"
    CHAPTER_URL = "https://www.23qb.com/book/{book_id}/{chapter_id}.html"

    def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of the book info and catalog pages.

        Order: [info, catalog]

        Each page is fetched independently; a page that fails to load is
        represented by an empty string so the list always has two entries.

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.get``.
        :return: ``[info_html, catalog_html]``, with ``""`` for any page
                 that could not be fetched.
        """
        info_url = self.book_info_url(book_id=book_id)
        catalog_url = self.book_catalog_url(book_id=book_id)

        return [
            self._fetch_page_or_empty(info_url, "info", book_id, kwargs),
            self._fetch_page_or_empty(catalog_url, "catalog", book_id, kwargs),
        ]

    def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the HTML of a single chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.get``.
        :return: A single-element list holding the chapter HTML,
                 or an empty list if the request failed.
        """
        url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
        try:
            resp = self.get(url, **kwargs)
            resp.raise_for_status()
            return [resp.text]
        except Exception as exc:
            # Include the chapter id so the failing chapter can be identified.
            self.logger.warning(
                "[session] get_book_chapter(%s, %s) failed: %s",
                book_id,
                chapter_id,
                exc,
            )
        return []

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return cls.BOOK_INFO_URL.format(book_id=book_id)

    @classmethod
    def book_catalog_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's catalog page.

        :param book_id: The identifier of the book.
        :return: Fully qualified catalog page URL.
        """
        return cls.BOOK_CATALOG_URL.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)

    def _fetch_page_or_empty(
        self,
        url: str,
        label: str,
        book_id: str,
        request_kwargs: dict[str, Any],
    ) -> str:
        """
        Fetch one page for ``get_book_info``, returning "" on any failure.

        :param url: The page URL to request.
        :param label: Short page name ("info" or "catalog") used in the log.
        :param book_id: The book identifier, used in the log message.
        :param request_kwargs: Keyword arguments forwarded to ``self.get``.
        :return: The response text, or an empty string if the request failed.
        """
        try:
            resp = self.get(url, **request_kwargs)
            resp.raise_for_status()
            return str(resp.text)
        except Exception as exc:
            # Best-effort: log and fall back so the caller keeps list positions.
            self.logger.warning(
                "[session] get_book_info(%s:%s) failed: %s",
                label,
                book_id,
                exc,
            )
        return ""
|
@@ -63,7 +63,7 @@ class QidianBrowser(BaseBrowser):
|
|
63
63
|
self,
|
64
64
|
book_id: str,
|
65
65
|
**kwargs: Any,
|
66
|
-
) -> str:
|
66
|
+
) -> list[str]:
|
67
67
|
"""
|
68
68
|
Retrieve the HTML of a Qidian book info page.
|
69
69
|
|
@@ -78,19 +78,19 @@ class QidianBrowser(BaseBrowser):
|
|
78
78
|
self.logger.debug(
|
79
79
|
"[fetch] Fetched book info for ID %s from %s", book_id, url
|
80
80
|
)
|
81
|
-
return html
|
81
|
+
return [html]
|
82
82
|
except Exception as e:
|
83
83
|
self.logger.warning(
|
84
84
|
"[fetch] Error fetching book info from '%s': %s", url, e
|
85
85
|
)
|
86
|
-
return
|
86
|
+
return []
|
87
87
|
|
88
88
|
def get_book_chapter(
|
89
89
|
self,
|
90
90
|
book_id: str,
|
91
91
|
chapter_id: str,
|
92
92
|
**kwargs: Any,
|
93
|
-
) -> str:
|
93
|
+
) -> list[str]:
|
94
94
|
"""
|
95
95
|
Retrieve the HTML content of a specific chapter.
|
96
96
|
|
@@ -108,16 +108,16 @@ class QidianBrowser(BaseBrowser):
|
|
108
108
|
self.logger.debug(
|
109
109
|
"[fetch] Fetched chapter %s for book %s", chapter_id, book_id
|
110
110
|
)
|
111
|
-
return html
|
111
|
+
return [html]
|
112
112
|
except Exception as e:
|
113
113
|
self.logger.warning("[fetch] Error fetching chapter from '%s': %s", url, e)
|
114
|
-
return
|
114
|
+
return []
|
115
115
|
|
116
116
|
def get_bookcase(
|
117
117
|
self,
|
118
118
|
page: int = 1,
|
119
119
|
**kwargs: Any,
|
120
|
-
) -> str:
|
120
|
+
) -> list[str]:
|
121
121
|
"""
|
122
122
|
Retrieve the HTML content of the logged-in user's Qidian bookcase page.
|
123
123
|
|
@@ -133,10 +133,10 @@ class QidianBrowser(BaseBrowser):
|
|
133
133
|
self.page.get(url)
|
134
134
|
html = str(self.page.html)
|
135
135
|
self.logger.debug("[fetch] Fetched bookcase HTML from %s", url)
|
136
|
-
return html
|
136
|
+
return [html]
|
137
137
|
except Exception as e:
|
138
138
|
self.logger.warning("[fetch] Error fetching bookcase from '%s': %s", url, e)
|
139
|
-
return
|
139
|
+
return []
|
140
140
|
|
141
141
|
@classmethod
|
142
142
|
def book_info_url(cls, book_id: str) -> str:
|
@@ -67,7 +67,7 @@ class QidianSession(BaseSession):
|
|
67
67
|
**kwargs: Any,
|
68
68
|
) -> bool:
|
69
69
|
"""
|
70
|
-
Restore cookies persisted by the
|
70
|
+
Restore cookies persisted by the session-based workflow.
|
71
71
|
"""
|
72
72
|
cookies: dict[str, str] = state_mgr.get_cookies("qidian")
|
73
73
|
|
@@ -76,6 +76,7 @@ class QidianSession(BaseSession):
|
|
76
76
|
for attempt in range(1, self._retry_times + 1):
|
77
77
|
if self._check_login_status():
|
78
78
|
self.logger.debug("[auth] Already logged in.")
|
79
|
+
self._logged_in = True
|
79
80
|
return True
|
80
81
|
|
81
82
|
if attempt == 1:
|
@@ -100,7 +101,7 @@ class QidianSession(BaseSession):
|
|
100
101
|
self,
|
101
102
|
book_id: str,
|
102
103
|
**kwargs: Any,
|
103
|
-
) -> str:
|
104
|
+
) -> list[str]:
|
104
105
|
"""
|
105
106
|
Fetch the raw HTML of the book info page.
|
106
107
|
|
@@ -111,21 +112,21 @@ class QidianSession(BaseSession):
|
|
111
112
|
try:
|
112
113
|
resp = self.get(url, **kwargs)
|
113
114
|
resp.raise_for_status()
|
114
|
-
return resp.text
|
115
|
+
return [resp.text]
|
115
116
|
except Exception as exc:
|
116
117
|
self.logger.warning(
|
117
118
|
"[session] get_book_info(%s) failed: %s",
|
118
119
|
book_id,
|
119
120
|
exc,
|
120
121
|
)
|
121
|
-
return
|
122
|
+
return []
|
122
123
|
|
123
124
|
def get_book_chapter(
|
124
125
|
self,
|
125
126
|
book_id: str,
|
126
127
|
chapter_id: str,
|
127
128
|
**kwargs: Any,
|
128
|
-
) -> str:
|
129
|
+
) -> list[str]:
|
129
130
|
"""
|
130
131
|
Fetch the HTML of a single chapter.
|
131
132
|
|
@@ -137,20 +138,20 @@ class QidianSession(BaseSession):
|
|
137
138
|
try:
|
138
139
|
resp = self.get(url, **kwargs)
|
139
140
|
resp.raise_for_status()
|
140
|
-
return resp.text
|
141
|
+
return [resp.text]
|
141
142
|
except Exception as exc:
|
142
143
|
self.logger.warning(
|
143
144
|
"[session] get_book_chapter(%s) failed: %s",
|
144
145
|
book_id,
|
145
146
|
exc,
|
146
147
|
)
|
147
|
-
return
|
148
|
+
return []
|
148
149
|
|
149
150
|
def get_bookcase(
|
150
151
|
self,
|
151
152
|
page: int = 1,
|
152
153
|
**kwargs: Any,
|
153
|
-
) -> str:
|
154
|
+
) -> list[str]:
|
154
155
|
"""
|
155
156
|
Retrieve the user's *bookcase* page.
|
156
157
|
|
@@ -160,13 +161,13 @@ class QidianSession(BaseSession):
|
|
160
161
|
try:
|
161
162
|
resp = self.get(url, **kwargs)
|
162
163
|
resp.raise_for_status()
|
163
|
-
return resp.text
|
164
|
+
return [resp.text]
|
164
165
|
except Exception as exc:
|
165
166
|
self.logger.warning(
|
166
167
|
"[session] get_bookcase failed: %s",
|
167
168
|
exc,
|
168
169
|
)
|
169
|
-
return
|
170
|
+
return []
|
170
171
|
|
171
172
|
def get(
|
172
173
|
self,
|
@@ -247,7 +248,9 @@ class QidianSession(BaseSession):
|
|
247
248
|
"C2WF946J0/probe.js",
|
248
249
|
]
|
249
250
|
resp_text = self.get_bookcase()
|
250
|
-
|
251
|
+
if not resp_text:
|
252
|
+
return False
|
253
|
+
return not any(kw in resp_text[0] for kw in keywords)
|
251
254
|
|
252
255
|
@staticmethod
|
253
256
|
def _parse_cookie_input(cookie_str: str) -> dict[str, str]:
|