novel-downloader 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +14 -0
- novel_downloader/cli/__init__.py +14 -0
- novel_downloader/cli/clean.py +134 -0
- novel_downloader/cli/download.py +98 -0
- novel_downloader/cli/interactive.py +67 -0
- novel_downloader/cli/main.py +45 -0
- novel_downloader/cli/settings.py +177 -0
- novel_downloader/config/__init__.py +52 -0
- novel_downloader/config/adapter.py +150 -0
- novel_downloader/config/loader.py +177 -0
- novel_downloader/config/models.py +170 -0
- novel_downloader/config/site_rules.py +97 -0
- novel_downloader/core/__init__.py +25 -0
- novel_downloader/core/downloaders/__init__.py +20 -0
- novel_downloader/core/downloaders/base_downloader.py +187 -0
- novel_downloader/core/downloaders/common_downloader.py +192 -0
- novel_downloader/core/downloaders/qidian_downloader.py +208 -0
- novel_downloader/core/factory/__init__.py +21 -0
- novel_downloader/core/factory/downloader_factory.py +62 -0
- novel_downloader/core/factory/parser_factory.py +62 -0
- novel_downloader/core/factory/requester_factory.py +62 -0
- novel_downloader/core/factory/saver_factory.py +49 -0
- novel_downloader/core/interfaces/__init__.py +28 -0
- novel_downloader/core/interfaces/downloader_protocol.py +37 -0
- novel_downloader/core/interfaces/parser_protocol.py +40 -0
- novel_downloader/core/interfaces/requester_protocol.py +65 -0
- novel_downloader/core/interfaces/saver_protocol.py +61 -0
- novel_downloader/core/parsers/__init__.py +28 -0
- novel_downloader/core/parsers/base_parser.py +96 -0
- novel_downloader/core/parsers/common_parser/__init__.py +14 -0
- novel_downloader/core/parsers/common_parser/helper.py +321 -0
- novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
- novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
- novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
- novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
- novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
- novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
- novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
- novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
- novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
- novel_downloader/core/requesters/__init__.py +27 -0
- novel_downloader/core/requesters/base_browser.py +210 -0
- novel_downloader/core/requesters/base_session.py +243 -0
- novel_downloader/core/requesters/common_requester/__init__.py +14 -0
- novel_downloader/core/requesters/common_requester/common_session.py +126 -0
- novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
- novel_downloader/core/savers/__init__.py +20 -0
- novel_downloader/core/savers/base_saver.py +169 -0
- novel_downloader/core/savers/common_saver/__init__.py +13 -0
- novel_downloader/core/savers/common_saver/common_epub.py +232 -0
- novel_downloader/core/savers/common_saver/common_txt.py +176 -0
- novel_downloader/core/savers/common_saver/main_saver.py +86 -0
- novel_downloader/core/savers/epub_utils/__init__.py +27 -0
- novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
- novel_downloader/core/savers/epub_utils/initializer.py +98 -0
- novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
- novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
- novel_downloader/core/savers/qidian_saver.py +22 -0
- novel_downloader/locales/en.json +91 -0
- novel_downloader/locales/zh.json +91 -0
- novel_downloader/resources/config/rules.toml +196 -0
- novel_downloader/resources/config/settings.yaml +70 -0
- novel_downloader/resources/css_styles/main.css +104 -0
- novel_downloader/resources/css_styles/volume-intro.css +56 -0
- novel_downloader/resources/images/volume_border.png +0 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
- novel_downloader/resources/json/replace_word_map.json +4 -0
- novel_downloader/resources/text/blacklist.txt +22 -0
- novel_downloader/utils/__init__.py +0 -0
- novel_downloader/utils/cache.py +24 -0
- novel_downloader/utils/constants.py +158 -0
- novel_downloader/utils/crypto_utils.py +144 -0
- novel_downloader/utils/file_utils/__init__.py +43 -0
- novel_downloader/utils/file_utils/io.py +252 -0
- novel_downloader/utils/file_utils/normalize.py +68 -0
- novel_downloader/utils/file_utils/sanitize.py +77 -0
- novel_downloader/utils/fontocr/__init__.py +23 -0
- novel_downloader/utils/fontocr/ocr_v1.py +304 -0
- novel_downloader/utils/fontocr/ocr_v2.py +658 -0
- novel_downloader/utils/hash_store.py +288 -0
- novel_downloader/utils/hash_utils.py +103 -0
- novel_downloader/utils/i18n.py +41 -0
- novel_downloader/utils/logger.py +104 -0
- novel_downloader/utils/model_loader.py +72 -0
- novel_downloader/utils/network.py +287 -0
- novel_downloader/utils/state.py +156 -0
- novel_downloader/utils/text_utils/__init__.py +27 -0
- novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
- novel_downloader/utils/text_utils/diff_display.py +75 -0
- novel_downloader/utils/text_utils/font_mapping.py +31 -0
- novel_downloader/utils/text_utils/text_cleaning.py +57 -0
- novel_downloader/utils/time_utils/__init__.py +22 -0
- novel_downloader/utils/time_utils/datetime_utils.py +146 -0
- novel_downloader/utils/time_utils/sleep_utils.py +49 -0
- novel_downloader-1.1.1.dist-info/METADATA +137 -0
- novel_downloader-1.1.1.dist-info/RECORD +109 -0
- novel_downloader-1.1.1.dist-info/WHEEL +5 -0
- novel_downloader-1.1.1.dist-info/entry_points.txt +2 -0
- novel_downloader-1.1.1.dist-info/licenses/LICENSE +21 -0
- novel_downloader-1.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,377 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.requesters.qidian_requester.qidian_broswer
|
5
|
+
----------------------------------------------------------------
|
6
|
+
|
7
|
+
This module defines the QidianBrowser class for interacting with
|
8
|
+
the Qidian website.
|
9
|
+
It extends the BaseBrowser by adding methods for logging in and
|
10
|
+
retrieving book information.
|
11
|
+
"""
|
12
|
+
|
13
|
+
import logging
|
14
|
+
import random
|
15
|
+
import time
|
16
|
+
from typing import Optional
|
17
|
+
|
18
|
+
from DrissionPage.common import Keys
|
19
|
+
|
20
|
+
from novel_downloader.config.models import RequesterConfig
|
21
|
+
from novel_downloader.core.requesters.base_browser import BaseBrowser
|
22
|
+
from novel_downloader.utils.time_utils import sleep_with_random_delay
|
23
|
+
|
24
|
+
logger = logging.getLogger(__name__)
|
25
|
+
|
26
|
+
|
27
|
+
class QidianBrowser(BaseBrowser):
    """
    Browser-driven requester for Qidian.com.

    On top of the browser prepared by BaseBrowser, this class provides
    automatic and manual login flows plus fetchers for the book info,
    chapter and bookcase pages.
    """

    DEFAULT_SCHEME = "https:"
    QIDIAN_BASE_URL = "www.qidian.com"
    QIDIAN_BOOKCASE_URL = f"{DEFAULT_SCHEME}//my.qidian.com/bookcase/"
    QIDIAN_BOOK_INFO_URL_1 = f"{DEFAULT_SCHEME}//www.qidian.com/book"
    QIDIAN_BOOK_INFO_URL_2 = f"{DEFAULT_SCHEME}//book.qidian.com/info"
    QIDIAN_CHAPTER_URL = f"{DEFAULT_SCHEME}//www.qidian.com/chapter"

    def __init__(self, config: RequesterConfig):
        """
        Initialize the browser from the given configuration.

        :param config: The RequesterConfig instance containing browser settings.
        """
        self._init_browser(config=config)
        self._headless: bool = config.headless
        self._logged_in: bool = False

    def _is_user_logged_in(self) -> bool:
        """
        Check the login state from the 'sign-in' element of the current page.

        Any overlay mask is dismissed first. The user is considered logged
        in when an element with class ``sign-in`` exists and its class
        attribute does not contain ``hidden``.

        :return: True if the user appears to be logged in, False otherwise
                 (including when the check itself raises).
        """
        try:
            self._handle_overlay_mask()
            sign_in_elem = self._page.ele("@class=sign-in")
            if sign_in_elem:
                class_value = sign_in_elem.attr("class")
                if class_value and "hidden" not in class_value:
                    return True
        except Exception as e:
            logger.warning("[auth] Error while checking login status: %s", e)
        return False

    def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
        """
        Attempt to log in to Qidian.

        :param max_retries: Maximum number of attempts passed to the chosen flow.
        :param manual_login: If True, use the interactive manual flow;
                             otherwise use the automatic click-based flow.
        :return: True if login was detected, False otherwise.
        """
        if manual_login:
            return self._manual_login(max_retries)
        return self._login(max_retries)

    def _login(self, max_retries: int = 3) -> bool:
        """
        Attempt to log in to Qidian by handling overlays and clicking the login button.

        The page that was open before the attempt is reloaded afterwards
        (best effort).

        :param max_retries: Maximum number of times to try clicking the login button.
        :return: True if login succeeds or is already in place; False otherwise.
        """
        original_url = self._page.url
        try:
            self._page.get("https://www.qidian.com/")
            self._page.wait.eles_loaded("#login-box")
        except Exception as e:
            logger.warning("[auth] Failed to load login box: %s", e)
            return False

        for attempt in range(1, max_retries + 1):
            if self._is_user_logged_in():
                logger.debug("[auth] Already logged in.")
                break

            self._click_login_button(attempt)
            time.sleep(self._config.retry_interval)

        # Final check also covers a login that completed on the last click.
        self._logged_in = self._is_user_logged_in()
        if self._logged_in:
            logger.info("[auth] Login successful.")
        else:
            logger.warning("[auth] Login failed after max retries.")

        # return to original page
        try:
            self._page.get(original_url)
        except Exception as e:
            logger.debug("[auth] Failed to restore page URL: %s", e)

        return self._logged_in

    def _handle_overlay_mask(self) -> None:
        """
        Detect and close any full-page overlay mask that might block the login UI.

        Looks for a ``div.mask`` element, then for the ``loginIfr`` frame and
        its ``#close`` button. All failures are logged at debug level and
        otherwise ignored.
        """
        try:
            mask = self._page.ele("@@tag()=div@@class=mask", timeout=2)
            if not mask:
                return

            logger.debug("[auth] Overlay mask detected; attempting to close.")
            iframe = self._page.get_frame("loginIfr", timeout=5)
            if not iframe:
                logger.debug("[auth] Login iframe not found.")
                return

            close_btn = iframe.ele("@id=close", timeout=5)
            if close_btn:
                close_btn.click()
                logger.debug("[auth] Closed overlay mask via iframe close button.")
            else:
                logger.debug("[auth] Close button not found in login iframe.")
        except Exception as e:
            logger.debug("[auth] Error handling overlay mask: %s", e)

    def _click_login_button(self, attempt: int) -> None:
        """
        Try to click the login button (``#login-btn``) on the page.

        Errors are logged at debug level and never propagated.

        :param attempt: The current attempt number (for logging).
        """
        try:
            logger.debug("[auth] Attempting login click (#%s).", attempt)
            login_btn = self._page.ele("@id=login-btn", timeout=5)
            if login_btn:
                login_btn.click()
                logger.debug("[auth] Login button clicked.")
            else:
                logger.debug("[auth] Login button not found.")
        except Exception as e:
            logger.debug("[auth] Exception clicking login button: %s", e)

    def _manual_login(
        self,
        max_retries: int = 3,
    ) -> bool:
        """
        Guide the user through an interactive manual login flow.

        Steps:
          1. If the browser is headless (or images are disabled), restart it
             in headful mode with images enabled.
          2. Navigate to the Qidian homepage.
          3. Prompt the user to complete login, up to `max_retries` times,
             checking the login state before each prompt and once more after
             the final prompt.
          4. Once logged in, restore the original browser settings if they
             were changed and re-run the automatic login check.

        If the homepage cannot be loaded or login is never detected, the
        method returns before step 4, so the browser keeps the settings
        applied in step 1.

        :param max_retries: Number of times to prompt the user.
        :return: True if login was detected, False otherwise.
        """
        original_headless = self._headless
        # Read once with a default: the attribute is accessed defensively
        # because it may not be set on this instance. Using the same value
        # everywhere avoids an AttributeError when restoring settings.
        disable_images = getattr(self, "_disable_images_orig", False)
        settings_changed = bool(disable_images or original_headless)

        # 1. Switch to headful mode if needed
        if disable_images:
            logger.debug("[auth] Temporarily enabling images for manual login.")
            self._options.no_imgs(False)
        if settings_changed:
            self._restart_browser(headless=False)

        # 2. Navigate to home page
        try:
            self._page.get("https://www.qidian.com/")
        except Exception as e:
            logger.warning("[auth] Failed to load homepage for manual login: %s", e)
            return False

        # 3. Retry loop
        for attempt in range(1, max_retries + 1):
            if self._is_user_logged_in():
                break

            logger.info(
                "[auth] Attempt %d/%d: Press Enter after completing login...",
                attempt,
                max_retries,
            )
            input()
        else:
            # The outcome of the last prompt has not been checked yet;
            # verify it before declaring failure.
            if not self._is_user_logged_in():
                logger.warning(
                    "[auth] Manual login failed after %d attempts.", max_retries
                )
                self._logged_in = False
                return self._logged_in

        logger.info("[auth] Detected successful login.")
        self._logged_in = True

        # 4. Restore headless if we changed it, then re-establish session
        if settings_changed:
            logger.debug("[auth] Restoring browser settings after manual login...")
            self._options.no_imgs(disable_images)
            self._restart_browser(headless=original_headless)
            self.login()
            if self._logged_in:
                logger.info(
                    "[auth] Login session successfully carried over after restart."
                )
            else:
                logger.warning(
                    "[auth] Lost login session after restoring headless mode."
                )

        return self._logged_in

    def _restart_browser(self, headless: Optional[bool] = None) -> None:
        """
        Shutdown the current browser and restart it with the given headless setting.

        :param headless: Whether to run the browser in headless mode.
                         If None, the current setting is left untouched.
        """
        if self._browser:
            self._browser.quit()
        self._clear_browser_refs()

        # Apply new headless setting and reinitialize
        if headless is not None:
            self._options.headless(headless)
            self._headless = headless
        self._setup()
        logger.debug("[browser] Browser restarted (headless=%s).", headless)

    def _build_book_info_url(self, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return f"{self.QIDIAN_BOOK_INFO_URL_2}/{book_id}/"

    def _build_chapter_url(self, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return f"{self.QIDIAN_CHAPTER_URL}/{book_id}/{chapter_id}/"

    def _build_bookcase_url(self) -> str:
        """
        Construct the URL for the user's bookcase page.

        :return: Fully qualified URL of the bookcase.
        """
        return self.QIDIAN_BOOKCASE_URL

    def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
        """
        Retrieve the HTML of a Qidian book info page.

        Navigates to the book's info URL, waits a randomized delay, and
        returns the page HTML. No login check is performed here.

        :param book_id: The identifier of the book to fetch.
        :param wait_time: Base wait time in seconds before returning content.
                          If None, uses `self._config.wait_time`.
        :return: The HTML content of the book info page, or an empty string on error.
        """
        url = self._build_book_info_url(book_id)
        try:
            # Navigate and fetch
            self._page.get(url)

            # Randomized delay
            base = wait_time if wait_time is not None else self._config.wait_time
            sleep_with_random_delay(base, base * 0.2)

            html = str(self._page.html)
            logger.debug("[fetch] Fetched book info for ID %s from %s", book_id, url)
            return html

        except Exception as e:
            logger.warning("[fetch] Error fetching book info from '%s': %s", url, e)
            return ""

    def _scroll_page(self, presses: int, pause: float) -> None:
        """
        Scroll down by sending DOWN key presses to the page.

        A failed press is logged at debug level; the pause still applies.

        :param presses: Number of DOWN key presses.
        :param pause: Seconds to wait between each press.
        """
        for _ in range(presses):
            try:
                self._page.actions.key_down(Keys.DOWN)
            except Exception as e:
                logger.debug("[page] Scroll press failed: %s", e)
            time.sleep(pause)

    def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
    ) -> str:
        """
        Retrieve the HTML content of a specific chapter.

        Navigates to the chapter page and scrolls down with a randomized
        number of key presses before reading the HTML. There is no separate
        sleep and no login check; the scrolling itself provides the delay.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :param wait_time: Base value used to size the scroll: the number of
                          presses is drawn from ``[base, base + 5) * 2`` with
                          a 0.5 s pause each. If None, falls back to
                          `self._config.wait_time`.
        :return: The HTML content of the chapter page, or empty string on error.
        """
        url = self._build_chapter_url(book_id, chapter_id)
        try:
            # 1. Navigate to chapter URL
            self._page.get(url)

            # 2. Scroll down to load dynamic content
            base = wait_time if wait_time is not None else self._config.wait_time
            presses = int(random.uniform(base, base + 5) * 2)
            self._scroll_page(presses, pause=0.5)

            html = str(self._page.html)
            logger.debug("[fetch] Fetched chapter %s for book %s", chapter_id, book_id)
            return html

        except Exception as e:
            logger.warning("[fetch] Error fetching chapter from '%s': %s", url, e)
            return ""

    def get_bookcase(self, wait_time: Optional[int] = None) -> str:
        """
        Retrieve the HTML content of the logged-in user's Qidian bookcase page.

        :param wait_time: Base number of seconds to wait before returning content.
                          If None, falls back to `self._config.wait_time`.
        :return: The HTML markup of the bookcase page, or empty string on error.
        :raises RuntimeError: If the user is not logged in.
        """
        if not self._logged_in:
            raise RuntimeError("User not logged in. Please call login() first.")

        url = self._build_bookcase_url()
        try:
            # Navigate to the bookcase page
            self._page.get(url)

            # Randomized delay
            base = wait_time if wait_time is not None else self._config.wait_time
            sleep_with_random_delay(base, base * 0.2)

            html = str(self._page.html)
            logger.debug("[fetch] Fetched bookcase HTML from %s", url)
            return html

        except Exception as e:
            logger.warning("[fetch] Error fetching bookcase from '%s': %s", url, e)
            return ""
|
@@ -0,0 +1,202 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.requesters.qidian_requester.qidian_session
|
5
|
+
----------------------------------------------------------------
|
6
|
+
|
7
|
+
This module defines the QidianSession class for interacting with
|
8
|
+
the Qidian website.
|
9
|
+
It extends the BaseSession by adding methods for logging in and
|
10
|
+
retrieving book information.
|
11
|
+
"""
|
12
|
+
|
13
|
+
from __future__ import annotations
|
14
|
+
|
15
|
+
import base64
|
16
|
+
import logging
|
17
|
+
import time
|
18
|
+
from typing import Any, Dict, Optional
|
19
|
+
|
20
|
+
from requests import Response
|
21
|
+
|
22
|
+
from novel_downloader.config.models import RequesterConfig
|
23
|
+
from novel_downloader.core.requesters.base_session import BaseSession
|
24
|
+
from novel_downloader.utils.crypto_utils import patch_qd_payload_token
|
25
|
+
from novel_downloader.utils.state import state_mgr
|
26
|
+
from novel_downloader.utils.time_utils import sleep_with_random_delay
|
27
|
+
|
28
|
+
logger = logging.getLogger(__name__)
|
29
|
+
|
30
|
+
|
31
|
+
class QidianSession(BaseSession):
    """
    A concrete :class:`BaseSession` for the Qidian site. Besides the usual
    ``get``/``post`` helpers provided by the base class, this subclass adds:

    * URL constants for book info / chapter / bookcase pages
    * High-level convenience wrappers that:
        1. sleep a configurable (jittered) amount of time;
        2. retry on failures;
        3. automatically persist fresh cookies to :py:data:`state_mgr`
           so that the next run can reuse them.
    """

    DEFAULT_SCHEME = "https:"
    QIDIAN_BASE_URL = "www.qidian.com"
    QIDIAN_BOOKCASE_URL = f"{DEFAULT_SCHEME}//my.qidian.com/bookcase/"
    QIDIAN_BOOK_INFO_URL_1 = f"{DEFAULT_SCHEME}//www.qidian.com/book"
    QIDIAN_BOOK_INFO_URL_2 = f"{DEFAULT_SCHEME}//book.qidian.com/info"
    QIDIAN_CHAPTER_URL = f"{DEFAULT_SCHEME}//www.qidian.com/chapter"

    def __init__(self, config: RequesterConfig):
        """
        Initialise the underlying session from *config*.

        :param config: The RequesterConfig instance with session settings.
        """
        self._init_session(config=config)

    def get(
        self,
        url: str,
        params: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Response:
        """
        Same as :py:meth:`BaseSession.get`, but transparently refreshes
        a cookie-based token used for request validation.

        The method:
          1. Reads the existing token cookie (if any);
          2. Generates a new value tied to *url*;
          3. Updates both the live session and the internal cookie cache;
          4. Delegates the actual request to ``super().get``;
          5. Stores the session's cookies under the ``"qidian"`` key of
             ``state_mgr``.

        :param url: The URL to request.
        :param params: Optional query parameters.
        :param kwargs: Extra arguments forwarded to ``super().get``.
        :return: The response returned by ``super().get``.
        :raises RuntimeError: If the session has not been initialised.
        """
        if self._session is None:  # defensive - mirrors BaseSession check
            raise RuntimeError("Session is not initialized or has been shut down.")

        # ---- 1. refresh token cookie --------------------------------------
        cookie_key = base64.b64decode("d190c2Zw").decode()
        old_token = self._session.cookies.get(cookie_key, "")

        if old_token:
            refreshed_token = patch_qd_payload_token(old_token, url)
            self._session.cookies.set(cookie_key, refreshed_token)
            self._cookies[cookie_key] = refreshed_token

        # ---- 2. perform the real GET --------------------------------------
        resp: Response = super().get(url, params=params, **kwargs)

        # ---- 3. persist any server-set cookies ----------------------------
        self.update_cookies(self._session.cookies.get_dict(), overwrite=True)
        state_mgr.set_cookies("qidian", self._cookies)

        return resp

    def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
        """
        Restore cookies persisted by the browser-based workflow.

        Loads the cookies stored under ``"qidian"`` in ``state_mgr`` and
        issues one GET to the Qidian homepage. The response of that request
        is not inspected.

        :param max_retries: Accepted for interface compatibility; not used here.
        :param manual_login: Accepted for interface compatibility; not used here.
        :return: False when no stored cookies exist, True otherwise.
        """
        cookies: Dict[str, str] = state_mgr.get_cookies("qidian")
        if not cookies:
            logger.info(
                "[session] No stored cookies found: session remains unauthenticated."
            )
            return False

        # Merge cookies into both the internal cache and the live session
        self.update_cookies(cookies, overwrite=True)
        logger.info("[session] Loaded %d cookie(s) from state.", len(cookies))
        self.get("https://www.qidian.com")
        return True

    def _fetch_text(
        self,
        url: str,
        op: str,
        args_repr: str,
        wait_time: Optional[int],
        **kwargs: Any,
    ) -> str:
        """
        GET *url* with retries and return the response body as text.

        :param url: The URL to fetch.
        :param op: Name of the public method, used in log/error messages.
        :param args_repr: Rendered argument list appended to *op* in logs
                          (e.g. ``"(123)"``), or an empty string.
        :param wait_time: Base delay in seconds applied after a successful
                          response; ``None`` means `self._config.wait_time`.
                          An explicit ``0`` is honoured.
        :param kwargs: Extra arguments forwarded to :meth:`get`.
        :return: The response text.
        :raises Exception: Re-raises the last failure once `self.retry_times`
                           attempts are exhausted.
        :raises RuntimeError: If the retry loop ends without returning,
                              i.e. `self.retry_times` allows no attempt.
        """
        # `is None` rather than truthiness so that wait_time=0 is not
        # silently replaced by the configured default.
        base_delay = self._config.wait_time if wait_time is None else wait_time

        for attempt in range(1, self.retry_times + 1):
            try:
                resp = self.get(url, **kwargs)
                resp.raise_for_status()
                sleep_with_random_delay(base_delay, base_delay * 0.2)
                return resp.text
            except Exception as exc:
                logger.warning(
                    "[session] %s attempt %s/%s failed: %s",
                    f"{op}{args_repr}",
                    attempt,
                    self.retry_times,
                    exc,
                )
                if attempt == self.retry_times:
                    raise
                time.sleep(self.retry_interval)

        raise RuntimeError(f"Unexpected fall-through in {op}")

    def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
        """
        Fetch the raw HTML of the book info page.

        :param book_id: The book identifier.
        :param wait_time: Base number of seconds to wait before returning content.
                          If None, uses `self._config.wait_time`.
        :return: The page content as a string.
        :raises Exception: The last request error after all retries failed.
        """
        url = f"{self.QIDIAN_BOOK_INFO_URL_2}/{book_id}/"
        return self._fetch_text(url, "get_book_info", f"({book_id})", wait_time)

    def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
    ) -> str:
        """
        Fetch the HTML of a single chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param wait_time: Base number of seconds to wait before returning content.
                          If None, uses `self._config.wait_time`.
        :return: The chapter content as a string.
        :raises Exception: The last request error after all retries failed.
        """
        url = f"{self.QIDIAN_CHAPTER_URL}/{book_id}/{chapter_id}/"
        return self._fetch_text(
            url, "get_book_chapter", f"({book_id}, {chapter_id})", wait_time
        )

    def get_bookcase(self, wait_time: Optional[int] = None) -> str:
        """
        Retrieve the user's *bookcase* page.

        :param wait_time: Base number of seconds to wait before returning content.
                          If None, uses `self._config.wait_time`.
        :return: The HTML markup of the bookcase page.
        :raises Exception: The last request error after all retries failed.
        """
        return self._fetch_text(
            self.QIDIAN_BOOKCASE_URL,
            "get_bookcase",
            "",
            wait_time,
            allow_redirects=True,
        )
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.savers
|
5
|
+
----------------------------
|
6
|
+
|
7
|
+
This module defines saver classes for different novel platforms.
|
8
|
+
|
9
|
+
Currently supported platforms:
|
10
|
+
- Qidian (起点中文网)
|
11
|
+
- CommonSaver (通用)
|
12
|
+
"""
|
13
|
+
|
14
|
+
from .common_saver import CommonSaver
|
15
|
+
from .qidian_saver import QidianSaver
|
16
|
+
|
17
|
+
# Public API of the savers package.
__all__ = ["CommonSaver", "QidianSaver"]
|