novel-downloader 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -2
- novel_downloader/cli/__init__.py +0 -1
- novel_downloader/cli/clean.py +2 -10
- novel_downloader/cli/download.py +16 -22
- novel_downloader/cli/interactive.py +0 -1
- novel_downloader/cli/main.py +1 -3
- novel_downloader/cli/settings.py +8 -8
- novel_downloader/config/__init__.py +0 -1
- novel_downloader/config/adapter.py +32 -27
- novel_downloader/config/loader.py +116 -108
- novel_downloader/config/models.py +35 -29
- novel_downloader/config/site_rules.py +2 -4
- novel_downloader/core/__init__.py +0 -1
- novel_downloader/core/downloaders/__init__.py +4 -4
- novel_downloader/core/downloaders/base/__init__.py +14 -0
- novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
- novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
- novel_downloader/core/downloaders/biquge/__init__.py +12 -0
- novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
- novel_downloader/core/downloaders/common/__init__.py +14 -0
- novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
- novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +33 -21
- novel_downloader/core/downloaders/qidian/__init__.py +10 -0
- novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +79 -62
- novel_downloader/core/factory/__init__.py +4 -5
- novel_downloader/core/factory/{downloader_factory.py → downloader.py} +25 -26
- novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
- novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
- novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
- novel_downloader/core/interfaces/__init__.py +8 -9
- novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
- novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +23 -12
- novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
- novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
- novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
- novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +31 -17
- novel_downloader/core/parsers/__init__.py +5 -4
- novel_downloader/core/parsers/{base_parser.py → base.py} +18 -9
- novel_downloader/core/parsers/biquge/__init__.py +10 -0
- novel_downloader/core/parsers/biquge/main_parser.py +126 -0
- novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
- novel_downloader/core/parsers/{common_parser → common}/helper.py +13 -13
- novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +40 -48
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +14 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +36 -44
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +14 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/book_info_parser.py +5 -6
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +7 -8
- novel_downloader/core/requesters/__init__.py +9 -5
- novel_downloader/core/requesters/base/__init__.py +16 -0
- novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +177 -73
- novel_downloader/core/requesters/base/browser.py +340 -0
- novel_downloader/core/requesters/base/session.py +364 -0
- novel_downloader/core/requesters/biquge/__init__.py +12 -0
- novel_downloader/core/requesters/biquge/session.py +90 -0
- novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
- novel_downloader/core/requesters/common/async_session.py +96 -0
- novel_downloader/core/requesters/common/session.py +113 -0
- novel_downloader/core/requesters/qidian/__init__.py +21 -0
- novel_downloader/core/requesters/qidian/broswer.py +307 -0
- novel_downloader/core/requesters/qidian/session.py +287 -0
- novel_downloader/core/savers/__init__.py +5 -3
- novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
- novel_downloader/core/savers/biquge.py +25 -0
- novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
- novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +23 -51
- novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
- novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
- novel_downloader/core/savers/epub_utils/__init__.py +0 -1
- novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
- novel_downloader/core/savers/epub_utils/initializer.py +4 -5
- novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
- novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
- novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
- novel_downloader/locales/en.json +8 -4
- novel_downloader/locales/zh.json +5 -1
- novel_downloader/resources/config/settings.toml +88 -0
- novel_downloader/utils/cache.py +2 -2
- novel_downloader/utils/chapter_storage.py +340 -0
- novel_downloader/utils/constants.py +6 -4
- novel_downloader/utils/crypto_utils.py +3 -3
- novel_downloader/utils/file_utils/__init__.py +0 -1
- novel_downloader/utils/file_utils/io.py +12 -17
- novel_downloader/utils/file_utils/normalize.py +1 -3
- novel_downloader/utils/file_utils/sanitize.py +2 -9
- novel_downloader/utils/fontocr/__init__.py +0 -1
- novel_downloader/utils/fontocr/ocr_v1.py +19 -22
- novel_downloader/utils/fontocr/ocr_v2.py +147 -60
- novel_downloader/utils/hash_store.py +19 -20
- novel_downloader/utils/hash_utils.py +0 -1
- novel_downloader/utils/i18n.py +3 -4
- novel_downloader/utils/logger.py +5 -6
- novel_downloader/utils/model_loader.py +5 -8
- novel_downloader/utils/network.py +9 -10
- novel_downloader/utils/state.py +6 -7
- novel_downloader/utils/text_utils/__init__.py +0 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
- novel_downloader/utils/text_utils/diff_display.py +0 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -4
- novel_downloader/utils/text_utils/text_cleaning.py +0 -1
- novel_downloader/utils/time_utils/__init__.py +0 -1
- novel_downloader/utils/time_utils/datetime_utils.py +8 -10
- novel_downloader/utils/time_utils/sleep_utils.py +1 -3
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/METADATA +14 -17
- novel_downloader-1.3.1.dist-info/RECORD +127 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/WHEEL +1 -1
- novel_downloader/core/requesters/base_browser.py +0 -214
- novel_downloader/core/requesters/base_session.py +0 -246
- novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
- novel_downloader/core/requesters/common_requester/common_session.py +0 -126
- novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -396
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
- novel_downloader/resources/config/settings.yaml +0 -76
- novel_downloader-1.2.2.dist-info/RECORD +0 -115
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/top_level.txt +0 -0
@@ -1,98 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
"""
|
4
|
-
novel_downloader.core.requesters.common_requester.common_async_session
|
5
|
-
----------------------------------------------------------------------
|
6
|
-
|
7
|
-
This module defines a `CommonAsyncSession` class for handling HTTP requests
|
8
|
-
to common novel sites **asynchronously**. It provides methods to retrieve
|
9
|
-
raw book info pages and chapter contents using a flexible URL templating
|
10
|
-
system defined by a site profile, with retry logic and random delays.
|
11
|
-
"""
|
12
|
-
|
13
|
-
import asyncio
|
14
|
-
import random
|
15
|
-
from typing import Dict, Optional
|
16
|
-
|
17
|
-
from novel_downloader.config import RequesterConfig, SiteProfile
|
18
|
-
from novel_downloader.core.requesters.base_async_session import BaseAsyncSession
|
19
|
-
|
20
|
-
|
21
|
-
class CommonAsyncSession(BaseAsyncSession):
|
22
|
-
"""
|
23
|
-
A common async session for handling site-specific HTTP requests.
|
24
|
-
|
25
|
-
:ivar _site: The unique identifier or name of the site.
|
26
|
-
:ivar _profile: Metadata and URL templates related to the site.
|
27
|
-
"""
|
28
|
-
|
29
|
-
def __init__(
|
30
|
-
self,
|
31
|
-
config: RequesterConfig,
|
32
|
-
site: str,
|
33
|
-
profile: SiteProfile,
|
34
|
-
cookies: Optional[Dict[str, str]] = None,
|
35
|
-
) -> None:
|
36
|
-
"""
|
37
|
-
Initialize a CommonAsyncSession instance.
|
38
|
-
|
39
|
-
:param config: The RequesterConfig instance containing settings.
|
40
|
-
:param site: The identifier or domain of the target site.
|
41
|
-
:param profile: The site's metadata and URL templates.
|
42
|
-
:param cookies: Optional cookies to preload into the session.
|
43
|
-
"""
|
44
|
-
self._init_session(config=config, cookies=cookies)
|
45
|
-
self._site = site
|
46
|
-
self._profile = profile
|
47
|
-
|
48
|
-
async def get_book_info(
|
49
|
-
self, book_id: str, wait_time: Optional[float] = None
|
50
|
-
) -> str:
|
51
|
-
"""
|
52
|
-
Fetch the raw HTML of the book info page asynchronously.
|
53
|
-
|
54
|
-
Relies on BaseAsyncSession.fetch for retry logic, then sleeps with jitter.
|
55
|
-
|
56
|
-
:param book_id: The book identifier.
|
57
|
-
:param wait_time: Base seconds to sleep (with 0.5-1.5x random factor).
|
58
|
-
:return: The page content as a string.
|
59
|
-
"""
|
60
|
-
url = self.book_info_url.format(book_id=book_id)
|
61
|
-
html = await self.fetch(url)
|
62
|
-
base = wait_time if wait_time is not None else self._config.wait_time
|
63
|
-
await asyncio.sleep(base * random.uniform(0.5, 1.5))
|
64
|
-
return html
|
65
|
-
|
66
|
-
async def get_book_chapter(
|
67
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
68
|
-
) -> str:
|
69
|
-
"""
|
70
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
71
|
-
|
72
|
-
Relies on BaseAsyncSession.fetch for retry logic, then sleeps with jitter.
|
73
|
-
|
74
|
-
:param book_id: The book identifier.
|
75
|
-
:param chapter_id: The chapter identifier.
|
76
|
-
:param wait_time: Base seconds to sleep (with 0.5-1.5x random factor).
|
77
|
-
:return: The chapter content as a string.
|
78
|
-
"""
|
79
|
-
url = self.chapter_url.format(book_id=book_id, chapter_id=chapter_id)
|
80
|
-
html = await self.fetch(url)
|
81
|
-
base = wait_time if wait_time is not None else self._config.wait_time
|
82
|
-
await asyncio.sleep(base * random.uniform(0.5, 1.5))
|
83
|
-
return html
|
84
|
-
|
85
|
-
@property
|
86
|
-
def site(self) -> str:
|
87
|
-
"""Return the site name."""
|
88
|
-
return self._site
|
89
|
-
|
90
|
-
@property
|
91
|
-
def book_info_url(self) -> str:
|
92
|
-
"""Return the URL template for fetching book info."""
|
93
|
-
return self._profile["book_info_url"]
|
94
|
-
|
95
|
-
@property
|
96
|
-
def chapter_url(self) -> str:
|
97
|
-
"""Return the URL template for fetching chapter content."""
|
98
|
-
return self._profile["chapter_url"]
|
@@ -1,126 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
"""
|
4
|
-
novel_downloader.core.requesters.common_requester.common_session
|
5
|
-
------------------------------------------------------------------
|
6
|
-
|
7
|
-
This module defines a `CommonSession` class for handling HTTP requests
|
8
|
-
to common novel sites. It provides methods to retrieve raw book
|
9
|
-
information pages and chapter contents using a flexible URL templating
|
10
|
-
system defined by a site profile.
|
11
|
-
"""
|
12
|
-
|
13
|
-
import time
|
14
|
-
from typing import Dict, Optional
|
15
|
-
|
16
|
-
from novel_downloader.config import RequesterConfig, SiteProfile
|
17
|
-
from novel_downloader.utils.time_utils import sleep_with_random_delay
|
18
|
-
|
19
|
-
from ..base_session import BaseSession
|
20
|
-
|
21
|
-
|
22
|
-
class CommonSession(BaseSession):
|
23
|
-
"""
|
24
|
-
A common session for handling site-specific HTTP requests.
|
25
|
-
|
26
|
-
:ivar _site: The unique identifier or name of the site.
|
27
|
-
:ivar _profile: Metadata and URL templates related to the site.
|
28
|
-
:ivar session: The HTTP session used to make requests.
|
29
|
-
"""
|
30
|
-
|
31
|
-
def __init__(
|
32
|
-
self,
|
33
|
-
config: RequesterConfig,
|
34
|
-
site: str,
|
35
|
-
profile: SiteProfile,
|
36
|
-
cookies: Optional[Dict[str, str]] = None,
|
37
|
-
):
|
38
|
-
"""
|
39
|
-
Initialize a CommonSession instance.
|
40
|
-
|
41
|
-
:param config: The RequesterConfig instance containing settings.
|
42
|
-
:param site: The identifier or domain of the target site.
|
43
|
-
:param profile: The site's metadata and URL templates.
|
44
|
-
:param cookies: Optional cookies to preload into the session.
|
45
|
-
"""
|
46
|
-
self._init_session(config=config, cookies=cookies)
|
47
|
-
self._site = site
|
48
|
-
self._profile = profile
|
49
|
-
|
50
|
-
def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
|
51
|
-
"""
|
52
|
-
Fetch the raw HTML (or JSON) of the book info page.
|
53
|
-
|
54
|
-
:param book_id: The book identifier.
|
55
|
-
:param wait_time: Base number of seconds to wait before returning content.
|
56
|
-
:return: The page content as a string.
|
57
|
-
:raises requests.HTTPError: If the request returns an unsuccessful status code.
|
58
|
-
"""
|
59
|
-
url = self.book_info_url.format(book_id=book_id)
|
60
|
-
base = wait_time if wait_time is not None else self._config.wait_time
|
61
|
-
|
62
|
-
for attempt in range(1, self.retry_times + 1):
|
63
|
-
try:
|
64
|
-
with self.session.get(url, timeout=self.timeout) as response:
|
65
|
-
response.raise_for_status()
|
66
|
-
content = response.text
|
67
|
-
sleep_with_random_delay(base, add_spread=1.0)
|
68
|
-
return content
|
69
|
-
except Exception as e:
|
70
|
-
if attempt == self.retry_times:
|
71
|
-
raise e # 最后一次也失败了,抛出异常
|
72
|
-
else:
|
73
|
-
time.sleep(self.retry_interval)
|
74
|
-
continue
|
75
|
-
raise RuntimeError("Unexpected error: get_book_info failed without returning")
|
76
|
-
|
77
|
-
def get_book_chapter(
|
78
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
79
|
-
) -> str:
|
80
|
-
"""
|
81
|
-
Fetch the raw HTML (or JSON) of a single chapter.
|
82
|
-
|
83
|
-
:param book_id: The book identifier.
|
84
|
-
:param chapter_id: The chapter identifier.
|
85
|
-
:param wait_time: Base number of seconds to wait before returning content.
|
86
|
-
:return: The chapter content as a string.
|
87
|
-
:raises requests.HTTPError: If the request returns an unsuccessful status code.
|
88
|
-
"""
|
89
|
-
url = self.chapter_url.format(book_id=book_id, chapter_id=chapter_id)
|
90
|
-
base = wait_time if wait_time is not None else self._config.wait_time
|
91
|
-
|
92
|
-
for attempt in range(1, self.retry_times + 1):
|
93
|
-
try:
|
94
|
-
with self.session.get(url, timeout=self.timeout) as response:
|
95
|
-
response.raise_for_status()
|
96
|
-
content = response.text
|
97
|
-
sleep_with_random_delay(base, add_spread=1.0)
|
98
|
-
return content
|
99
|
-
except Exception as e:
|
100
|
-
if attempt == self.retry_times:
|
101
|
-
raise e # 最后一次也失败了,抛出异常
|
102
|
-
else:
|
103
|
-
time.sleep(self.retry_interval)
|
104
|
-
continue
|
105
|
-
raise RuntimeError(
|
106
|
-
"Unexpected error: get_book_chapter failed without returning"
|
107
|
-
)
|
108
|
-
|
109
|
-
@property
|
110
|
-
def site(self) -> str:
|
111
|
-
"""Return the site name."""
|
112
|
-
return self._site
|
113
|
-
|
114
|
-
@property
|
115
|
-
def book_info_url(self) -> str:
|
116
|
-
"""
|
117
|
-
Return the URL template for fetching book information.
|
118
|
-
"""
|
119
|
-
return self._profile["book_info_url"]
|
120
|
-
|
121
|
-
@property
|
122
|
-
def chapter_url(self) -> str:
|
123
|
-
"""
|
124
|
-
Return the URL template for fetching chapter information.
|
125
|
-
"""
|
126
|
-
return self._profile["chapter_url"]
|
@@ -1,22 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
"""
|
4
|
-
novel_downloader.core.requesters.qidian_requester
|
5
|
-
-------------------------------------------------
|
6
|
-
|
7
|
-
This package provides the implementation of the Qidian-specific requester logic.
|
8
|
-
It contains modules for interacting with Qidian's website, including login,
|
9
|
-
page navigation, and data retrieval using a browser-based automation approach.
|
10
|
-
|
11
|
-
Modules:
|
12
|
-
- qidian_browser: Implements the QidianBrowser class for automated browser control.
|
13
|
-
- qidian_session: Implements the QidianSession class.
|
14
|
-
"""
|
15
|
-
|
16
|
-
from .qidian_broswer import QidianBrowser
|
17
|
-
from .qidian_session import QidianSession
|
18
|
-
|
19
|
-
__all__ = [
|
20
|
-
"QidianBrowser",
|
21
|
-
"QidianSession",
|
22
|
-
]
|
@@ -1,396 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
"""
|
4
|
-
novel_downloader.core.requesters.qidian_requester.qidian_broswer
|
5
|
-
----------------------------------------------------------------
|
6
|
-
|
7
|
-
This module defines the QidianRequester class for interacting with
|
8
|
-
the Qidian website.
|
9
|
-
It extends the BaseBrowser by adding methods for logging in and
|
10
|
-
retrieving book information.
|
11
|
-
"""
|
12
|
-
|
13
|
-
import logging
|
14
|
-
import random
|
15
|
-
import time
|
16
|
-
from typing import Optional
|
17
|
-
|
18
|
-
from DrissionPage._elements.chromium_element import ChromiumElement
|
19
|
-
from DrissionPage.common import Keys
|
20
|
-
|
21
|
-
from novel_downloader.config.models import RequesterConfig
|
22
|
-
from novel_downloader.core.requesters.base_browser import BaseBrowser
|
23
|
-
from novel_downloader.utils.i18n import t
|
24
|
-
from novel_downloader.utils.time_utils import sleep_with_random_delay
|
25
|
-
|
26
|
-
logger = logging.getLogger(__name__)
|
27
|
-
|
28
|
-
|
29
|
-
class QidianBrowser(BaseBrowser):
|
30
|
-
"""
|
31
|
-
QidianRequester provides methods for interacting with Qidian.com,
|
32
|
-
including checking login status and preparing book-related URLs.
|
33
|
-
|
34
|
-
Inherits base browser setup from BaseBrowser.
|
35
|
-
"""
|
36
|
-
|
37
|
-
DEFAULT_SCHEME = "https:"
|
38
|
-
QIDIAN_BASE_URL = "www.qidian.com"
|
39
|
-
QIDIAN_BOOKCASE_URL = f"{DEFAULT_SCHEME}//my.qidian.com/bookcase/"
|
40
|
-
QIDIAN_BOOK_INFO_URL_1 = f"{DEFAULT_SCHEME}//www.qidian.com/book"
|
41
|
-
QIDIAN_BOOK_INFO_URL_2 = f"{DEFAULT_SCHEME}//book.qidian.com/info"
|
42
|
-
QIDIAN_CHAPTER_URL = f"{DEFAULT_SCHEME}//www.qidian.com/chapter"
|
43
|
-
|
44
|
-
def __init__(self, config: RequesterConfig):
|
45
|
-
"""
|
46
|
-
Initialize the QidianRequester with a browser configuration.
|
47
|
-
|
48
|
-
:param config: The RequesterConfig instance containing browser settings.
|
49
|
-
"""
|
50
|
-
self._init_browser(config=config)
|
51
|
-
self._headless: bool = config.headless
|
52
|
-
self._logged_in: bool = False
|
53
|
-
|
54
|
-
def _is_user_logged_in(self) -> bool:
|
55
|
-
"""
|
56
|
-
Check whether the user is currently logged in by inspecting
|
57
|
-
the visibility of the 'sign-in' element on the page.
|
58
|
-
|
59
|
-
:return: True if the user appears to be logged in, False otherwise.
|
60
|
-
"""
|
61
|
-
if self._page is None:
|
62
|
-
raise RuntimeError("Browser page not initialized.")
|
63
|
-
try:
|
64
|
-
self._handle_overlay_mask()
|
65
|
-
sign_in_elem = self._page.ele("@class=sign-in")
|
66
|
-
if sign_in_elem:
|
67
|
-
class_value = sign_in_elem.attr("class")
|
68
|
-
if class_value and "hidden" not in class_value:
|
69
|
-
return True
|
70
|
-
except Exception as e:
|
71
|
-
logger.warning("[auth] Error while checking login status: %s", e)
|
72
|
-
return False
|
73
|
-
|
74
|
-
def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
|
75
|
-
"""
|
76
|
-
Attempt to log in to Qidian
|
77
|
-
"""
|
78
|
-
if manual_login:
|
79
|
-
return self._manual_login(max_retries)
|
80
|
-
else:
|
81
|
-
return self._login(max_retries)
|
82
|
-
|
83
|
-
def _login(self, max_retries: int = 3) -> bool:
|
84
|
-
"""
|
85
|
-
Attempt to log in to Qidian by handling overlays and clicking the login button.
|
86
|
-
|
87
|
-
:param max_retries: Maximum number of times to try clicking the login button.
|
88
|
-
:return: True if login succeeds or is already in place; False otherwise.
|
89
|
-
"""
|
90
|
-
if self._page is None:
|
91
|
-
raise RuntimeError("Browser page not initialized.")
|
92
|
-
original_url = self._page.url
|
93
|
-
try:
|
94
|
-
self._page.get("https://www.qidian.com/")
|
95
|
-
self._page.wait.eles_loaded("#login-box")
|
96
|
-
except Exception as e:
|
97
|
-
logger.warning("[auth] Failed to load login box: %s", e)
|
98
|
-
return False
|
99
|
-
|
100
|
-
for attempt in range(1, max_retries + 1):
|
101
|
-
if self._is_user_logged_in():
|
102
|
-
logger.debug("[auth] Already logged in.")
|
103
|
-
break
|
104
|
-
|
105
|
-
self._click_login_button(attempt)
|
106
|
-
time.sleep(self._config.retry_interval)
|
107
|
-
|
108
|
-
self._logged_in = self._is_user_logged_in()
|
109
|
-
if self._logged_in:
|
110
|
-
logger.info("[auth] Login successful.")
|
111
|
-
else:
|
112
|
-
logger.warning("[auth] Login failed after max retries.")
|
113
|
-
|
114
|
-
# return to original page
|
115
|
-
try:
|
116
|
-
if original_url:
|
117
|
-
self._page.get(original_url)
|
118
|
-
except Exception as e:
|
119
|
-
logger.debug("[auth] Failed to restore page URL: %s", e)
|
120
|
-
|
121
|
-
return self._logged_in
|
122
|
-
|
123
|
-
def _handle_overlay_mask(self) -> None:
|
124
|
-
"""
|
125
|
-
Detect and close any full-page overlay mask that might block the login UI.
|
126
|
-
"""
|
127
|
-
if self._page is None:
|
128
|
-
raise RuntimeError("Browser page not initialized.")
|
129
|
-
try:
|
130
|
-
mask = self._page.ele("@@tag()=div@@class=mask", timeout=2)
|
131
|
-
if not mask:
|
132
|
-
return
|
133
|
-
|
134
|
-
logger.debug("[auth] Overlay mask detected; attempting to close.")
|
135
|
-
iframe = self._page.get_frame("loginIfr", timeout=5)
|
136
|
-
if not iframe:
|
137
|
-
logger.debug("[auth] Login iframe not found.")
|
138
|
-
return
|
139
|
-
|
140
|
-
close_btn = iframe.ele("@id=close", timeout=5)
|
141
|
-
if close_btn:
|
142
|
-
close_btn.click()
|
143
|
-
logger.debug("[auth] Closed overlay mask via iframe close button.")
|
144
|
-
else:
|
145
|
-
logger.debug("[auth] Close button not found in login iframe.")
|
146
|
-
except Exception as e:
|
147
|
-
logger.debug("[auth] Error handling overlay mask: %s", e)
|
148
|
-
|
149
|
-
def _click_login_button(self, attempt: int) -> None:
|
150
|
-
"""
|
151
|
-
Try to click the login button on the page.
|
152
|
-
|
153
|
-
:param attempt: The current attempt number (for logging).
|
154
|
-
"""
|
155
|
-
if self._page is None:
|
156
|
-
raise RuntimeError("Browser page not initialized.")
|
157
|
-
try:
|
158
|
-
logger.debug("[auth] Attempting login click (#%s).", attempt)
|
159
|
-
login_btn = self._page.ele("@id=login-btn", timeout=5)
|
160
|
-
if isinstance(login_btn, ChromiumElement):
|
161
|
-
login_btn.click()
|
162
|
-
logger.debug("[auth] Login button clicked.")
|
163
|
-
else:
|
164
|
-
logger.debug("[auth] Login button not found.")
|
165
|
-
except Exception as e:
|
166
|
-
logger.debug("[auth] Exception clicking login button: %s", e)
|
167
|
-
|
168
|
-
def _manual_login(
|
169
|
-
self,
|
170
|
-
max_retries: int = 3,
|
171
|
-
) -> bool:
|
172
|
-
"""
|
173
|
-
Guide the user through an interactive manual login flow.
|
174
|
-
|
175
|
-
Steps:
|
176
|
-
1. If the browser is headless, shut it down and restart in headful mode.
|
177
|
-
2. Navigate to the Qidian homepage.
|
178
|
-
3. Prompt the user to complete login, retrying up to `max_retries` times.
|
179
|
-
4. Once logged in, restore original headless mode if needed.
|
180
|
-
|
181
|
-
:param max_retries: Number of times to check for login success.
|
182
|
-
:return: True if login was detected, False otherwise.
|
183
|
-
"""
|
184
|
-
if self._page is None:
|
185
|
-
raise RuntimeError("Browser page not initialized.")
|
186
|
-
original_headless = self._headless
|
187
|
-
|
188
|
-
# 1. Switch to headful mode if needed
|
189
|
-
if getattr(self, "_disable_images_orig", False):
|
190
|
-
logger.debug("[auth] Temporarily enabling images for manual login.")
|
191
|
-
self._options.no_imgs(False)
|
192
|
-
self._restart_browser(headless=False)
|
193
|
-
elif original_headless:
|
194
|
-
self._restart_browser(headless=False)
|
195
|
-
|
196
|
-
# 2. Navigate to home page
|
197
|
-
try:
|
198
|
-
self._page.get("https://www.qidian.com/")
|
199
|
-
except Exception as e:
|
200
|
-
logger.warning("[auth] Failed to load homepage for manual login: %s", e)
|
201
|
-
return False
|
202
|
-
|
203
|
-
# 3. Retry loop
|
204
|
-
for attempt in range(1, max_retries + 1):
|
205
|
-
if self._is_user_logged_in():
|
206
|
-
logger.info("[auth] Detected successful login.")
|
207
|
-
self._logged_in = True
|
208
|
-
break
|
209
|
-
if attempt == 1:
|
210
|
-
print(t("login_prompt_intro"))
|
211
|
-
input(
|
212
|
-
t("login_prompt_press_enter", attempt=attempt, max_retries=max_retries)
|
213
|
-
)
|
214
|
-
else:
|
215
|
-
logger.warning("[auth] Manual login failed after %d attempts.", max_retries)
|
216
|
-
self._logged_in = False
|
217
|
-
return self._logged_in
|
218
|
-
|
219
|
-
# 4. Restore headless if we changed it, then re-establish session
|
220
|
-
if original_headless or getattr(self, "_disable_images_orig", False):
|
221
|
-
logger.debug("[auth] Restoring browser settings after manual login...")
|
222
|
-
self._options.no_imgs(self._disable_images_orig)
|
223
|
-
self._restart_browser(headless=original_headless)
|
224
|
-
self.login()
|
225
|
-
if self._logged_in:
|
226
|
-
logger.info(
|
227
|
-
"[auth] Login session successfully carried over after restart."
|
228
|
-
)
|
229
|
-
else:
|
230
|
-
logger.warning(
|
231
|
-
"[auth] Lost login session after restoring headless mode."
|
232
|
-
)
|
233
|
-
|
234
|
-
return self._logged_in
|
235
|
-
|
236
|
-
def _restart_browser(self, headless: Optional[bool] = None) -> None:
|
237
|
-
"""
|
238
|
-
Shutdown the current browser and restart it with the given headless setting.
|
239
|
-
|
240
|
-
:param headless: Whether to run the browser in headless mode.
|
241
|
-
"""
|
242
|
-
if self._browser:
|
243
|
-
self._browser.quit()
|
244
|
-
self._clear_browser_refs()
|
245
|
-
|
246
|
-
# Apply new headless setting and reinitialize
|
247
|
-
if headless is not None:
|
248
|
-
self._options.headless(headless)
|
249
|
-
self._headless = headless
|
250
|
-
self._setup()
|
251
|
-
logger.debug("[browser] Browser restarted (headless=%s).", headless)
|
252
|
-
|
253
|
-
def _build_book_info_url(self, book_id: str) -> str:
|
254
|
-
"""
|
255
|
-
Construct the URL for fetching a book's info page.
|
256
|
-
|
257
|
-
:param book_id: The identifier of the book.
|
258
|
-
:return: Fully qualified URL for the book info page.
|
259
|
-
"""
|
260
|
-
return f"{self.QIDIAN_BOOK_INFO_URL_2}/{book_id}/"
|
261
|
-
|
262
|
-
def _build_chapter_url(self, book_id: str, chapter_id: str) -> str:
|
263
|
-
"""
|
264
|
-
Construct the URL for fetching a specific chapter.
|
265
|
-
|
266
|
-
:param book_id: The identifier of the book.
|
267
|
-
:param chapter_id: The identifier of the chapter.
|
268
|
-
:return: Fully qualified chapter URL.
|
269
|
-
"""
|
270
|
-
return f"{self.QIDIAN_CHAPTER_URL}/{book_id}/{chapter_id}/"
|
271
|
-
|
272
|
-
def _build_bookcase_url(self) -> str:
|
273
|
-
"""
|
274
|
-
Construct the URL for the user's bookcase page.
|
275
|
-
|
276
|
-
:return: Fully qualified URL of the bookcase.
|
277
|
-
"""
|
278
|
-
return self.QIDIAN_BOOKCASE_URL
|
279
|
-
|
280
|
-
def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
|
281
|
-
"""
|
282
|
-
Retrieve the HTML of a Qidian book info page.
|
283
|
-
|
284
|
-
This method enforces that the user is logged in, navigates to the
|
285
|
-
book's info URL, waits a randomized delay to mimic human browsing,
|
286
|
-
and returns the page HTML.
|
287
|
-
|
288
|
-
:param book_id: The identifier of the book to fetch.
|
289
|
-
:param wait_time: Base wait time in seconds before returning content.
|
290
|
-
If None, uses `self._config.wait_time`.
|
291
|
-
:return: The HTML content of the book info page, or an empty string on error.
|
292
|
-
"""
|
293
|
-
if self._page is None:
|
294
|
-
raise RuntimeError("Browser page not initialized.")
|
295
|
-
url = self._build_book_info_url(book_id)
|
296
|
-
try:
|
297
|
-
# Navigate and fetch
|
298
|
-
self._page.get(url)
|
299
|
-
|
300
|
-
# Randomized human-like delay
|
301
|
-
base = wait_time if wait_time is not None else self._config.wait_time
|
302
|
-
sleep_with_random_delay(base, mul_spread=1.2)
|
303
|
-
|
304
|
-
html = str(self._page.html)
|
305
|
-
logger.debug("[fetch] Fetched book info for ID %s from %s", book_id, url)
|
306
|
-
return html
|
307
|
-
|
308
|
-
except Exception as e:
|
309
|
-
logger.warning("[fetch] Error fetching book info from '%s': %s", url, e)
|
310
|
-
return ""
|
311
|
-
|
312
|
-
def _scroll_page(self, presses: int, pause: float) -> None:
|
313
|
-
"""
|
314
|
-
Scroll down by sending DOWN key presses to the page.
|
315
|
-
|
316
|
-
:param presses: Number of DOWN key presses.
|
317
|
-
:param pause: Seconds to wait between each press.
|
318
|
-
"""
|
319
|
-
if self._page is None:
|
320
|
-
raise RuntimeError("Browser page not initialized.")
|
321
|
-
for _ in range(presses):
|
322
|
-
try:
|
323
|
-
self._page.actions.key_down(Keys.DOWN)
|
324
|
-
except Exception as e:
|
325
|
-
logger.debug("[page] Scroll press failed: %s", e)
|
326
|
-
time.sleep(pause)
|
327
|
-
|
328
|
-
def get_book_chapter(
|
329
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
330
|
-
) -> str:
|
331
|
-
"""
|
332
|
-
Retrieve the HTML content of a specific chapter.
|
333
|
-
|
334
|
-
Ensures the user is logged in, navigates to the chapter page,
|
335
|
-
waits a randomized delay to mimic human reading, then scrolls
|
336
|
-
to trigger any lazy-loaded content.
|
337
|
-
|
338
|
-
:param book_id: The identifier of the book.
|
339
|
-
:param chapter_id: The identifier of the chapter.
|
340
|
-
:param wait_time: Base wait time in seconds before scrolling. If None,
|
341
|
-
falls back to `self._config.wait_time`.
|
342
|
-
:return: The HTML content of the chapter page, or empty string on error.
|
343
|
-
"""
|
344
|
-
if self._page is None:
|
345
|
-
raise RuntimeError("Browser page not initialized.")
|
346
|
-
url = self._build_chapter_url(book_id, chapter_id)
|
347
|
-
try:
|
348
|
-
# 1. Navigate to chapter URL
|
349
|
-
self._page.get(url)
|
350
|
-
|
351
|
-
# 2. Randomized human-like delay
|
352
|
-
base = wait_time if wait_time is not None else self._config.wait_time
|
353
|
-
# sleep_with_random_delay(base, mul_spread=1.2)
|
354
|
-
|
355
|
-
# 3. Scroll down to load dynamic content
|
356
|
-
presses = int(random.uniform(base, base + 5) * 2)
|
357
|
-
self._scroll_page(presses, pause=0.5)
|
358
|
-
|
359
|
-
html = str(self._page.html)
|
360
|
-
logger.debug("[fetch] Fetched chapter %s for book %s", chapter_id, book_id)
|
361
|
-
return html
|
362
|
-
|
363
|
-
except Exception as e:
|
364
|
-
logger.warning("[fetch] Error fetching chapter from '%s': %s", url, e)
|
365
|
-
return ""
|
366
|
-
|
367
|
-
def get_bookcase(self, wait_time: Optional[float] = None) -> str:
|
368
|
-
"""
|
369
|
-
Retrieve the HTML content of the logged-in user's Qidian bookcase page.
|
370
|
-
|
371
|
-
:param wait_time: Base number of seconds to wait before returning content.
|
372
|
-
If None, falls back to `self._config.wait_time`.
|
373
|
-
:return: The HTML markup of the bookcase page, or empty string on error.
|
374
|
-
:raises RuntimeError: If the user is not logged in.
|
375
|
-
"""
|
376
|
-
if self._page is None:
|
377
|
-
raise RuntimeError("Browser page not initialized.")
|
378
|
-
if not self._logged_in:
|
379
|
-
raise RuntimeError("User not logged in. Please call login() first.")
|
380
|
-
|
381
|
-
url = self._build_bookcase_url()
|
382
|
-
try:
|
383
|
-
# Navigate to the bookcase page
|
384
|
-
self._page.get(url)
|
385
|
-
|
386
|
-
# Randomized human-like delay
|
387
|
-
base = wait_time if wait_time is not None else self._config.wait_time
|
388
|
-
sleep_with_random_delay(base, mul_spread=1.2)
|
389
|
-
|
390
|
-
html = str(self._page.html)
|
391
|
-
logger.debug("[fetch] Fetched bookcase HTML from %s", url)
|
392
|
-
return html
|
393
|
-
|
394
|
-
except Exception as e:
|
395
|
-
logger.warning("[fetch] Error fetching bookcase from '%s': %s", url, e)
|
396
|
-
return ""
|