novel-downloader 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -2
- novel_downloader/cli/__init__.py +0 -1
- novel_downloader/cli/clean.py +2 -10
- novel_downloader/cli/download.py +16 -22
- novel_downloader/cli/interactive.py +0 -1
- novel_downloader/cli/main.py +1 -3
- novel_downloader/cli/settings.py +8 -8
- novel_downloader/config/__init__.py +0 -1
- novel_downloader/config/adapter.py +32 -27
- novel_downloader/config/loader.py +116 -108
- novel_downloader/config/models.py +35 -29
- novel_downloader/config/site_rules.py +2 -4
- novel_downloader/core/__init__.py +0 -1
- novel_downloader/core/downloaders/__init__.py +4 -4
- novel_downloader/core/downloaders/base/__init__.py +14 -0
- novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
- novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
- novel_downloader/core/downloaders/biquge/__init__.py +12 -0
- novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
- novel_downloader/core/downloaders/common/__init__.py +14 -0
- novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
- novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +33 -21
- novel_downloader/core/downloaders/qidian/__init__.py +10 -0
- novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +79 -62
- novel_downloader/core/factory/__init__.py +4 -5
- novel_downloader/core/factory/{downloader_factory.py → downloader.py} +25 -26
- novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
- novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
- novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
- novel_downloader/core/interfaces/__init__.py +8 -9
- novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
- novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +23 -12
- novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
- novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
- novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
- novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +31 -17
- novel_downloader/core/parsers/__init__.py +5 -4
- novel_downloader/core/parsers/{base_parser.py → base.py} +18 -9
- novel_downloader/core/parsers/biquge/__init__.py +10 -0
- novel_downloader/core/parsers/biquge/main_parser.py +126 -0
- novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
- novel_downloader/core/parsers/{common_parser → common}/helper.py +13 -13
- novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +40 -48
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +14 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +36 -44
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +14 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/book_info_parser.py +5 -6
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +7 -8
- novel_downloader/core/requesters/__init__.py +9 -5
- novel_downloader/core/requesters/base/__init__.py +16 -0
- novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +177 -73
- novel_downloader/core/requesters/base/browser.py +340 -0
- novel_downloader/core/requesters/base/session.py +364 -0
- novel_downloader/core/requesters/biquge/__init__.py +12 -0
- novel_downloader/core/requesters/biquge/session.py +90 -0
- novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
- novel_downloader/core/requesters/common/async_session.py +96 -0
- novel_downloader/core/requesters/common/session.py +113 -0
- novel_downloader/core/requesters/qidian/__init__.py +21 -0
- novel_downloader/core/requesters/qidian/broswer.py +307 -0
- novel_downloader/core/requesters/qidian/session.py +287 -0
- novel_downloader/core/savers/__init__.py +5 -3
- novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
- novel_downloader/core/savers/biquge.py +25 -0
- novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
- novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +23 -51
- novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
- novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
- novel_downloader/core/savers/epub_utils/__init__.py +0 -1
- novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
- novel_downloader/core/savers/epub_utils/initializer.py +4 -5
- novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
- novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
- novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
- novel_downloader/locales/en.json +8 -4
- novel_downloader/locales/zh.json +5 -1
- novel_downloader/resources/config/settings.toml +88 -0
- novel_downloader/utils/cache.py +2 -2
- novel_downloader/utils/chapter_storage.py +340 -0
- novel_downloader/utils/constants.py +6 -4
- novel_downloader/utils/crypto_utils.py +3 -3
- novel_downloader/utils/file_utils/__init__.py +0 -1
- novel_downloader/utils/file_utils/io.py +12 -17
- novel_downloader/utils/file_utils/normalize.py +1 -3
- novel_downloader/utils/file_utils/sanitize.py +2 -9
- novel_downloader/utils/fontocr/__init__.py +0 -1
- novel_downloader/utils/fontocr/ocr_v1.py +19 -22
- novel_downloader/utils/fontocr/ocr_v2.py +147 -60
- novel_downloader/utils/hash_store.py +19 -20
- novel_downloader/utils/hash_utils.py +0 -1
- novel_downloader/utils/i18n.py +3 -4
- novel_downloader/utils/logger.py +5 -6
- novel_downloader/utils/model_loader.py +5 -8
- novel_downloader/utils/network.py +9 -10
- novel_downloader/utils/state.py +6 -7
- novel_downloader/utils/text_utils/__init__.py +0 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
- novel_downloader/utils/text_utils/diff_display.py +0 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -4
- novel_downloader/utils/text_utils/text_cleaning.py +0 -1
- novel_downloader/utils/time_utils/__init__.py +0 -1
- novel_downloader/utils/time_utils/datetime_utils.py +8 -10
- novel_downloader/utils/time_utils/sleep_utils.py +1 -3
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/METADATA +14 -17
- novel_downloader-1.3.1.dist-info/RECORD +127 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/WHEEL +1 -1
- novel_downloader/core/requesters/base_browser.py +0 -214
- novel_downloader/core/requesters/base_session.py +0 -246
- novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
- novel_downloader/core/requesters/common_requester/common_session.py +0 -126
- novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -396
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
- novel_downloader/resources/config/settings.yaml +0 -76
- novel_downloader-1.2.2.dist-info/RECORD +0 -115
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,90 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.requesters.biquge.session
|
4
|
+
-----------------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
from novel_downloader.core.requesters.base import BaseSession
|
11
|
+
|
12
|
+
|
13
|
+
class BiqugeSession(BaseSession):
    """
    A session class for interacting with the Biquge (www.b520.cc) novel website.

    The URL templates are class attributes and the URL builders are
    classmethods, so URLs can be constructed without creating a session.
    """

    BOOK_INFO_URL = "http://www.b520.cc/{book_id}/"
    CHAPTER_URL = "http://www.b520.cc/{book_id}/{chapter_id}.html"

    def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Fetch the raw HTML of the book info page.

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.get``.
        :return: The page content as a string, or an empty string if the
                 request fails for any reason.
        """
        url = self.book_info_url(book_id=book_id)
        try:
            resp = self.get(url, **kwargs)
            resp.raise_for_status()
            return resp.text
        except Exception as exc:
            # Best-effort fetch: the failure is logged and reported to the
            # caller as an empty string instead of being propagated.
            self.logger.warning(
                "[session] get_book_info(%s) failed: %s",
                book_id,
                exc,
            )
        return ""

    def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Fetch the HTML of a single chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.get``.
        :return: The chapter content as a string, or an empty string if the
                 request fails for any reason.
        """
        url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
        try:
            resp = self.get(url, **kwargs)
            resp.raise_for_status()
            return resp.text
        except Exception as exc:
            # Log both identifiers: a book has many chapters, so the book id
            # alone does not tell which chapter request failed.
            self.logger.warning(
                "[session] get_book_chapter(%s, %s) failed: %s",
                book_id,
                chapter_id,
                exc,
            )
        return ""

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return cls.BOOK_INFO_URL.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
|
@@ -1,16 +1,15 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.requesters.
|
5
|
-
|
3
|
+
novel_downloader.core.requesters.common
|
4
|
+
---------------------------------------
|
6
5
|
|
7
6
|
This module provides the `CommonSession` class wrapper for common HTTP
|
8
7
|
request operations to novel websites. It serves as a unified access
|
9
8
|
point to import `CommonSession` without exposing lower-level modules.
|
10
9
|
"""
|
11
10
|
|
12
|
-
from .
|
13
|
-
from .
|
11
|
+
from .async_session import CommonAsyncSession
|
12
|
+
from .session import CommonSession
|
14
13
|
|
15
14
|
__all__ = [
|
16
15
|
"CommonAsyncSession",
|
@@ -0,0 +1,96 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.requesters.common.async_session
|
4
|
+
-----------------------------------------------------
|
5
|
+
|
6
|
+
This module defines a `CommonAsyncSession` class for handling HTTP requests
|
7
|
+
to common novel sites **asynchronously**. It provides methods to retrieve
|
8
|
+
raw book info pages and chapter contents using a flexible URL templating
|
9
|
+
system defined by a site profile, with retry logic and random delays.
|
10
|
+
"""
|
11
|
+
|
12
|
+
from typing import Any
|
13
|
+
|
14
|
+
from novel_downloader.config import RequesterConfig, SiteProfile
|
15
|
+
from novel_downloader.core.requesters.base import BaseAsyncSession
|
16
|
+
|
17
|
+
|
18
|
+
class CommonAsyncSession(BaseAsyncSession):
    """
    Asynchronous requester whose URLs come from a site profile.

    The profile supplies the ``book_info_url`` and ``chapter_url`` templates;
    page retrieval itself is delegated to the inherited ``fetch`` coroutine.
    """

    def __init__(
        self,
        config: RequesterConfig,
        site: str,
        profile: SiteProfile,
        cookies: dict[str, str] | None = None,
    ) -> None:
        """
        Create a session bound to one site and its URL templates.

        :param config: Requester settings, passed on to the base class.
        :param site: The identifier or domain of the target site.
        :param profile: Mapping holding the site's URL templates.
        :param cookies: Optional cookies, passed on to the base class.
        """
        super().__init__(config, cookies)
        self._profile = profile
        self._site = site

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Retrieve the book info page for ``book_id``.

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: Whatever ``fetch`` returns for the book info URL.
        """
        return await self.fetch(self.book_info_url(book_id=book_id), **kwargs)

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Retrieve a single chapter page.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: Whatever ``fetch`` returns for the chapter URL.
        """
        target = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
        return await self.fetch(target, **kwargs)

    @property
    def site(self) -> str:
        """Name of the site this session was created for."""
        return self._site

    def book_info_url(self, book_id: str) -> str:
        """
        Fill the profile's ``book_info_url`` template.

        :param book_id: The identifier of the book.
        :return: The formatted book info URL.
        """
        template = self._profile["book_info_url"]
        return template.format(book_id=book_id)

    def chapter_url(self, book_id: str, chapter_id: str) -> str:
        """
        Fill the profile's ``chapter_url`` template.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: The formatted chapter URL.
        """
        template = self._profile["chapter_url"]
        return template.format(book_id=book_id, chapter_id=chapter_id)
|
@@ -0,0 +1,113 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.requesters.common.session
|
4
|
+
-----------------------------------------------
|
5
|
+
|
6
|
+
This module defines a `CommonSession` class for handling HTTP requests
|
7
|
+
to common novel sites. It provides methods to retrieve raw book
|
8
|
+
information pages and chapter contents using a flexible URL templating
|
9
|
+
system defined by a site profile.
|
10
|
+
"""
|
11
|
+
|
12
|
+
from typing import Any
|
13
|
+
|
14
|
+
from novel_downloader.config import RequesterConfig, SiteProfile
|
15
|
+
from novel_downloader.core.requesters.base import BaseSession
|
16
|
+
|
17
|
+
|
18
|
+
class CommonSession(BaseSession):
    """
    Synchronous requester whose URLs come from a site profile.

    The profile supplies the ``book_info_url`` and ``chapter_url`` templates.
    Both fetch methods are best-effort: any failure is logged as a warning
    and reported to the caller as an empty string.
    """

    def __init__(
        self,
        config: RequesterConfig,
        site: str,
        profile: SiteProfile,
        cookies: dict[str, str] | None = None,
    ) -> None:
        """
        Create a session bound to one site and its URL templates.

        :param config: Requester settings, passed on to the base class.
        :param site: The identifier or domain of the target site.
        :param profile: Mapping holding the site's URL templates.
        :param cookies: Optional cookies, passed on to the base class.
        """
        super().__init__(config, cookies)
        self._profile = profile
        self._site = site

    def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Retrieve the book info page for ``book_id``.

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.get``.
        :return: The response text, or ``""`` if anything goes wrong.
        """
        target = self.book_info_url(book_id=book_id)
        try:
            response = self.get(target, **kwargs)
            response.raise_for_status()
            # Read the body inside the ``try`` so a failure here is handled
            # the same way as a failed request.
            html = response.text
        except Exception as err:
            self.logger.warning("Failed to fetch book info for %s: %s", book_id, err)
            return ""
        return html

    def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Retrieve a single chapter page.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``self.get``.
        :return: The response text, or ``""`` if anything goes wrong.
        """
        target = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
        try:
            response = self.get(target, **kwargs)
            response.raise_for_status()
            html = response.text
        except Exception as err:
            self.logger.warning(
                "Failed to fetch book chapter for %s(%s): %s",
                book_id,
                chapter_id,
                err,
            )
            return ""
        return html

    @property
    def site(self) -> str:
        """Name of the site this session was created for."""
        return self._site

    def book_info_url(self, book_id: str) -> str:
        """
        Fill the profile's ``book_info_url`` template.

        :param book_id: The identifier of the book.
        :return: The formatted book info URL.
        """
        template = self._profile["book_info_url"]
        return template.format(book_id=book_id)

    def chapter_url(self, book_id: str, chapter_id: str) -> str:
        """
        Fill the profile's ``chapter_url`` template.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: The formatted chapter URL.
        """
        template = self._profile["chapter_url"]
        return template.format(book_id=book_id, chapter_id=chapter_id)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.requesters.qidian
|
4
|
+
---------------------------------------
|
5
|
+
|
6
|
+
This package provides the implementation of the Qidian-specific requester logic.
|
7
|
+
It contains modules for interacting with Qidian's website, including login,
|
8
|
+
page navigation, and data retrieval using a browser-based automation approach.
|
9
|
+
|
10
|
+
Modules:
|
11
|
+
- browser
|
12
|
+
- session
|
13
|
+
"""
|
14
|
+
|
15
|
+
from .broswer import QidianBrowser
|
16
|
+
from .session import QidianSession
|
17
|
+
|
18
|
+
__all__ = [
|
19
|
+
"QidianBrowser",
|
20
|
+
"QidianSession",
|
21
|
+
]
|
@@ -0,0 +1,307 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.requesters.qidian.broswer
|
4
|
+
-----------------------------------------------
|
5
|
+
|
6
|
+
This module defines the QidianBrowser class for interacting with
|
7
|
+
the Qidian website.
|
8
|
+
It extends the BaseBrowser by adding methods for logging in and
|
9
|
+
retrieving book information.
|
10
|
+
"""
|
11
|
+
|
12
|
+
import time
|
13
|
+
from typing import Any
|
14
|
+
|
15
|
+
from novel_downloader.config.models import RequesterConfig
|
16
|
+
from novel_downloader.core.requesters.base import BaseBrowser
|
17
|
+
from novel_downloader.utils.i18n import t
|
18
|
+
|
19
|
+
|
20
|
+
class QidianBrowser(BaseBrowser):
    """
    QidianBrowser provides methods for interacting with Qidian.com,
    including logging in and fetching book, chapter and bookcase pages.

    Inherits base browser setup from BaseBrowser.
    """

    BOOKCASE_URL = "https://my.qidian.com/bookcase/"
    BOOK_INFO_URL = "https://book.qidian.com/info/{book_id}/"
    CHAPTER_URL = "https://www.qidian.com/chapter/{book_id}/{chapter_id}/"

    def __init__(
        self,
        config: RequesterConfig,
    ) -> None:
        """
        Initialize the QidianBrowser with a browser configuration.

        :param config: The RequesterConfig instance containing browser settings.
        """
        super().__init__(config)
        self._logged_in: bool = False
        self._retry_times = config.retry_times
        # The configured backoff factor is used directly as the pause
        # between automatic login attempts.
        self._retry_interval = config.backoff_factor
        self._timeout = config.timeout

    def login(
        self,
        username: str = "",
        password: str = "",
        manual_login: bool = False,
        **kwargs: Any,
    ) -> bool:
        """
        Attempt to log in to Qidian.

        :param username: Not used by this implementation.
        :param password: Not used by this implementation.
        :param manual_login: If True, run the interactive manual flow;
                             otherwise run the automatic flow.
        :param kwargs: Accepted for interface compatibility; not used.
        :return: True if the session ends up logged in, False otherwise.
        """
        if manual_login:
            return self._login_manual()
        return self._login_auto()

    def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Retrieve the HTML of a Qidian book info page.

        :param book_id: The identifier of the book to fetch.
        :param kwargs: Accepted for interface compatibility; not used.
        :return: The HTML content of the book info page, or an empty string on error.
        """
        url = self.book_info_url(book_id)
        try:
            # Navigate and fetch
            self.page.get(url)
            html = str(self.page.html)
            self.logger.debug(
                "[fetch] Fetched book info for ID %s from %s", book_id, url
            )
            return html
        except Exception as e:
            self.logger.warning(
                "[fetch] Error fetching book info from '%s': %s", url, e
            )
        return ""

    def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Retrieve the HTML content of a specific chapter.

        Navigates to the chapter page and returns its HTML. No login check
        is performed here; call ``login()`` beforehand if the chapter
        requires an authenticated session.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :param kwargs: Accepted for interface compatibility; not used.
        :return: The HTML content of the chapter page, or empty string on error.
        """
        url = self.chapter_url(book_id, chapter_id)
        try:
            # Navigate to chapter URL
            self.page.get(url)
            html = str(self.page.html)
            self.logger.debug(
                "[fetch] Fetched chapter %s for book %s", chapter_id, book_id
            )
            return html
        except Exception as e:
            self.logger.warning("[fetch] Error fetching chapter from '%s': %s", url, e)
        return ""

    def get_bookcase(
        self,
        page: int = 1,
        **kwargs: Any,
    ) -> str:
        """
        Retrieve the HTML content of the logged-in user's Qidian bookcase page.

        :param page: Not used by this implementation; the same bookcase URL
                     is always requested.
        :param kwargs: Accepted for interface compatibility; not used.
        :return: The HTML markup of the bookcase page, or empty string on error.
        :raises RuntimeError: If the user is not logged in.
        """
        if not self._logged_in:
            raise RuntimeError("User not logged in. Please call login() first.")

        url = self.bookcase_url()
        try:
            # Navigate to the bookcase page
            self.page.get(url)
            html = str(self.page.html)
            self.logger.debug("[fetch] Fetched bookcase HTML from %s", url)
            return html
        except Exception as e:
            self.logger.warning("[fetch] Error fetching bookcase from '%s': %s", url, e)
        return ""

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return cls.BOOK_INFO_URL.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)

    @classmethod
    def bookcase_url(cls) -> str:
        """
        Construct the URL for the user's bookcase page.

        :return: Fully qualified URL of the bookcase.
        """
        return cls.BOOKCASE_URL

    def _login_auto(self, timeout: float = 5.0) -> bool:
        """
        Attempt to log in to Qidian by handling overlays and clicking the login button.

        :param timeout: Timeout passed to the login-box wait and to each
                        login-button click.
        :return: True if login succeeds or is already in place; False otherwise.
        """
        try:
            self.page.get("https://www.qidian.com/")
            self.page.wait.eles_loaded("#login-box", timeout=timeout)
        except Exception as e:
            self.logger.warning("[auth] Failed to load login box: %s", e)
            return False

        for attempt in range(1, self._retry_times + 1):
            if self._check_login_status():
                self.logger.debug("[auth] Already logged in.")
                break
            self.logger.debug("[auth] Attempting login click (#%s).", attempt)
            if self.click_button("@id=login-btn", timeout=timeout):
                self.logger.debug("[auth] Login button clicked.")
            else:
                self.logger.debug("[auth] Login button not found.")
            time.sleep(self._retry_interval)

        # Final verification, so the last click attempt is also taken
        # into account.
        self._logged_in = self._check_login_status()
        if self._logged_in:
            self.logger.info("[auth] Login successful.")
        else:
            self.logger.warning("[auth] Login failed after max retries.")

        return self._logged_in

    def _restore_browser(self, headless: bool) -> None:
        """
        Re-apply the original image setting and restart the browser.

        :param headless: Headless mode to restart the browser in.
        """
        self.logger.debug("[auth] Restoring browser settings after manual login...")
        self._options.no_imgs(self._disable_images_orig)
        self.restart_browser(headless=headless)

    def _login_manual(self) -> bool:
        """
        Guide the user through an interactive manual login flow.

        Steps:
        1. If the browser is headless or has images disabled, restart it in
           headful mode (re-enabling images when they were disabled).
        2. Navigate to the Qidian homepage.
        3. Prompt the user to complete login, up to ``self._retry_times``
           times, then verify the login status once more after the last
           prompt.
        4. Restore the original browser settings if they were changed; on
           success, re-establish the session with the automatic flow.

        :return: True if login was detected, False otherwise.
        """
        original_headless = self._headless
        settings_changed = bool(original_headless or self._disable_images_orig)

        # 1. Switch to headful mode if needed
        if self._disable_images_orig:
            self.logger.debug("[auth] Temporarily enabling images for manual login.")
            self._options.no_imgs(False)
            self.restart_browser(headless=False)
        elif original_headless:
            self.restart_browser(headless=False)

        # 2. Navigate to home page
        try:
            self.page.get("https://www.qidian.com/")
        except Exception as e:
            self.logger.warning(
                "[auth] Failed to load homepage for manual login: %s", e
            )
            # Do not leave the browser in the temporary headful state.
            if settings_changed:
                self._restore_browser(original_headless)
            return False

        # 3. Retry loop
        for attempt in range(1, self._retry_times + 1):
            if self._check_login_status():
                self.logger.debug("[auth] Already logged in.")
                self._logged_in = True
                break
            if attempt == 1:
                print(t("login_prompt_intro"))
            input(
                t(
                    "login_prompt_press_enter",
                    attempt=attempt,
                    max_retries=self._retry_times,
                )
            )
        else:
            # The status check runs at the start of each iteration, so the
            # user's response to the last prompt has not been verified yet.
            self._logged_in = self._check_login_status()

        if not self._logged_in:
            self.logger.warning(
                "[auth] Manual login failed after %d attempts.", self._retry_times
            )
            if settings_changed:
                self._restore_browser(original_headless)
            return False

        # 4. Restore headless if changed, then re-establish session
        if settings_changed:
            self._restore_browser(original_headless)
            self._logged_in = self._login_auto()
            if self._logged_in:
                self.logger.info(
                    "[auth] Login session successfully carried over after restart."
                )
            else:
                self.logger.warning(
                    "[auth] Lost login session after restoring headless mode."
                )

        return self._logged_in

    def _check_login_status(self) -> bool:
        """
        Check whether the user is currently logged in by inspecting
        the 'sign-in' element on the page.

        Any overlay is dismissed first. The user is treated as logged in
        when an element with class ``sign-in`` exists and its ``class``
        attribute does not contain ``hidden``.

        :return: True if the user appears to be logged in, False otherwise
                 (including when the check itself raises).
        """
        try:
            self._dismiss_overlay()
            sign_in_elem = self.page.ele("@class=sign-in")
            if sign_in_elem:
                class_value = sign_in_elem.attr("class")
                if class_value and "hidden" not in class_value:
                    return True
        except Exception as e:
            self.logger.warning("[auth] Error while checking login status: %s", e)
        return False

    def _dismiss_overlay(self, timeout: float = 2.0) -> None:
        """
        Detect and close any full-page overlay mask that might block the login UI.

        Errors are logged at debug level and otherwise ignored.

        :param timeout: Timeout used when looking up the overlay mask.
        """
        try:
            mask = self.page.ele("@@tag()=div@@class=mask", timeout=timeout)
            if not mask:
                return
            self.logger.debug("[auth] Overlay mask detected; attempting to close.")
            iframe = self.get_frame("loginIfr")
            if iframe is None:
                self.logger.debug("[auth] Login iframe not found.")
                return
            self.click_button("@id=close", page=iframe)
        except Exception as e:
            self.logger.debug("[auth] Error handling overlay mask: %s", e)
|