novel-downloader 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. novel_downloader/__init__.py +1 -2
  2. novel_downloader/cli/__init__.py +0 -1
  3. novel_downloader/cli/clean.py +2 -10
  4. novel_downloader/cli/download.py +16 -22
  5. novel_downloader/cli/interactive.py +0 -1
  6. novel_downloader/cli/main.py +1 -3
  7. novel_downloader/cli/settings.py +8 -8
  8. novel_downloader/config/__init__.py +0 -1
  9. novel_downloader/config/adapter.py +32 -27
  10. novel_downloader/config/loader.py +116 -108
  11. novel_downloader/config/models.py +35 -29
  12. novel_downloader/config/site_rules.py +2 -4
  13. novel_downloader/core/__init__.py +0 -1
  14. novel_downloader/core/downloaders/__init__.py +4 -4
  15. novel_downloader/core/downloaders/base/__init__.py +14 -0
  16. novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
  17. novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
  18. novel_downloader/core/downloaders/biquge/__init__.py +12 -0
  19. novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
  20. novel_downloader/core/downloaders/common/__init__.py +14 -0
  21. novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
  22. novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +33 -21
  23. novel_downloader/core/downloaders/qidian/__init__.py +10 -0
  24. novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +79 -62
  25. novel_downloader/core/factory/__init__.py +4 -5
  26. novel_downloader/core/factory/{downloader_factory.py → downloader.py} +25 -26
  27. novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
  28. novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
  29. novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
  30. novel_downloader/core/interfaces/__init__.py +8 -9
  31. novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
  32. novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +23 -12
  33. novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
  34. novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
  35. novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
  36. novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +31 -17
  37. novel_downloader/core/parsers/__init__.py +5 -4
  38. novel_downloader/core/parsers/{base_parser.py → base.py} +18 -9
  39. novel_downloader/core/parsers/biquge/__init__.py +10 -0
  40. novel_downloader/core/parsers/biquge/main_parser.py +126 -0
  41. novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
  42. novel_downloader/core/parsers/{common_parser → common}/helper.py +13 -13
  43. novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
  44. novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
  45. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
  46. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +40 -48
  47. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
  48. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
  49. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +14 -10
  50. novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
  51. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +36 -44
  52. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
  53. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
  54. novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +14 -10
  55. novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
  56. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
  57. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/book_info_parser.py +5 -6
  58. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +7 -8
  59. novel_downloader/core/requesters/__init__.py +9 -5
  60. novel_downloader/core/requesters/base/__init__.py +16 -0
  61. novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +177 -73
  62. novel_downloader/core/requesters/base/browser.py +340 -0
  63. novel_downloader/core/requesters/base/session.py +364 -0
  64. novel_downloader/core/requesters/biquge/__init__.py +12 -0
  65. novel_downloader/core/requesters/biquge/session.py +90 -0
  66. novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
  67. novel_downloader/core/requesters/common/async_session.py +96 -0
  68. novel_downloader/core/requesters/common/session.py +113 -0
  69. novel_downloader/core/requesters/qidian/__init__.py +21 -0
  70. novel_downloader/core/requesters/qidian/broswer.py +307 -0
  71. novel_downloader/core/requesters/qidian/session.py +287 -0
  72. novel_downloader/core/savers/__init__.py +5 -3
  73. novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
  74. novel_downloader/core/savers/biquge.py +25 -0
  75. novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
  76. novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +23 -51
  77. novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
  78. novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
  79. novel_downloader/core/savers/epub_utils/__init__.py +0 -1
  80. novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
  81. novel_downloader/core/savers/epub_utils/initializer.py +4 -5
  82. novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
  83. novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
  84. novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
  85. novel_downloader/locales/en.json +8 -4
  86. novel_downloader/locales/zh.json +5 -1
  87. novel_downloader/resources/config/settings.toml +88 -0
  88. novel_downloader/utils/cache.py +2 -2
  89. novel_downloader/utils/chapter_storage.py +340 -0
  90. novel_downloader/utils/constants.py +6 -4
  91. novel_downloader/utils/crypto_utils.py +3 -3
  92. novel_downloader/utils/file_utils/__init__.py +0 -1
  93. novel_downloader/utils/file_utils/io.py +12 -17
  94. novel_downloader/utils/file_utils/normalize.py +1 -3
  95. novel_downloader/utils/file_utils/sanitize.py +2 -9
  96. novel_downloader/utils/fontocr/__init__.py +0 -1
  97. novel_downloader/utils/fontocr/ocr_v1.py +19 -22
  98. novel_downloader/utils/fontocr/ocr_v2.py +147 -60
  99. novel_downloader/utils/hash_store.py +19 -20
  100. novel_downloader/utils/hash_utils.py +0 -1
  101. novel_downloader/utils/i18n.py +3 -4
  102. novel_downloader/utils/logger.py +5 -6
  103. novel_downloader/utils/model_loader.py +5 -8
  104. novel_downloader/utils/network.py +9 -10
  105. novel_downloader/utils/state.py +6 -7
  106. novel_downloader/utils/text_utils/__init__.py +0 -1
  107. novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
  108. novel_downloader/utils/text_utils/diff_display.py +0 -1
  109. novel_downloader/utils/text_utils/font_mapping.py +1 -4
  110. novel_downloader/utils/text_utils/text_cleaning.py +0 -1
  111. novel_downloader/utils/time_utils/__init__.py +0 -1
  112. novel_downloader/utils/time_utils/datetime_utils.py +8 -10
  113. novel_downloader/utils/time_utils/sleep_utils.py +1 -3
  114. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/METADATA +14 -17
  115. novel_downloader-1.3.1.dist-info/RECORD +127 -0
  116. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/WHEEL +1 -1
  117. novel_downloader/core/requesters/base_browser.py +0 -214
  118. novel_downloader/core/requesters/base_session.py +0 -246
  119. novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
  120. novel_downloader/core/requesters/common_requester/common_session.py +0 -126
  121. novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
  122. novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -396
  123. novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
  124. novel_downloader/resources/config/settings.yaml +0 -76
  125. novel_downloader-1.2.2.dist-info/RECORD +0 -115
  126. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/entry_points.txt +0 -0
  127. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/licenses/LICENSE +0 -0
  128. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/top_level.txt +0 -0
@@ -1,98 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- novel_downloader.core.requesters.common_requester.common_async_session
5
- ----------------------------------------------------------------------
6
-
7
- This module defines a `CommonAsyncSession` class for handling HTTP requests
8
- to common novel sites **asynchronously**. It provides methods to retrieve
9
- raw book info pages and chapter contents using a flexible URL templating
10
- system defined by a site profile, with retry logic and random delays.
11
- """
12
-
13
- import asyncio
14
- import random
15
- from typing import Dict, Optional
16
-
17
- from novel_downloader.config import RequesterConfig, SiteProfile
18
- from novel_downloader.core.requesters.base_async_session import BaseAsyncSession
19
-
20
-
21
class CommonAsyncSession(BaseAsyncSession):
    """
    Asynchronous requester for sites that are described by a site profile.

    :ivar _site: Identifier of the site this session talks to.
    :ivar _profile: Site profile holding the URL templates.
    """

    def __init__(
        self,
        config: RequesterConfig,
        site: str,
        profile: SiteProfile,
        cookies: Optional[Dict[str, str]] = None,
    ) -> None:
        """
        Set up the underlying session, then remember the site and its profile.

        :param config: Requester settings forwarded to ``_init_session``.
        :param site: Identifier or domain of the target site.
        :param profile: Mapping with the site's URL templates.
        :param cookies: Optional cookies forwarded to ``_init_session``.
        """
        self._init_session(config=config, cookies=cookies)
        self._site, self._profile = site, profile

    async def get_book_info(
        self, book_id: str, wait_time: Optional[float] = None
    ) -> str:
        """
        Fetch the book info page and pause briefly before handing it back.

        The request itself is delegated to the inherited ``fetch``.

        :param book_id: Value substituted for ``{book_id}`` in the template.
        :param wait_time: Base delay in seconds; scaled by a random factor
                          in [0.5, 1.5]. Falls back to ``self._config.wait_time``
                          when None.
        :return: Whatever ``fetch`` returned for the page.
        """
        page = await self.fetch(self.book_info_url.format(book_id=book_id))
        if wait_time is None:
            wait_time = self._config.wait_time
        await asyncio.sleep(random.uniform(0.5, 1.5) * wait_time)
        return page

    async def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
    ) -> str:
        """
        Fetch a single chapter page and pause briefly before handing it back.

        The request itself is delegated to the inherited ``fetch``.

        :param book_id: Value substituted for ``{book_id}`` in the template.
        :param chapter_id: Value substituted for ``{chapter_id}``.
        :param wait_time: Base delay in seconds; scaled by a random factor
                          in [0.5, 1.5]. Falls back to ``self._config.wait_time``
                          when None.
        :return: Whatever ``fetch`` returned for the chapter.
        """
        target = self.chapter_url.format(book_id=book_id, chapter_id=chapter_id)
        page = await self.fetch(target)
        if wait_time is None:
            wait_time = self._config.wait_time
        await asyncio.sleep(random.uniform(0.5, 1.5) * wait_time)
        return page

    @property
    def site(self) -> str:
        """Site identifier given at construction time."""
        return self._site

    @property
    def book_info_url(self) -> str:
        """URL template stored under the profile key ``book_info_url``."""
        return self._profile["book_info_url"]

    @property
    def chapter_url(self) -> str:
        """URL template stored under the profile key ``chapter_url``."""
        return self._profile["chapter_url"]
@@ -1,126 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- novel_downloader.core.requesters.common_requester.common_session
5
- ------------------------------------------------------------------
6
-
7
- This module defines a `CommonSession` class for handling HTTP requests
8
- to common novel sites. It provides methods to retrieve raw book
9
- information pages and chapter contents using a flexible URL templating
10
- system defined by a site profile.
11
- """
12
-
13
- import time
14
- from typing import Dict, Optional
15
-
16
- from novel_downloader.config import RequesterConfig, SiteProfile
17
- from novel_downloader.utils.time_utils import sleep_with_random_delay
18
-
19
- from ..base_session import BaseSession
20
-
21
-
22
class CommonSession(BaseSession):
    """
    Synchronous requester for sites that are described by a site profile.

    :ivar _site: Identifier of the site this session talks to.
    :ivar _profile: Site profile holding the URL templates.
    """

    def __init__(
        self,
        config: RequesterConfig,
        site: str,
        profile: SiteProfile,
        cookies: Optional[Dict[str, str]] = None,
    ):
        """
        Set up the underlying session, then remember the site and its profile.

        :param config: Requester settings forwarded to ``_init_session``.
        :param site: Identifier or domain of the target site.
        :param profile: Mapping with the site's URL templates.
        :param cookies: Optional cookies forwarded to ``_init_session``.
        """
        self._init_session(config=config, cookies=cookies)
        self._site = site
        self._profile = profile

    def _get_text_with_retry(self, url: str, base: float, caller: str) -> str:
        """
        GET ``url`` and return the response text, retrying on any exception.

        Shared by ``get_book_info`` and ``get_book_chapter`` so the retry
        policy lives in exactly one place.

        :param url: Fully formatted URL to request.
        :param base: Base delay passed to ``sleep_with_random_delay`` after a
                     successful request.
        :param caller: Name of the public method, used only in the fallback
                       error message.
        :return: The response body as text.
        :raises Exception: Re-raises the error of the final failed attempt.
        :raises RuntimeError: If no attempt was made at all
                              (``retry_times`` < 1).
        """
        for attempt in range(1, self.retry_times + 1):
            try:
                with self.session.get(url, timeout=self.timeout) as response:
                    response.raise_for_status()
                    content = response.text
                # The body is already read; close the response before the
                # politeness delay instead of holding it open while sleeping.
                sleep_with_random_delay(base, add_spread=1.0)
                return content
            except Exception:
                if attempt == self.retry_times:
                    # Last attempt failed as well: propagate with the
                    # original traceback intact.
                    raise
                time.sleep(self.retry_interval)
        raise RuntimeError(f"Unexpected error: {caller} failed without returning")

    def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
        """
        Fetch the raw content of the book info page.

        :param book_id: Value substituted for ``{book_id}`` in the template.
        :param wait_time: Base number of seconds to wait after a successful
                          request; falls back to ``self._config.wait_time``.
        :return: The page content as a string.
        :raises Exception: The error of the last attempt (for example an HTTP
                           error raised by ``raise_for_status``).
        """
        url = self.book_info_url.format(book_id=book_id)
        base = wait_time if wait_time is not None else self._config.wait_time
        return self._get_text_with_retry(url, base, "get_book_info")

    def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
    ) -> str:
        """
        Fetch the raw content of a single chapter.

        :param book_id: Value substituted for ``{book_id}`` in the template.
        :param chapter_id: Value substituted for ``{chapter_id}``.
        :param wait_time: Base number of seconds to wait after a successful
                          request; falls back to ``self._config.wait_time``.
        :return: The chapter content as a string.
        :raises Exception: The error of the last attempt (for example an HTTP
                           error raised by ``raise_for_status``).
        """
        url = self.chapter_url.format(book_id=book_id, chapter_id=chapter_id)
        base = wait_time if wait_time is not None else self._config.wait_time
        return self._get_text_with_retry(url, base, "get_book_chapter")

    @property
    def site(self) -> str:
        """Return the site name."""
        return self._site

    @property
    def book_info_url(self) -> str:
        """Return the URL template stored under ``book_info_url``."""
        return self._profile["book_info_url"]

    @property
    def chapter_url(self) -> str:
        """Return the URL template stored under ``chapter_url``."""
        return self._profile["chapter_url"]
@@ -1,22 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- novel_downloader.core.requesters.qidian_requester
5
- -------------------------------------------------
6
-
7
- This package provides the implementation of the Qidian-specific requester logic.
8
- It contains modules for interacting with Qidian's website, including login,
9
- page navigation, and data retrieval using a browser-based automation approach.
10
-
11
- Modules:
12
- - qidian_broswer: Implements the QidianBrowser class for automated browser control.
13
- - qidian_session: Implements the QidianSession class.
14
- """
15
-
16
- from .qidian_broswer import QidianBrowser
17
- from .qidian_session import QidianSession
18
-
19
# Public names re-exported by this package.
__all__ = ["QidianBrowser", "QidianSession"]
@@ -1,396 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- novel_downloader.core.requesters.qidian_requester.qidian_broswer
5
- ----------------------------------------------------------------
6
-
7
- This module defines the QidianBrowser class for interacting with
8
- the Qidian website.
9
- It extends the BaseBrowser by adding methods for logging in and
10
- retrieving book information.
11
- """
12
-
13
- import logging
14
- import random
15
- import time
16
- from typing import Optional
17
-
18
- from DrissionPage._elements.chromium_element import ChromiumElement
19
- from DrissionPage.common import Keys
20
-
21
- from novel_downloader.config.models import RequesterConfig
22
- from novel_downloader.core.requesters.base_browser import BaseBrowser
23
- from novel_downloader.utils.i18n import t
24
- from novel_downloader.utils.time_utils import sleep_with_random_delay
25
-
26
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
27
-
28
-
29
class QidianBrowser(BaseBrowser):
    """
    Browser-driven requester for Qidian (qidian.com).

    On top of the browser setup inherited from BaseBrowser it adds login
    handling (automatic and interactive) and page fetching for book info,
    chapters and the user's bookcase.
    """

    DEFAULT_SCHEME = "https:"
    QIDIAN_BASE_URL = "www.qidian.com"
    QIDIAN_BOOKCASE_URL = f"{DEFAULT_SCHEME}//my.qidian.com/bookcase/"
    QIDIAN_BOOK_INFO_URL_1 = f"{DEFAULT_SCHEME}//www.qidian.com/book"
    QIDIAN_BOOK_INFO_URL_2 = f"{DEFAULT_SCHEME}//book.qidian.com/info"
    QIDIAN_CHAPTER_URL = f"{DEFAULT_SCHEME}//www.qidian.com/chapter"

    def __init__(self, config: RequesterConfig):
        """
        Initialize the browser and the login bookkeeping.

        :param config: The RequesterConfig instance containing browser settings.
        """
        self._init_browser(config=config)
        self._headless: bool = config.headless
        self._logged_in: bool = False

    def _is_user_logged_in(self) -> bool:
        """
        Check the login state via the page's 'sign-in' element.

        The user is considered logged in when that element exists and its
        ``class`` attribute does not contain ``hidden``. Any exception raised
        while probing the page is logged and treated as "not logged in".

        :return: True if the user appears to be logged in, False otherwise.
        :raises RuntimeError: If the browser page is not initialized.
        """
        if self._page is None:
            raise RuntimeError("Browser page not initialized.")
        try:
            self._handle_overlay_mask()
            sign_in_elem = self._page.ele("@class=sign-in")
            if sign_in_elem:
                class_value = sign_in_elem.attr("class")
                if class_value and "hidden" not in class_value:
                    return True
        except Exception as e:
            logger.warning("[auth] Error while checking login status: %s", e)
        return False

    def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
        """
        Attempt to log in to Qidian.

        :param max_retries: Maximum number of login checks/attempts.
        :param manual_login: If True, run the interactive flow in which the
                             user logs in by hand; otherwise use the
                             automatic click-based flow.
        :return: True if the user is logged in afterwards, False otherwise.
        """
        if manual_login:
            return self._manual_login(max_retries)
        return self._login(max_retries)

    def _login(self, max_retries: int = 3) -> bool:
        """
        Attempt to log in by handling overlays and clicking the login button.

        Afterwards the browser is navigated back to the URL it was on before
        the attempt (best effort).

        :param max_retries: Maximum number of times to try clicking the login button.
        :return: True if login succeeds or is already in place; False otherwise.
        :raises RuntimeError: If the browser page is not initialized.
        """
        if self._page is None:
            raise RuntimeError("Browser page not initialized.")
        original_url = self._page.url
        try:
            self._page.get("https://www.qidian.com/")
            self._page.wait.eles_loaded("#login-box")
        except Exception as e:
            logger.warning("[auth] Failed to load login box: %s", e)
            return False

        for attempt in range(1, max_retries + 1):
            if self._is_user_logged_in():
                logger.debug("[auth] Already logged in.")
                break

            self._click_login_button(attempt)
            time.sleep(self._config.retry_interval)

        # Re-check once more so the flag reflects the state after the last click.
        self._logged_in = self._is_user_logged_in()
        if self._logged_in:
            logger.info("[auth] Login successful.")
        else:
            logger.warning("[auth] Login failed after max retries.")

        # Return to the original page; failure here must not affect the result.
        try:
            if original_url:
                self._page.get(original_url)
        except Exception as e:
            logger.debug("[auth] Failed to restore page URL: %s", e)

        return self._logged_in

    def _handle_overlay_mask(self) -> None:
        """
        Detect and close a ``div.mask`` overlay via the login iframe's close button.

        All failures are logged at debug level and otherwise ignored.

        :raises RuntimeError: If the browser page is not initialized.
        """
        if self._page is None:
            raise RuntimeError("Browser page not initialized.")
        try:
            mask = self._page.ele("@@tag()=div@@class=mask", timeout=2)
            if not mask:
                return

            logger.debug("[auth] Overlay mask detected; attempting to close.")
            iframe = self._page.get_frame("loginIfr", timeout=5)
            if not iframe:
                logger.debug("[auth] Login iframe not found.")
                return

            close_btn = iframe.ele("@id=close", timeout=5)
            if close_btn:
                close_btn.click()
                logger.debug("[auth] Closed overlay mask via iframe close button.")
            else:
                logger.debug("[auth] Close button not found in login iframe.")
        except Exception as e:
            logger.debug("[auth] Error handling overlay mask: %s", e)

    def _click_login_button(self, attempt: int) -> None:
        """
        Try to click the element with id ``login-btn`` on the page.

        :param attempt: The current attempt number (for logging).
        :raises RuntimeError: If the browser page is not initialized.
        """
        if self._page is None:
            raise RuntimeError("Browser page not initialized.")
        try:
            logger.debug("[auth] Attempting login click (#%s).", attempt)
            login_btn = self._page.ele("@id=login-btn", timeout=5)
            if isinstance(login_btn, ChromiumElement):
                login_btn.click()
                logger.debug("[auth] Login button clicked.")
            else:
                logger.debug("[auth] Login button not found.")
        except Exception as e:
            logger.debug("[auth] Exception clicking login button: %s", e)

    def _manual_login(
        self,
        max_retries: int = 3,
    ) -> bool:
        """
        Guide the user through an interactive manual login flow.

        Steps:
          1. If the browser is headless (or images are disabled), restart it
             in headful mode with images enabled.
          2. Navigate to the Qidian homepage.
          3. Prompt the user to complete login, checking up to `max_retries` times.
          4. Once logged in, restore the original browser settings if they
             were changed and re-run the automatic login check.

        :param max_retries: Number of times to check for login success.
        :return: True if login was detected, False otherwise.
        :raises RuntimeError: If the browser page is not initialized.
        """
        if self._page is None:
            raise RuntimeError("Browser page not initialized.")
        original_headless = self._headless
        # Read once, defensively: the attribute is not set anywhere in this
        # class, so it may be absent. Using the same value for the check and
        # for the restore below avoids an AttributeError in step 4.
        images_disabled = getattr(self, "_disable_images_orig", False)

        # 1. Switch to headful mode if needed
        if images_disabled:
            logger.debug("[auth] Temporarily enabling images for manual login.")
            self._options.no_imgs(False)
            self._restart_browser(headless=False)
        elif original_headless:
            self._restart_browser(headless=False)

        # 2. Navigate to home page
        # NOTE(review): on this failure path (and on failed login below) the
        # browser is left in headful mode; confirm whether that is intended.
        try:
            self._page.get("https://www.qidian.com/")
        except Exception as e:
            logger.warning("[auth] Failed to load homepage for manual login: %s", e)
            return False

        # 3. Retry loop
        for attempt in range(1, max_retries + 1):
            if self._is_user_logged_in():
                logger.info("[auth] Detected successful login.")
                self._logged_in = True
                break
            if attempt == 1:
                print(t("login_prompt_intro"))
            input(
                t("login_prompt_press_enter", attempt=attempt, max_retries=max_retries)
            )
        else:
            # Loop finished without a break: login was never detected.
            logger.warning("[auth] Manual login failed after %d attempts.", max_retries)
            self._logged_in = False
            return self._logged_in

        # 4. Restore headless if we changed it, then re-establish session
        if original_headless or images_disabled:
            logger.debug("[auth] Restoring browser settings after manual login...")
            self._options.no_imgs(images_disabled)
            self._restart_browser(headless=original_headless)
            self.login()
            if self._logged_in:
                logger.info(
                    "[auth] Login session successfully carried over after restart."
                )
            else:
                logger.warning(
                    "[auth] Lost login session after restoring headless mode."
                )

        return self._logged_in

    def _restart_browser(self, headless: Optional[bool] = None) -> None:
        """
        Shut down the current browser and start a new one.

        :param headless: Headless setting to apply before restarting; when
                         None the current setting is left untouched.
        """
        if self._browser:
            self._browser.quit()
            self._clear_browser_refs()

        # Apply new headless setting and reinitialize
        if headless is not None:
            self._options.headless(headless)
            self._headless = headless
        self._setup()
        logger.debug("[browser] Browser restarted (headless=%s).", headless)

    def _build_book_info_url(self, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return f"{self.QIDIAN_BOOK_INFO_URL_2}/{book_id}/"

    def _build_chapter_url(self, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return f"{self.QIDIAN_CHAPTER_URL}/{book_id}/{chapter_id}/"

    def _build_bookcase_url(self) -> str:
        """
        Construct the URL for the user's bookcase page.

        :return: Fully qualified URL of the bookcase.
        """
        return self.QIDIAN_BOOKCASE_URL

    def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
        """
        Retrieve the HTML of a Qidian book info page.

        Navigates to the book's info URL, waits a randomized delay, and
        returns the page HTML. No login check is performed here.

        :param book_id: The identifier of the book to fetch.
        :param wait_time: Base wait time in seconds before returning content.
                          If None, uses `self._config.wait_time`.
        :return: The HTML content of the book info page, or an empty string on error.
        :raises RuntimeError: If the browser page is not initialized.
        """
        if self._page is None:
            raise RuntimeError("Browser page not initialized.")
        url = self._build_book_info_url(book_id)
        try:
            # Navigate and fetch
            self._page.get(url)

            # Randomized human-like delay
            base = wait_time if wait_time is not None else self._config.wait_time
            sleep_with_random_delay(base, mul_spread=1.2)

            html = str(self._page.html)
            logger.debug("[fetch] Fetched book info for ID %s from %s", book_id, url)
            return html

        except Exception as e:
            logger.warning("[fetch] Error fetching book info from '%s': %s", url, e)
            return ""

    def _scroll_page(self, presses: int, pause: float) -> None:
        """
        Scroll down by sending DOWN key presses to the page.

        A failed key press is logged and skipped; the pause still applies.

        :param presses: Number of DOWN key presses.
        :param pause: Seconds to wait after each press.
        :raises RuntimeError: If the browser page is not initialized.
        """
        if self._page is None:
            raise RuntimeError("Browser page not initialized.")
        for _ in range(presses):
            try:
                self._page.actions.key_down(Keys.DOWN)
            except Exception as e:
                logger.debug("[page] Scroll press failed: %s", e)
            time.sleep(pause)

    def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
    ) -> str:
        """
        Retrieve the HTML content of a specific chapter.

        Navigates to the chapter page and scrolls down with paused key
        presses before reading the HTML. There is no separate sleep: the
        scrolling itself provides the delay. No login check is performed here.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :param wait_time: Base value in seconds that controls how long the
                          page is scrolled. If None, falls back to
                          `self._config.wait_time`.
        :return: The HTML content of the chapter page, or empty string on error.
        :raises RuntimeError: If the browser page is not initialized.
        """
        if self._page is None:
            raise RuntimeError("Browser page not initialized.")
        url = self._build_chapter_url(book_id, chapter_id)
        try:
            # 1. Navigate to chapter URL
            self._page.get(url)

            # 2. Scroll down to load dynamic content. With a 0.5 s pause per
            #    press, the pauses add up to roughly base..base+5 seconds.
            base = wait_time if wait_time is not None else self._config.wait_time
            presses = int(random.uniform(base, base + 5) * 2)
            self._scroll_page(presses, pause=0.5)

            html = str(self._page.html)
            logger.debug("[fetch] Fetched chapter %s for book %s", chapter_id, book_id)
            return html

        except Exception as e:
            logger.warning("[fetch] Error fetching chapter from '%s': %s", url, e)
            return ""

    def get_bookcase(self, wait_time: Optional[float] = None) -> str:
        """
        Retrieve the HTML content of the logged-in user's Qidian bookcase page.

        :param wait_time: Base number of seconds to wait before returning content.
                          If None, falls back to `self._config.wait_time`.
        :return: The HTML markup of the bookcase page, or empty string on error.
        :raises RuntimeError: If the browser page is not initialized or the
                              user is not logged in.
        """
        if self._page is None:
            raise RuntimeError("Browser page not initialized.")
        if not self._logged_in:
            raise RuntimeError("User not logged in. Please call login() first.")

        url = self._build_bookcase_url()
        try:
            # Navigate to the bookcase page
            self._page.get(url)

            # Randomized human-like delay
            base = wait_time if wait_time is not None else self._config.wait_time
            sleep_with_random_delay(base, mul_spread=1.2)

            html = str(self._page.html)
            logger.debug("[fetch] Fetched bookcase HTML from %s", url)
            return html

        except Exception as e:
            logger.warning("[fetch] Error fetching bookcase from '%s': %s", url, e)
            return ""