novel-downloader 1.3.2__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/clean.py +97 -78
  3. novel_downloader/cli/config.py +177 -0
  4. novel_downloader/cli/download.py +132 -87
  5. novel_downloader/cli/export.py +77 -0
  6. novel_downloader/cli/main.py +21 -28
  7. novel_downloader/config/__init__.py +1 -25
  8. novel_downloader/config/adapter.py +32 -31
  9. novel_downloader/config/loader.py +3 -3
  10. novel_downloader/config/site_rules.py +1 -2
  11. novel_downloader/core/__init__.py +3 -6
  12. novel_downloader/core/downloaders/__init__.py +10 -13
  13. novel_downloader/core/downloaders/base.py +233 -0
  14. novel_downloader/core/downloaders/biquge.py +27 -0
  15. novel_downloader/core/downloaders/common.py +414 -0
  16. novel_downloader/core/downloaders/esjzone.py +27 -0
  17. novel_downloader/core/downloaders/linovelib.py +27 -0
  18. novel_downloader/core/downloaders/qianbi.py +27 -0
  19. novel_downloader/core/downloaders/qidian.py +352 -0
  20. novel_downloader/core/downloaders/sfacg.py +27 -0
  21. novel_downloader/core/downloaders/yamibo.py +27 -0
  22. novel_downloader/core/exporters/__init__.py +37 -0
  23. novel_downloader/core/{savers → exporters}/base.py +73 -44
  24. novel_downloader/core/exporters/biquge.py +25 -0
  25. novel_downloader/core/exporters/common/__init__.py +12 -0
  26. novel_downloader/core/{savers → exporters}/common/epub.py +40 -52
  27. novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +36 -39
  28. novel_downloader/core/{savers → exporters}/common/txt.py +20 -24
  29. novel_downloader/core/exporters/epub_utils/__init__.py +40 -0
  30. novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -1
  31. novel_downloader/core/exporters/epub_utils/image_loader.py +131 -0
  32. novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -3
  33. novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +49 -2
  34. novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -1
  35. novel_downloader/core/exporters/esjzone.py +25 -0
  36. novel_downloader/core/exporters/linovelib/__init__.py +10 -0
  37. novel_downloader/core/exporters/linovelib/epub.py +449 -0
  38. novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
  39. novel_downloader/core/exporters/linovelib/txt.py +129 -0
  40. novel_downloader/core/exporters/qianbi.py +25 -0
  41. novel_downloader/core/{savers → exporters}/qidian.py +8 -8
  42. novel_downloader/core/exporters/sfacg.py +25 -0
  43. novel_downloader/core/exporters/yamibo.py +25 -0
  44. novel_downloader/core/factory/__init__.py +5 -17
  45. novel_downloader/core/factory/downloader.py +24 -126
  46. novel_downloader/core/factory/exporter.py +58 -0
  47. novel_downloader/core/factory/fetcher.py +96 -0
  48. novel_downloader/core/factory/parser.py +17 -12
  49. novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
  50. novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
  51. novel_downloader/core/fetchers/base/browser.py +383 -0
  52. novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
  53. novel_downloader/core/fetchers/base/session.py +419 -0
  54. novel_downloader/core/fetchers/biquge/__init__.py +14 -0
  55. novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
  56. novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
  57. novel_downloader/core/fetchers/common/__init__.py +14 -0
  58. novel_downloader/core/fetchers/common/browser.py +79 -0
  59. novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
  60. novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
  61. novel_downloader/core/fetchers/esjzone/browser.py +202 -0
  62. novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
  63. novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
  64. novel_downloader/core/fetchers/linovelib/browser.py +178 -0
  65. novel_downloader/core/fetchers/linovelib/session.py +178 -0
  66. novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
  67. novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
  68. novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
  69. novel_downloader/core/fetchers/qidian/__init__.py +14 -0
  70. novel_downloader/core/fetchers/qidian/browser.py +266 -0
  71. novel_downloader/core/fetchers/qidian/session.py +326 -0
  72. novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
  73. novel_downloader/core/fetchers/sfacg/browser.py +189 -0
  74. novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
  75. novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
  76. novel_downloader/core/fetchers/yamibo/browser.py +229 -0
  77. novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
  78. novel_downloader/core/interfaces/__init__.py +8 -12
  79. novel_downloader/core/interfaces/downloader.py +54 -0
  80. novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
  81. novel_downloader/core/interfaces/fetcher.py +162 -0
  82. novel_downloader/core/interfaces/parser.py +6 -7
  83. novel_downloader/core/parsers/__init__.py +5 -6
  84. novel_downloader/core/parsers/base.py +9 -13
  85. novel_downloader/core/parsers/biquge/main_parser.py +12 -13
  86. novel_downloader/core/parsers/common/helper.py +3 -3
  87. novel_downloader/core/parsers/common/main_parser.py +39 -34
  88. novel_downloader/core/parsers/esjzone/main_parser.py +24 -17
  89. novel_downloader/core/parsers/linovelib/__init__.py +10 -0
  90. novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
  91. novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
  92. novel_downloader/core/parsers/qidian/__init__.py +2 -11
  93. novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
  94. novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
  95. novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
  96. novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
  97. novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
  98. novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
  99. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
  100. novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
  101. novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
  102. novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
  103. novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
  104. novel_downloader/locales/en.json +18 -2
  105. novel_downloader/locales/zh.json +18 -2
  106. novel_downloader/models/__init__.py +64 -0
  107. novel_downloader/models/browser.py +21 -0
  108. novel_downloader/models/chapter.py +25 -0
  109. novel_downloader/models/config.py +100 -0
  110. novel_downloader/models/login.py +20 -0
  111. novel_downloader/models/site_rules.py +99 -0
  112. novel_downloader/models/tasks.py +33 -0
  113. novel_downloader/models/types.py +15 -0
  114. novel_downloader/resources/config/settings.toml +31 -25
  115. novel_downloader/resources/json/linovelib_font_map.json +3573 -0
  116. novel_downloader/tui/__init__.py +7 -0
  117. novel_downloader/tui/app.py +32 -0
  118. novel_downloader/tui/main.py +17 -0
  119. novel_downloader/tui/screens/__init__.py +14 -0
  120. novel_downloader/tui/screens/home.py +191 -0
  121. novel_downloader/tui/screens/login.py +74 -0
  122. novel_downloader/tui/styles/home_layout.tcss +79 -0
  123. novel_downloader/tui/widgets/richlog_handler.py +24 -0
  124. novel_downloader/utils/__init__.py +6 -0
  125. novel_downloader/utils/chapter_storage.py +25 -38
  126. novel_downloader/utils/constants.py +15 -5
  127. novel_downloader/utils/cookies.py +66 -0
  128. novel_downloader/utils/crypto_utils.py +1 -74
  129. novel_downloader/utils/file_utils/io.py +1 -1
  130. novel_downloader/utils/fontocr/ocr_v1.py +2 -1
  131. novel_downloader/utils/fontocr/ocr_v2.py +2 -2
  132. novel_downloader/utils/hash_store.py +10 -18
  133. novel_downloader/utils/hash_utils.py +3 -2
  134. novel_downloader/utils/logger.py +2 -3
  135. novel_downloader/utils/network.py +53 -39
  136. novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
  137. novel_downloader/utils/text_utils/font_mapping.py +1 -1
  138. novel_downloader/utils/text_utils/text_cleaning.py +1 -1
  139. novel_downloader/utils/time_utils/datetime_utils.py +3 -3
  140. novel_downloader/utils/time_utils/sleep_utils.py +3 -3
  141. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +72 -38
  142. novel_downloader-1.4.0.dist-info/RECORD +170 -0
  143. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
  144. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
  145. novel_downloader/cli/interactive.py +0 -66
  146. novel_downloader/cli/settings.py +0 -177
  147. novel_downloader/config/models.py +0 -187
  148. novel_downloader/core/downloaders/base/__init__.py +0 -14
  149. novel_downloader/core/downloaders/base/base_async.py +0 -153
  150. novel_downloader/core/downloaders/base/base_sync.py +0 -208
  151. novel_downloader/core/downloaders/biquge/__init__.py +0 -14
  152. novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
  153. novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
  154. novel_downloader/core/downloaders/common/__init__.py +0 -14
  155. novel_downloader/core/downloaders/common/common_async.py +0 -218
  156. novel_downloader/core/downloaders/common/common_sync.py +0 -210
  157. novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
  158. novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
  159. novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
  160. novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
  161. novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
  162. novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
  163. novel_downloader/core/downloaders/qidian/__init__.py +0 -10
  164. novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -227
  165. novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
  166. novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
  167. novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
  168. novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
  169. novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
  170. novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
  171. novel_downloader/core/factory/requester.py +0 -144
  172. novel_downloader/core/factory/saver.py +0 -56
  173. novel_downloader/core/interfaces/async_downloader.py +0 -36
  174. novel_downloader/core/interfaces/async_requester.py +0 -84
  175. novel_downloader/core/interfaces/sync_downloader.py +0 -36
  176. novel_downloader/core/interfaces/sync_requester.py +0 -82
  177. novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
  178. novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
  179. novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
  180. novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
  181. novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
  182. novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
  183. novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
  184. novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
  185. novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
  186. novel_downloader/core/requesters/base/async_session.py +0 -410
  187. novel_downloader/core/requesters/base/browser.py +0 -337
  188. novel_downloader/core/requesters/base/session.py +0 -378
  189. novel_downloader/core/requesters/biquge/__init__.py +0 -14
  190. novel_downloader/core/requesters/common/__init__.py +0 -17
  191. novel_downloader/core/requesters/common/session.py +0 -113
  192. novel_downloader/core/requesters/esjzone/__init__.py +0 -13
  193. novel_downloader/core/requesters/esjzone/session.py +0 -235
  194. novel_downloader/core/requesters/qianbi/__init__.py +0 -13
  195. novel_downloader/core/requesters/qidian/__init__.py +0 -21
  196. novel_downloader/core/requesters/qidian/broswer.py +0 -307
  197. novel_downloader/core/requesters/qidian/session.py +0 -290
  198. novel_downloader/core/requesters/sfacg/__init__.py +0 -13
  199. novel_downloader/core/requesters/sfacg/session.py +0 -242
  200. novel_downloader/core/requesters/yamibo/__init__.py +0 -13
  201. novel_downloader/core/requesters/yamibo/session.py +0 -237
  202. novel_downloader/core/savers/__init__.py +0 -34
  203. novel_downloader/core/savers/biquge.py +0 -25
  204. novel_downloader/core/savers/common/__init__.py +0 -12
  205. novel_downloader/core/savers/epub_utils/__init__.py +0 -26
  206. novel_downloader/core/savers/esjzone.py +0 -25
  207. novel_downloader/core/savers/qianbi.py +0 -25
  208. novel_downloader/core/savers/sfacg.py +0 -25
  209. novel_downloader/core/savers/yamibo.py +0 -25
  210. novel_downloader/resources/config/rules.toml +0 -196
  211. novel_downloader-1.3.2.dist-info/RECORD +0 -165
  212. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
  213. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,337 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.requesters.base.browser
4
- ---------------------------------------------
5
-
6
- This module defines the BaseBrowser class, which provides common functionalities
7
- for browser operations. Derived classes can extend these methods for
8
- specialized purposes.
9
- """
10
-
11
- import abc
12
- import logging
13
- import random
14
- import time
15
- import types
16
- from typing import Any, Literal, Self, cast
17
-
18
- from DrissionPage import Chromium, ChromiumOptions
19
- from DrissionPage._elements.chromium_element import ChromiumElement
20
- from DrissionPage._pages.chromium_frame import ChromiumFrame
21
- from DrissionPage._pages.mix_tab import MixTab
22
- from DrissionPage.common import Keys
23
-
24
- from novel_downloader.config.models import RequesterConfig
25
- from novel_downloader.core.interfaces import SyncRequesterProtocol
26
- from novel_downloader.utils.constants import (
27
- DEFAULT_USER_AGENT,
28
- DEFAULT_USER_DATA_DIR,
29
- DEFAULT_USER_PROFILE_NAME,
30
- )
31
-
32
-
33
- class BaseBrowser(SyncRequesterProtocol, abc.ABC):
34
- """
35
- BaseBrowser wraps basic browser operations using DrissionPage,
36
- with full control over browser configuration, session profile,
37
- retry and timeout behavior.
38
-
39
- Attributes:
40
- _options (ChromiumOptions): Configuration object for Chromium.
41
- _browser (Chromium): Chromium instance.
42
- _page (ChromiumPage): The active browser tab.
43
- """
44
-
45
- def is_async(self) -> Literal[False]:
46
- return False
47
-
48
- def __init__(
49
- self,
50
- config: RequesterConfig,
51
- ) -> None:
52
- """
53
- Initialize the Requester with a browser configuration.
54
-
55
- :param config: The RequesterConfig instance containing browser settings.
56
- """
57
- super().__init__()
58
- self._config = config
59
- self._options = ChromiumOptions()
60
- self._browser: Chromium | None = None
61
- self._page: MixTab | None = None
62
- self._headless: bool = config.headless
63
-
64
- user_data_path = (
65
- config.user_data_folder
66
- if self._is_valid(config.user_data_folder)
67
- else DEFAULT_USER_DATA_DIR
68
- )
69
- self._options.set_user_data_path(user_data_path)
70
-
71
- profile_name = (
72
- config.profile_name
73
- if self._is_valid(config.profile_name)
74
- else DEFAULT_USER_PROFILE_NAME
75
- )
76
- self._options.set_user(profile_name)
77
-
78
- self._options.headless(config.headless)
79
- self._options.set_user_agent(DEFAULT_USER_AGENT)
80
- self._options.set_timeouts(base=config.timeout)
81
- self._options.set_retry(
82
- times=config.retry_times, interval=config.backoff_factor
83
- )
84
-
85
- self._disable_images_orig = config.disable_images
86
- if config.disable_images:
87
- self._options.no_imgs(True)
88
- if config.mute_audio:
89
- self._options.mute(True)
90
-
91
- # self._options.set_argument('--disable-blink-features', 'AutomationControlled')
92
- # self._options.set_argument('--log-level', '3')
93
- # self._options.set_argument('--disable-gpu')
94
- # self._options.set_argument('no-sandbox')
95
-
96
- self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
97
-
98
- self._init_browser()
99
-
100
- def _init_browser(self) -> None:
101
- """
102
- Set up the browser instance and open the default tab.
103
- """
104
- if not self._browser:
105
- self._browser = Chromium(self._options)
106
- if not self._page:
107
- self._page = cast(MixTab, self._browser.get_tab())
108
-
109
- def login(
110
- self,
111
- username: str = "",
112
- password: str = "",
113
- manual_login: bool = False,
114
- **kwargs: Any,
115
- ) -> bool:
116
- """
117
- Attempt to log in
118
- """
119
- return True
120
-
121
- @abc.abstractmethod
122
- def get_book_info(
123
- self,
124
- book_id: str,
125
- **kwargs: Any,
126
- ) -> list[str]:
127
- """
128
- Fetch the raw HTML (or JSON) of the book info page.
129
-
130
- :param book_id: The book identifier.
131
- :param wait_time: Base number of seconds to wait before returning content.
132
- :return: The page content as a string.
133
- """
134
- ...
135
-
136
- @abc.abstractmethod
137
- def get_book_chapter(
138
- self,
139
- book_id: str,
140
- chapter_id: str,
141
- **kwargs: Any,
142
- ) -> list[str]:
143
- """
144
- Fetch the raw HTML (or JSON) of a single chapter.
145
-
146
- :param book_id: The book identifier.
147
- :param chapter_id: The chapter identifier.
148
- :param wait_time: Base number of seconds to wait before returning content.
149
- :return: The chapter content as a string.
150
- """
151
- ...
152
-
153
- def get_bookcase(
154
- self,
155
- page: int = 1,
156
- **kwargs: Any,
157
- ) -> list[str]:
158
- """
159
- Optional: Retrieve the HTML content of the authenticated user's bookcase page.
160
-
161
- Subclasses that support login+bookcase retrieval should override this.
162
-
163
- :param wait_time: Base number of seconds to wait before returning content.
164
- :return: The HTML markup of the bookcase page.
165
- :raises NotImplementedError: If bookcase fetching is not supported.
166
- """
167
- raise NotImplementedError(
168
- "Bookcase fetching is not supported by this browser type. "
169
- "Override get_bookcase() in your subclass to enable it."
170
- )
171
-
172
- def scroll_page(
173
- self,
174
- presses: int,
175
- pause: float = 0.5,
176
- jitter: float = 0.1,
177
- ) -> None:
178
- """
179
- Scroll down by sending DOWN key presses to the page.
180
-
181
- :param presses: Number of DOWN key presses.
182
- :param pause: Seconds to wait between each press.
183
- """
184
- for _ in range(int(presses)):
185
- try:
186
- self.page.actions.key_down(Keys.DOWN)
187
- except Exception as e:
188
- self.logger.debug("[page] Scroll press failed: %s", e)
189
- actual_pause = pause + random.uniform(-jitter, jitter)
190
- actual_pause = max(0, actual_pause)
191
- time.sleep(actual_pause)
192
-
193
- def click_button(
194
- self,
195
- locator: str | tuple[str, str] | ChromiumElement,
196
- timeout: float = 5.0,
197
- page: MixTab | ChromiumFrame | None = None,
198
- ) -> bool:
199
- """
200
- Attempt to locate and click a button on the page.
201
-
202
- :param locator: The target element to click.
203
- :param timeout: Maximum time (in seconds) to wait.
204
- :param page: Optional page or frame to search in.
205
- :return: True if the element was located and clicked; False otherwise.
206
- """
207
- try:
208
- page = page or self.page
209
- btn = page.ele(locator, timeout=timeout)
210
- if isinstance(btn, ChromiumElement):
211
- btn.click()
212
- return True
213
- except Exception as e:
214
- self.logger.debug("[browser] Exception clicking button: %s", e)
215
- return False
216
-
217
- def get_frame(
218
- self,
219
- loc_ind_ele: str | int | ChromiumFrame | ChromiumElement,
220
- timeout: float = 5.0,
221
- page: MixTab | ChromiumFrame | None = None,
222
- ) -> ChromiumFrame | None:
223
- """
224
- Attempt to locate and return a frame from the page.
225
-
226
- :param loc_ind_ele: The frame to locate.
227
- :param timeout: Maximum time (in seconds) to wait.
228
- :param page: Optional page or frame to search in.
229
- :return: The located ChromiumFrame if found; otherwise, None.
230
- """
231
- try:
232
- page = page or self.page
233
- return page.get_frame(loc_ind_ele, timeout=timeout)
234
- except Exception as e:
235
- self.logger.debug(
236
- "[browser] Exception occurred while getting frame [%s]: %s",
237
- loc_ind_ele,
238
- e,
239
- )
240
- return None
241
-
242
- def restart_browser(
243
- self,
244
- headless: bool | None = None,
245
- ) -> None:
246
- """
247
- Shutdown the current browser and restart it with the given headless setting.
248
-
249
- :param headless: Whether to run the browser in headless mode.
250
- """
251
- if self._browser:
252
- self._browser.quit()
253
- self._browser = None
254
- self._page = None
255
-
256
- # Apply new headless setting and reinitialize
257
- if headless is not None:
258
- self._options.headless(headless)
259
- self._headless = headless
260
- self._init_browser()
261
- self.logger.debug("[browser] Browser restarted (headless=%s).", headless)
262
-
263
- @property
264
- def page(self) -> MixTab:
265
- """
266
- Return the current Chromium page object.
267
-
268
- :return: ChromiumPage instance of the current tab.
269
- """
270
- if self._page is None:
271
- raise RuntimeError("Page is not initialized or has been shut down.")
272
- return self._page
273
-
274
- @property
275
- def browser(self) -> Chromium:
276
- """
277
- Return the Chromium browser instance.
278
-
279
- :return: Chromium instance used by this browser.
280
- """
281
- if self._browser is None:
282
- raise RuntimeError("Browser is not initialized or has been shut down.")
283
- return self._browser
284
-
285
- @staticmethod
286
- def _is_valid(value: str) -> bool:
287
- return bool(value and value.strip())
288
-
289
- def close(self) -> None:
290
- """
291
- Shutdown the browser session and release resources.
292
-
293
- This quits the Chromium instance and clears references to browser and page.
294
- """
295
- if self._browser and self._config.auto_close:
296
- self._browser.quit()
297
- self._browser = None
298
- self._page = None
299
-
300
- def __enter__(self) -> Self:
301
- self._init_browser()
302
- return self
303
-
304
- def __exit__(
305
- self,
306
- exc_type: type[BaseException] | None,
307
- exc_val: BaseException | None,
308
- tb: types.TracebackType | None,
309
- ) -> None:
310
- self.close()
311
-
312
- def __del__(self) -> None:
313
- self.close()
314
-
315
- def __getstate__(self) -> dict[str, Any]:
316
- """
317
- Prepare object state for serialization (e.g., pickling).
318
-
319
- Removes browser-related fields that cannot be pickled.
320
-
321
- :return: A dict representing the serializable object state.
322
- """
323
- state = self.__dict__.copy()
324
- state.pop("_browser", None)
325
- state.pop("_page", None)
326
- return state
327
-
328
- def __setstate__(self, state: dict[str, Any]) -> None:
329
- """
330
- Restore object state after deserialization.
331
-
332
- Automatically reinitializes the browser setup.
333
-
334
- :param state: The saved state dictionary.
335
- """
336
- self.__dict__.update(state)
337
- self._init_browser()
@@ -1,378 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.requesters.base.session
4
- ---------------------------------------------
5
-
6
- This module defines the BaseSession class, which provides basic HTTP
7
- request capabilities using the requests library. It maintains a
8
- persistent session and supports retries, headers, and timeout configurations.
9
- """
10
-
11
- import abc
12
- import logging
13
- import types
14
- from collections.abc import Mapping
15
- from typing import Any, Literal, Self
16
-
17
- import requests
18
- from requests import Response, Session
19
- from requests.adapters import HTTPAdapter, Retry
20
-
21
- from novel_downloader.config.models import RequesterConfig
22
- from novel_downloader.core.interfaces import SyncRequesterProtocol
23
- from novel_downloader.utils.constants import DEFAULT_USER_HEADERS
24
-
25
-
26
- class BaseSession(SyncRequesterProtocol, abc.ABC):
27
- """
28
- BaseSession wraps basic HTTP operations using requests.Session,
29
- supporting retry logic, timeout, and persistent connections.
30
-
31
- Attributes:
32
- _session (requests.Session): The persistent HTTP session.
33
- """
34
-
35
- def is_async(self) -> Literal[False]:
36
- return False
37
-
38
- def __init__(
39
- self,
40
- config: RequesterConfig,
41
- cookies: dict[str, str] | None = None,
42
- ) -> None:
43
- """
44
- Initialize a Session instance.
45
-
46
- :param config: The RequesterConfig instance containing settings.
47
- :param cookies: Optional cookies to preload into the session.
48
- """
49
- super().__init__()
50
- self._config = config
51
- self._cookies = cookies or {}
52
- self._headers = DEFAULT_USER_HEADERS.copy()
53
- self._session: Session | None = None
54
-
55
- retry_strategy = Retry(
56
- total=config.retry_times,
57
- backoff_factor=config.backoff_factor,
58
- status_forcelist=[408, 429, 500, 502, 503, 504],
59
- allowed_methods=["HEAD", "GET", "OPTIONS"],
60
- )
61
-
62
- self._adapter = HTTPAdapter(max_retries=retry_strategy)
63
- self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
64
-
65
- self._init_session()
66
-
67
- def _init_session(self) -> None:
68
- """
69
- Set up the session with retry strategy and apply default headers.
70
- """
71
- if self._session:
72
- return
73
-
74
- self._session = requests.Session()
75
- self._session.mount("http://", self._adapter)
76
- self._session.mount("https://", self._adapter)
77
- self._session.headers.update(self._headers)
78
-
79
- if self._cookies:
80
- self._session.cookies.update(self._cookies)
81
-
82
- def login(
83
- self,
84
- username: str = "",
85
- password: str = "",
86
- manual_login: bool = False,
87
- **kwargs: Any,
88
- ) -> bool:
89
- """
90
- Attempt to log in
91
- """
92
- return True
93
-
94
- @abc.abstractmethod
95
- def get_book_info(
96
- self,
97
- book_id: str,
98
- **kwargs: Any,
99
- ) -> list[str]:
100
- """
101
- Fetch the raw HTML (or JSON) of the book info page.
102
-
103
- :param book_id: The book identifier.
104
- :return: The page content as a string.
105
- """
106
- ...
107
-
108
- @abc.abstractmethod
109
- def get_book_chapter(
110
- self,
111
- book_id: str,
112
- chapter_id: str,
113
- **kwargs: Any,
114
- ) -> list[str]:
115
- """
116
- Fetch the raw HTML (or JSON) of a single chapter.
117
-
118
- :param book_id: The book identifier.
119
- :param chapter_id: The chapter identifier.
120
- :return: The chapter content as a string.
121
- """
122
- ...
123
-
124
- def get_bookcase(
125
- self,
126
- page: int = 1,
127
- **kwargs: Any,
128
- ) -> list[str]:
129
- """
130
- Optional: Retrieve the HTML content of the authenticated user's bookcase page.
131
-
132
- Subclasses that support user login and bookcase retrieval should override this.
133
-
134
- :param page: Page idx
135
- :return: The HTML markup of the bookcase page.
136
- :raises NotImplementedError: If the subclass does not implement.
137
- """
138
- raise NotImplementedError(
139
- "Bookcase fetching is not supported by this session type. "
140
- "Override get_bookcase() in your subclass to enable it."
141
- )
142
-
143
- def get(
144
- self,
145
- url: str,
146
- params: dict[str, Any] | None = None,
147
- **kwargs: Any,
148
- ) -> Response:
149
- """
150
- Send a GET request.
151
-
152
- :param url: The target URL.
153
- :param params: Query parameters to include in the request.
154
- :param kwargs: Additional arguments passed to requests.
155
- :return: Response object from the GET request.
156
- :raises RuntimeError: If the session is not initialized.
157
- """
158
- return self.session.get(url, params=params, **kwargs)
159
-
160
- def post(
161
- self,
162
- url: str,
163
- data: dict[str, Any] | bytes | None = None,
164
- json: dict[str, Any] | None = None,
165
- **kwargs: Any,
166
- ) -> Response:
167
- """
168
- Send a POST request.
169
-
170
- :param url: The target URL.
171
- :param data: Form data to include in the request body.
172
- :param json: JSON body to include in the request.
173
- :param kwargs: Additional arguments passed to requests.
174
- :return: Response object from the POST request.
175
- :raises RuntimeError: If the session is not initialized.
176
- """
177
- return self.session.post(url, data=data, json=json, **kwargs)
178
-
179
- def put(
180
- self,
181
- url: str,
182
- data: dict[str, Any] | bytes | None = None,
183
- json: dict[str, Any] | None = None,
184
- **kwargs: Any,
185
- ) -> Response:
186
- """
187
- Send a PUT request with retry logic.
188
- """
189
- return self.session.put(url, data=data, json=json, **kwargs)
190
-
191
- def patch(
192
- self,
193
- url: str,
194
- data: dict[str, Any] | bytes | None = None,
195
- json: dict[str, Any] | None = None,
196
- **kwargs: Any,
197
- ) -> Response:
198
- """
199
- Send a PATCH request with retry logic.
200
- """
201
- return self.session.patch(url, data=data, json=json, **kwargs)
202
-
203
- def delete(
204
- self,
205
- url: str,
206
- **kwargs: Any,
207
- ) -> Response:
208
- """
209
- Send a DELETE request with retry logic.
210
- """
211
- return self.session.delete(url, **kwargs)
212
-
213
- @property
214
- def session(self) -> Session:
215
- """
216
- Return the active requests.Session.
217
-
218
- :raises RuntimeError: If the session is uninitialized or has been shut down.
219
- """
220
- if self._session is None:
221
- # self._init_session()
222
- raise RuntimeError("Session is not initialized or has been shut down.")
223
- return self._session
224
-
225
- @property
226
- def cookies(self) -> dict[str, str]:
227
- """
228
- Get the current session cookies.
229
-
230
- :return: A dict mapping cookie names to their values.
231
- """
232
- if self._session:
233
- return self._session.cookies.get_dict()
234
- else:
235
- return self._cookies
236
-
237
- @property
238
- def headers(self) -> Mapping[str, str | bytes]:
239
- """
240
- Get a copy of the current session headers for temporary use.
241
-
242
- :return: A dict mapping header names to their values.
243
- """
244
- if self._session:
245
- return dict(self._session.headers)
246
- return self._headers.copy()
247
-
248
- def get_header(self, key: str, default: Any = None) -> Any:
249
- """
250
- Retrieve a specific header value by name.
251
-
252
- :param key: The header name to look up.
253
- :param default: The value to return if the header is not present.
254
- :return: The header value if present, else default.
255
- """
256
- if self._session:
257
- return self._session.headers.get(key, default)
258
- else:
259
- return self._headers.get(key, default)
260
-
261
- def update_header(self, key: str, value: str) -> None:
262
- """
263
- Update or add a single header in the session.
264
-
265
- :param key: The name of the header.
266
- :param value: The value of the header.
267
- """
268
- self._headers[key] = value
269
- if self._session:
270
- self._session.headers[key] = value
271
-
272
- def update_headers(self, headers: dict[str, str]) -> None:
273
- """
274
- Update or add multiple headers in the session.
275
-
276
- :param headers: A dictionary of header key-value pairs.
277
- """
278
- self._headers.update(headers)
279
- if self._session:
280
- self._session.headers.update(headers)
281
-
282
- def del_header(self, key: str) -> None:
283
- """
284
- Delete a header from the session if it exists.
285
-
286
- :param key: The name of the header to remove.
287
- """
288
- self._headers.pop(key, None)
289
- if self._session:
290
- self._session.headers.pop(key, None)
291
-
292
- def update_cookie(self, key: str, value: str) -> None:
293
- """
294
- Update or add a single cookie in the session.
295
-
296
- :param key: The name of the cookie.
297
- :param value: The value of the cookie.
298
- """
299
- self._cookies[key] = value
300
- if self._session:
301
- self._session.cookies.set(key, value)
302
-
303
- def update_cookies(
304
- self,
305
- cookies: dict[str, str],
306
- ) -> None:
307
- """
308
- Update or add multiple cookies in the session.
309
-
310
- :param cookies: A dictionary of cookie key-value pairs.
311
- """
312
- self._cookies.update(cookies)
313
- if self._session:
314
- self._session.cookies.update(cookies)
315
-
316
- def clear_cookies(self) -> None:
317
- """
318
- Clear cookies from the session.
319
- """
320
- self._cookies = {}
321
- if self._session:
322
- self._session.cookies.clear()
323
-
324
- def _on_close(self) -> None:
325
- """
326
- Hook method called at the beginning of close().
327
- Override in subclass if needed.
328
- """
329
- pass
330
-
331
- def close(self) -> None:
332
- """
333
- Shutdown and clean up the session.
334
-
335
- This closes the underlying connection pool and removes the session.
336
- """
337
- self._on_close()
338
- if self._session:
339
- self._session.close()
340
- self._session = None
341
-
342
- def __enter__(self) -> Self:
343
- if self._session is None:
344
- self._init_session()
345
- return self
346
-
347
- def __exit__(
348
- self,
349
- exc_type: type[BaseException] | None,
350
- exc_val: BaseException | None,
351
- tb: types.TracebackType | None,
352
- ) -> None:
353
- self.close()
354
-
355
- def __del__(self) -> None:
356
- self.close()
357
-
358
- def __getstate__(self) -> dict[str, Any]:
359
- """
360
- Prepare object state for serialization.
361
-
362
- Removes unpickleable session object.
363
-
364
- :return: Serializable dict of the object state.
365
- """
366
- self.close()
367
- state = self.__dict__.copy()
368
- state.pop("_session", None)
369
- return state
370
-
371
- def __setstate__(self, state: dict[str, Any]) -> None:
372
- """
373
- Restore object state and reinitialize session.
374
-
375
- :param state: Saved state dictionary.
376
- """
377
- self.__dict__.update(state)
378
- self._init_session()