novel-downloader 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +2 -0
  3. novel_downloader/config/adapter.py +41 -13
  4. novel_downloader/config/models.py +13 -8
  5. novel_downloader/core/downloaders/base_async_downloader.py +1 -1
  6. novel_downloader/core/downloaders/common_downloader.py +1 -2
  7. novel_downloader/core/downloaders/qidian_downloader.py +1 -2
  8. novel_downloader/core/factory/downloader_factory.py +13 -11
  9. novel_downloader/core/interfaces/async_requester_protocol.py +9 -4
  10. novel_downloader/core/interfaces/requester_protocol.py +7 -4
  11. novel_downloader/core/parsers/base_parser.py +3 -3
  12. novel_downloader/core/parsers/common_parser/helper.py +7 -5
  13. novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +1 -1
  14. novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +5 -3
  15. novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +1 -1
  16. novel_downloader/core/parsers/qidian_parser/session/main_parser.py +5 -3
  17. novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +74 -18
  18. novel_downloader/core/parsers/qidian_parser/shared/helpers.py +2 -2
  19. novel_downloader/core/requesters/base_async_session.py +11 -6
  20. novel_downloader/core/requesters/base_browser.py +12 -8
  21. novel_downloader/core/requesters/base_session.py +9 -6
  22. novel_downloader/core/requesters/common_requester/common_async_session.py +4 -2
  23. novel_downloader/core/requesters/common_requester/common_session.py +4 -4
  24. novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +38 -19
  25. novel_downloader/core/requesters/qidian_requester/qidian_session.py +6 -6
  26. novel_downloader/core/savers/common_saver/common_epub.py +1 -1
  27. novel_downloader/locales/en.json +4 -0
  28. novel_downloader/locales/zh.json +4 -0
  29. novel_downloader/resources/config/settings.yaml +16 -13
  30. novel_downloader/utils/constants.py +2 -1
  31. novel_downloader/utils/fontocr/ocr_v2.py +6 -0
  32. novel_downloader/utils/time_utils/datetime_utils.py +1 -1
  33. novel_downloader/utils/time_utils/sleep_utils.py +27 -11
  34. {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/METADATA +1 -1
  35. {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/RECORD +39 -39
  36. {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/WHEEL +1 -1
  37. {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/entry_points.txt +0 -0
  38. {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/licenses/LICENSE +0 -0
  39. {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,7 @@ cookie handling, and defines abstract methods for subclasses.
13
13
  import abc
14
14
  import asyncio
15
15
  import time
16
- from typing import Any, Dict, Optional, Union
16
+ from typing import Any, Dict, Literal, Optional, Union
17
17
 
18
18
  import aiohttp
19
19
  from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
@@ -51,13 +51,16 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
51
51
 
52
52
  Attributes:
53
53
  _session (ClientSession): The persistent aiohttp client session.
54
- _timeout (int): Timeout for each request in seconds.
54
+ _timeout (float): Timeout for each request in seconds.
55
55
  _retry_times (int): Number of retry attempts on failure.
56
56
  _retry_interval (float): Delay (in seconds) between retries.
57
57
  _headers (Dict[str, str]): Default HTTP headers to send.
58
58
  _cookies (Dict[str, str]): Optional cookie jar for the session.
59
59
  """
60
60
 
61
+ def is_async(self) -> Literal[True]:
62
+ return True
63
+
61
64
  def _init_session(
62
65
  self,
63
66
  config: RequesterConfig,
@@ -111,7 +114,9 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
111
114
  )
112
115
 
113
116
  @abc.abstractmethod
114
- async def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
117
+ async def get_book_info(
118
+ self, book_id: str, wait_time: Optional[float] = None
119
+ ) -> str:
115
120
  """
116
121
  Fetch the raw HTML (or JSON) of the book info page asynchronously.
117
122
 
@@ -123,7 +128,7 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
123
128
 
124
129
  @abc.abstractmethod
125
130
  async def get_book_chapter(
126
- self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
131
+ self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
127
132
  ) -> str:
128
133
  """
129
134
  Fetch the raw HTML (or JSON) of a single chapter asynchronously.
@@ -135,7 +140,7 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
135
140
  """
136
141
  ...
137
142
 
138
- async def get_bookcase(self, wait_time: Optional[int] = None) -> str:
143
+ async def get_bookcase(self, wait_time: Optional[float] = None) -> str:
139
144
  """
140
145
  Optional: Retrieve the HTML content of the authenticated user's bookcase page.
141
146
  Subclasses that support user login/bookcase should override this.
@@ -238,7 +243,7 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
238
243
  return self._session
239
244
 
240
245
  @property
241
- def timeout(self) -> int:
246
+ def timeout(self) -> float:
242
247
  """Return the default timeout setting."""
243
248
  return self._timeout
244
249
 
@@ -11,9 +11,10 @@ specialized purposes.
11
11
 
12
12
  import abc
13
13
  import logging
14
- from typing import Any, Dict, Optional
14
+ from typing import Any, Dict, Literal, Optional, cast
15
15
 
16
- from DrissionPage import Chromium, ChromiumOptions, ChromiumPage
16
+ from DrissionPage import Chromium, ChromiumOptions
17
+ from DrissionPage._pages.mix_tab import MixTab
17
18
 
18
19
  from novel_downloader.config.models import RequesterConfig
19
20
  from novel_downloader.core.interfaces import RequesterProtocol
@@ -42,6 +43,9 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
42
43
  _page (ChromiumPage): The active browser tab.
43
44
  """
44
45
 
46
+ def is_async(self) -> Literal[False]:
47
+ return False
48
+
45
49
  def _init_browser(self, config: RequesterConfig) -> None:
46
50
  """
47
51
  Initialize the browser with specified options from RequesterConfig.
@@ -99,7 +103,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
99
103
  Set up the browser instance and open the default tab.
100
104
  """
101
105
  self._browser = Chromium(self._options)
102
- self._page = self._browser.get_tab()
106
+ self._page = cast(MixTab, self._browser.get_tab())
103
107
 
104
108
  def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
105
109
  """
@@ -111,7 +115,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
111
115
  )
112
116
 
113
117
  @abc.abstractmethod
114
- def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
118
+ def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
115
119
  """
116
120
  Fetch the raw HTML (or JSON) of the book info page.
117
121
 
@@ -123,7 +127,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
123
127
 
124
128
  @abc.abstractmethod
125
129
  def get_book_chapter(
126
- self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
130
+ self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
127
131
  ) -> str:
128
132
  """
129
133
  Fetch the raw HTML (or JSON) of a single chapter.
@@ -135,7 +139,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
135
139
  """
136
140
  ...
137
141
 
138
- def get_bookcase(self, wait_time: Optional[int] = None) -> str:
142
+ def get_bookcase(self, wait_time: Optional[float] = None) -> str:
139
143
  """
140
144
  Optional: Retrieve the HTML content of the authenticated user's bookcase page.
141
145
 
@@ -151,7 +155,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
151
155
  )
152
156
 
153
157
  @property
154
- def page(self) -> ChromiumPage:
158
+ def page(self) -> Optional[MixTab]:
155
159
  """
156
160
  Return the current Chromium page object.
157
161
 
@@ -160,7 +164,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
160
164
  return self._page
161
165
 
162
166
  @property
163
- def browser(self) -> Chromium:
167
+ def browser(self) -> Optional[Chromium]:
164
168
  """
165
169
  Return the Chromium browser instance.
166
170
 
@@ -10,7 +10,7 @@ persistent session and supports retries, headers, and timeout configurations.
10
10
  """
11
11
 
12
12
  import abc
13
- from typing import Any, Dict, Optional, Union
13
+ from typing import Any, Dict, Literal, Optional, Union
14
14
 
15
15
  import requests
16
16
  from requests import Response, Session
@@ -28,9 +28,12 @@ class BaseSession(RequesterProtocol, abc.ABC):
28
28
 
29
29
  Attributes:
30
30
  _session (requests.Session): The persistent HTTP session.
31
- _timeout (int): Timeout for each request in seconds.
31
+ _timeout (float): Timeout for each request in seconds.
32
32
  """
33
33
 
34
+ def is_async(self) -> Literal[False]:
35
+ return False
36
+
34
37
  def _init_session(
35
38
  self, config: RequesterConfig, cookies: Optional[Dict[str, str]] = None
36
39
  ) -> None:
@@ -81,7 +84,7 @@ class BaseSession(RequesterProtocol, abc.ABC):
81
84
  )
82
85
 
83
86
  @abc.abstractmethod
84
- def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
87
+ def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
85
88
  """
86
89
  Fetch the raw HTML (or JSON) of the book info page.
87
90
 
@@ -93,7 +96,7 @@ class BaseSession(RequesterProtocol, abc.ABC):
93
96
 
94
97
  @abc.abstractmethod
95
98
  def get_book_chapter(
96
- self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
99
+ self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
97
100
  ) -> str:
98
101
  """
99
102
  Fetch the raw HTML (or JSON) of a single chapter.
@@ -105,7 +108,7 @@ class BaseSession(RequesterProtocol, abc.ABC):
105
108
  """
106
109
  ...
107
110
 
108
- def get_bookcase(self, wait_time: Optional[int] = None) -> str:
111
+ def get_bookcase(self, wait_time: Optional[float] = None) -> str:
109
112
  """
110
113
  Optional: Retrieve the HTML content of the authenticated user's bookcase page.
111
114
 
@@ -171,7 +174,7 @@ class BaseSession(RequesterProtocol, abc.ABC):
171
174
  return self._session
172
175
 
173
176
  @property
174
- def timeout(self) -> int:
177
+ def timeout(self) -> float:
175
178
  """Return the default timeout setting."""
176
179
  return self._timeout
177
180
 
@@ -45,7 +45,9 @@ class CommonAsyncSession(BaseAsyncSession):
45
45
  self._site = site
46
46
  self._profile = profile
47
47
 
48
- async def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
48
+ async def get_book_info(
49
+ self, book_id: str, wait_time: Optional[float] = None
50
+ ) -> str:
49
51
  """
50
52
  Fetch the raw HTML of the book info page asynchronously.
51
53
 
@@ -62,7 +64,7 @@ class CommonAsyncSession(BaseAsyncSession):
62
64
  return html
63
65
 
64
66
  async def get_book_chapter(
65
- self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
67
+ self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
66
68
  ) -> str:
67
69
  """
68
70
  Fetch the raw HTML of a single chapter asynchronously.
@@ -47,7 +47,7 @@ class CommonSession(BaseSession):
47
47
  self._site = site
48
48
  self._profile = profile
49
49
 
50
- def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
50
+ def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
51
51
  """
52
52
  Fetch the raw HTML (or JSON) of the book info page.
53
53
 
@@ -64,7 +64,7 @@ class CommonSession(BaseSession):
64
64
  with self.session.get(url, timeout=self.timeout) as response:
65
65
  response.raise_for_status()
66
66
  content = response.text
67
- sleep_with_random_delay(base)
67
+ sleep_with_random_delay(base, add_spread=1.0)
68
68
  return content
69
69
  except Exception as e:
70
70
  if attempt == self.retry_times:
@@ -75,7 +75,7 @@ class CommonSession(BaseSession):
75
75
  raise RuntimeError("Unexpected error: get_book_info failed without returning")
76
76
 
77
77
  def get_book_chapter(
78
- self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
78
+ self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
79
79
  ) -> str:
80
80
  """
81
81
  Fetch the raw HTML (or JSON) of a single chapter.
@@ -94,7 +94,7 @@ class CommonSession(BaseSession):
94
94
  with self.session.get(url, timeout=self.timeout) as response:
95
95
  response.raise_for_status()
96
96
  content = response.text
97
- sleep_with_random_delay(base)
97
+ sleep_with_random_delay(base, add_spread=1.0)
98
98
  return content
99
99
  except Exception as e:
100
100
  if attempt == self.retry_times:
@@ -15,10 +15,12 @@ import random
15
15
  import time
16
16
  from typing import Optional
17
17
 
18
+ from DrissionPage._elements.chromium_element import ChromiumElement
18
19
  from DrissionPage.common import Keys
19
20
 
20
21
  from novel_downloader.config.models import RequesterConfig
21
22
  from novel_downloader.core.requesters.base_browser import BaseBrowser
23
+ from novel_downloader.utils.i18n import t
22
24
  from novel_downloader.utils.time_utils import sleep_with_random_delay
23
25
 
24
26
  logger = logging.getLogger(__name__)
@@ -56,6 +58,8 @@ class QidianBrowser(BaseBrowser):
56
58
 
57
59
  :return: True if the user appears to be logged in, False otherwise.
58
60
  """
61
+ if self._page is None:
62
+ raise RuntimeError("Browser page not initialized.")
59
63
  try:
60
64
  self._handle_overlay_mask()
61
65
  sign_in_elem = self._page.ele("@class=sign-in")
@@ -83,6 +87,8 @@ class QidianBrowser(BaseBrowser):
83
87
  :param max_retries: Maximum number of times to try clicking the login button.
84
88
  :return: True if login succeeds or is already in place; False otherwise.
85
89
  """
90
+ if self._page is None:
91
+ raise RuntimeError("Browser page not initialized.")
86
92
  original_url = self._page.url
87
93
  try:
88
94
  self._page.get("https://www.qidian.com/")
@@ -107,7 +113,8 @@ class QidianBrowser(BaseBrowser):
107
113
 
108
114
  # return to original page
109
115
  try:
110
- self._page.get(original_url)
116
+ if original_url:
117
+ self._page.get(original_url)
111
118
  except Exception as e:
112
119
  logger.debug("[auth] Failed to restore page URL: %s", e)
113
120
 
@@ -117,6 +124,8 @@ class QidianBrowser(BaseBrowser):
117
124
  """
118
125
  Detect and close any full-page overlay mask that might block the login UI.
119
126
  """
127
+ if self._page is None:
128
+ raise RuntimeError("Browser page not initialized.")
120
129
  try:
121
130
  mask = self._page.ele("@@tag()=div@@class=mask", timeout=2)
122
131
  if not mask:
@@ -143,10 +152,12 @@ class QidianBrowser(BaseBrowser):
143
152
 
144
153
  :param attempt: The current attempt number (for logging).
145
154
  """
155
+ if self._page is None:
156
+ raise RuntimeError("Browser page not initialized.")
146
157
  try:
147
158
  logger.debug("[auth] Attempting login click (#%s).", attempt)
148
159
  login_btn = self._page.ele("@id=login-btn", timeout=5)
149
- if login_btn:
160
+ if isinstance(login_btn, ChromiumElement):
150
161
  login_btn.click()
151
162
  logger.debug("[auth] Login button clicked.")
152
163
  else:
@@ -170,6 +181,8 @@ class QidianBrowser(BaseBrowser):
170
181
  :param max_retries: Number of times to check for login success.
171
182
  :return: True if login was detected, False otherwise.
172
183
  """
184
+ if self._page is None:
185
+ raise RuntimeError("Browser page not initialized.")
173
186
  original_headless = self._headless
174
187
 
175
188
  # 1. Switch to headful mode if needed
@@ -193,13 +206,11 @@ class QidianBrowser(BaseBrowser):
193
206
  logger.info("[auth] Detected successful login.")
194
207
  self._logged_in = True
195
208
  break
196
-
197
- logger.info(
198
- "[auth] Attempt %d/%d: Press Enter after completing login...",
199
- attempt,
200
- max_retries,
209
+ if attempt == 1:
210
+ print(t("login_prompt_intro"))
211
+ input(
212
+ t("login_prompt_press_enter", attempt=attempt, max_retries=max_retries)
201
213
  )
202
- input()
203
214
  else:
204
215
  logger.warning("[auth] Manual login failed after %d attempts.", max_retries)
205
216
  self._logged_in = False
@@ -266,7 +277,7 @@ class QidianBrowser(BaseBrowser):
266
277
  """
267
278
  return self.QIDIAN_BOOKCASE_URL
268
279
 
269
- def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
280
+ def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
270
281
  """
271
282
  Retrieve the HTML of a Qidian book info page.
272
283
 
@@ -279,14 +290,16 @@ class QidianBrowser(BaseBrowser):
279
290
  If None, uses `self._config.wait_time`.
280
291
  :return: The HTML content of the book info page, or an empty string on error.
281
292
  """
293
+ if self._page is None:
294
+ raise RuntimeError("Browser page not initialized.")
282
295
  url = self._build_book_info_url(book_id)
283
296
  try:
284
297
  # Navigate and fetch
285
298
  self._page.get(url)
286
299
 
287
- # Randomized humanlike delay
300
+ # Randomized human-like delay
288
301
  base = wait_time if wait_time is not None else self._config.wait_time
289
- sleep_with_random_delay(base, base * 0.2)
302
+ sleep_with_random_delay(base, mul_spread=1.2)
290
303
 
291
304
  html = str(self._page.html)
292
305
  logger.debug("[fetch] Fetched book info for ID %s from %s", book_id, url)
@@ -303,6 +316,8 @@ class QidianBrowser(BaseBrowser):
303
316
  :param presses: Number of DOWN key presses.
304
317
  :param pause: Seconds to wait between each press.
305
318
  """
319
+ if self._page is None:
320
+ raise RuntimeError("Browser page not initialized.")
306
321
  for _ in range(presses):
307
322
  try:
308
323
  self._page.actions.key_down(Keys.DOWN)
@@ -311,14 +326,14 @@ class QidianBrowser(BaseBrowser):
311
326
  time.sleep(pause)
312
327
 
313
328
  def get_book_chapter(
314
- self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
329
+ self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
315
330
  ) -> str:
316
331
  """
317
332
  Retrieve the HTML content of a specific chapter.
318
333
 
319
334
  Ensures the user is logged in, navigates to the chapter page,
320
335
  waits a randomized delay to mimic human reading, then scrolls
321
- to trigger any lazyloaded content.
336
+ to trigger any lazy-loaded content.
322
337
 
323
338
  :param book_id: The identifier of the book.
324
339
  :param chapter_id: The identifier of the chapter.
@@ -326,14 +341,16 @@ class QidianBrowser(BaseBrowser):
326
341
  falls back to `self._config.wait_time`.
327
342
  :return: The HTML content of the chapter page, or empty string on error.
328
343
  """
344
+ if self._page is None:
345
+ raise RuntimeError("Browser page not initialized.")
329
346
  url = self._build_chapter_url(book_id, chapter_id)
330
347
  try:
331
348
  # 1. Navigate to chapter URL
332
349
  self._page.get(url)
333
350
 
334
- # 2. Randomized humanlike delay
351
+ # 2. Randomized human-like delay
335
352
  base = wait_time if wait_time is not None else self._config.wait_time
336
- # sleep_with_random_delay(base, base*0.2)
353
+ # sleep_with_random_delay(base, mul_spread=1.2)
337
354
 
338
355
  # 3. Scroll down to load dynamic content
339
356
  presses = int(random.uniform(base, base + 5) * 2)
@@ -347,15 +364,17 @@ class QidianBrowser(BaseBrowser):
347
364
  logger.warning("[fetch] Error fetching chapter from '%s': %s", url, e)
348
365
  return ""
349
366
 
350
- def get_bookcase(self, wait_time: Optional[int] = None) -> str:
367
+ def get_bookcase(self, wait_time: Optional[float] = None) -> str:
351
368
  """
352
- Retrieve the HTML content of the loggedin user's Qidian bookcase page.
369
+ Retrieve the HTML content of the logged-in user's Qidian bookcase page.
353
370
 
354
371
  :param wait_time: Base number of seconds to wait before returning content.
355
372
  If None, falls back to `self._config.wait_time`.
356
373
  :return: The HTML markup of the bookcase page, or empty string on error.
357
374
  :raises RuntimeError: If the user is not logged in.
358
375
  """
376
+ if self._page is None:
377
+ raise RuntimeError("Browser page not initialized.")
359
378
  if not self._logged_in:
360
379
  raise RuntimeError("User not logged in. Please call login() first.")
361
380
 
@@ -364,9 +383,9 @@ class QidianBrowser(BaseBrowser):
364
383
  # Navigate to the bookcase page
365
384
  self._page.get(url)
366
385
 
367
- # Randomized humanlike delay
386
+ # Randomized human-like delay
368
387
  base = wait_time if wait_time is not None else self._config.wait_time
369
- sleep_with_random_delay(base, base * 0.2)
388
+ sleep_with_random_delay(base, mul_spread=1.2)
370
389
 
371
390
  html = str(self._page.html)
372
391
  logger.debug("[fetch] Fetched bookcase HTML from %s", url)
@@ -108,7 +108,7 @@ class QidianSession(BaseSession):
108
108
  self.get("https://www.qidian.com")
109
109
  return True
110
110
 
111
- def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
111
+ def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
112
112
  """
113
113
  Fetch the raw HTML of the book info page.
114
114
 
@@ -123,7 +123,7 @@ class QidianSession(BaseSession):
123
123
  try:
124
124
  resp = self.get(url)
125
125
  resp.raise_for_status()
126
- sleep_with_random_delay(base_delay, base_delay * 0.2)
126
+ sleep_with_random_delay(base_delay, mul_spread=1.2)
127
127
  return resp.text
128
128
  except Exception as exc:
129
129
  logger.warning(
@@ -140,7 +140,7 @@ class QidianSession(BaseSession):
140
140
  raise RuntimeError("Unexpected fall-through in get_book_info")
141
141
 
142
142
  def get_book_chapter(
143
- self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
143
+ self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
144
144
  ) -> str:
145
145
  """
146
146
  Fetch the HTML of a single chapter.
@@ -157,7 +157,7 @@ class QidianSession(BaseSession):
157
157
  try:
158
158
  resp = self.get(url)
159
159
  resp.raise_for_status()
160
- sleep_with_random_delay(base_delay, base_delay * 0.2)
160
+ sleep_with_random_delay(base_delay, mul_spread=1.2)
161
161
  return resp.text
162
162
  except Exception as exc:
163
163
  logger.warning(
@@ -174,7 +174,7 @@ class QidianSession(BaseSession):
174
174
 
175
175
  raise RuntimeError("Unexpected fall-through in get_book_chapter")
176
176
 
177
- def get_bookcase(self, wait_time: Optional[int] = None) -> str:
177
+ def get_bookcase(self, wait_time: Optional[float] = None) -> str:
178
178
  """
179
179
  Retrieve the user's *bookcase* page.
180
180
 
@@ -186,7 +186,7 @@ class QidianSession(BaseSession):
186
186
  try:
187
187
  resp = self.get(self.QIDIAN_BOOKCASE_URL, allow_redirects=True)
188
188
  resp.raise_for_status()
189
- sleep_with_random_delay(base_delay, base_delay * 0.2)
189
+ sleep_with_random_delay(base_delay, mul_spread=1.2)
190
190
  return resp.text
191
191
  except Exception as exc:
192
192
  logger.warning(
@@ -212,7 +212,7 @@ def common_save_as_epub(
212
212
 
213
213
  # --- 5. Finalize EPUB ---
214
214
  logger.info("%s Building TOC and spine...", TAG)
215
- book.toc = tuple(toc_list)
215
+ book.toc = toc_list
216
216
  book.spine = spine
217
217
  book.add_item(epub.EpubNcx())
218
218
  book.add_item(epub.EpubNav())
@@ -58,6 +58,7 @@
58
58
  "download_option_site": "Website source, default is '{default}'.",
59
59
  "download_using_config": "Using config: {path}",
60
60
  "download_site_info": "Site: {site}",
61
+ "download_site_mode": "Mode: {mode}",
61
62
  "download_no_ids": "No book IDs provided. Exiting.",
62
63
  "download_fail_get_ids": "Failed to get book IDs from config: {err}",
63
64
  "download_only_example": "Only example book IDs found (e.g. '{example}').",
@@ -65,6 +66,9 @@
65
66
  "download_downloading": "Downloading book {book_id} from {site}...",
66
67
  "download_prompt_parse": "Parse...",
67
68
 
69
+ "login_prompt_intro": "Manual login is required. Please switch to the browser and log in.",
70
+ "login_prompt_press_enter": "Attempt {attempt}/{max_retries}: Press Enter after completing login in the browser...",
71
+
68
72
  "clean_logs": "Clean log directory",
69
73
  "clean_cache": "Clean scripts and browser cache",
70
74
  "clean_state": "Clean state files (state.json)",
@@ -58,6 +58,7 @@
58
58
  "download_option_site": "网站来源, 默认为 '{default}'",
59
59
  "download_using_config": "使用配置: {path}",
60
60
  "download_site_info": "站点: {site}",
61
+ "download_site_mode": "使用模式: {mode}",
61
62
  "download_no_ids": "未提供书籍 ID, 正在退出",
62
63
  "download_fail_get_ids": "从配置获取书籍 ID 失败: {err}",
63
64
  "download_only_example": "只发现示例书籍 ID (例如 '{example}')",
@@ -65,6 +66,9 @@
65
66
  "download_downloading": "正在从 {site} 下载书籍 {book_id}...",
66
67
  "download_prompt_parse": "结束...",
67
68
 
69
+ "login_prompt_intro": "需要手动登录, 请切换到浏览器窗口完成登录",
70
+ "login_prompt_press_enter": "第 {attempt}/{max_retries} 次尝试: 请在浏览器中完成登录后按回车键...",
71
+
68
72
  "clean_logs": "清理日志目录",
69
73
  "clean_cache": "清理脚本和浏览器缓存",
70
74
  "clean_state": "清理状态文件 (state.json)",
@@ -1,9 +1,9 @@
1
1
  # 网络请求层设置
2
2
  requests:
3
- wait_time: 5 # 每次请求等待时间 (秒)
3
+ wait_time: 5.0 # 每次请求等待时间 (秒)
4
4
  retry_times: 3 # 请求失败重试次数
5
- retry_interval: 5
6
- timeout: 30 # 页面加载超时时间 (秒)
5
+ retry_interval: 5.0
6
+ timeout: 30.0 # 页面加载超时时间 (秒)
7
7
  max_rps: null # 最大请求速率 (requests per second), 为 null 则不限制
8
8
  # DrissionPage 专用设置
9
9
  headless: false # 是否以无头模式启动浏览器
@@ -15,7 +15,7 @@ requests:
15
15
 
16
16
  # 全局通用设置
17
17
  general:
18
- request_interval: 5 # 同一本书各章节请求间隔 (秒)
18
+ request_interval: 5.0 # 同一本书各章节请求间隔 (秒)
19
19
  raw_data_dir: "./raw_data" # 原始章节 HTML/JSON 存放目录
20
20
  output_dir: "./downloads" # 最终输出文件存放目录
21
21
  cache_dir: "./novel_cache" # 本地缓存目录 (字体 / 图片等)
@@ -26,6 +26,18 @@ general:
26
26
  debug:
27
27
  save_html: false # 是否将抓取到的原始 HTML 保留到磁盘
28
28
  log_level: "INFO" # 日志级别: DEBUG, INFO, WARNING, ERROR
29
+ font_ocr:
30
+ decode_font: false # 是否尝试本地解码混淆字体
31
+ use_freq: false # 是否使用频率分析
32
+ ocr_version: "v2.0" # "v1.0" / "v2.0"
33
+ use_ocr: true # 是否使用 OCR 辅助识别文本
34
+ use_vec: false # 是否使用 Vector 辅助识别文本
35
+ save_font_debug: false # 是否保存字体解码调试数据
36
+ batch_size: 32
37
+ gpu_mem: 500 # GPU 显存限制 (MB)
38
+ gpu_id: null # 使用哪个 GPU
39
+ ocr_weight: 0.6
40
+ vec_weight: 0.4
29
41
 
30
42
  # 各站点的特定配置
31
43
  sites:
@@ -38,15 +50,6 @@ sites:
38
50
  - "0000000000"
39
51
  mode: "browser" # browser / session
40
52
  login_required: true # 是否需要登录才能访问
41
- decode_font: false # 是否尝试本地解码混淆字体
42
- use_freq: false # 是否使用频率分析
43
- ocr_version: "v2.0" # "v1.0" / "v2.0"
44
- use_ocr: true # 是否使用 OCR 辅助识别文本
45
- use_vec: false # 是否使用 Vector 辅助识别文本
46
- save_font_debug: false # 是否保存字体解码调试数据
47
- batch_size: 32
48
- ocr_weight: 0.6
49
- vec_weight: 0.4
50
53
  #
51
54
  sample_site:
52
55
  book_ids:
@@ -26,11 +26,12 @@ LOGGER_NAME = PACKAGE_NAME # Root logger name
26
26
  # -----------------------------------------------------------------------------
27
27
  # Base config directory (e.g. ~/AppData/Local/novel_downloader/)
28
28
  BASE_CONFIG_DIR = Path(user_config_dir(APP_DIR_NAME, appauthor=False))
29
+ WORK_DIR = Path.cwd()
29
30
  PACKAGE_ROOT: Path = Path(__file__).parent.parent
30
31
  LOCALES_DIR: Path = PACKAGE_ROOT / "locales"
31
32
 
32
33
  # Subdirectories under BASE_CONFIG_DIR
33
- LOGGER_DIR = BASE_CONFIG_DIR / "logs"
34
+ LOGGER_DIR = WORK_DIR / "logs"
34
35
  JS_SCRIPT_DIR = BASE_CONFIG_DIR / "scripts"
35
36
  STATE_DIR = BASE_CONFIG_DIR / "state"
36
37
  DATA_DIR = BASE_CONFIG_DIR / "data"
@@ -221,6 +221,8 @@ class FontOCRV2:
221
221
  use_ocr: bool = True,
222
222
  use_vec: bool = False,
223
223
  batch_size: int = 32,
224
+ gpu_mem: int = 500,
225
+ gpu_id: Optional[int] = None,
224
226
  ocr_weight: float = 0.6,
225
227
  vec_weight: float = 0.4,
226
228
  ocr_version: str = "v1.0",
@@ -232,6 +234,8 @@ class FontOCRV2:
232
234
  self.use_ocr = use_ocr
233
235
  self.use_vec = use_vec
234
236
  self.batch_size = batch_size
237
+ self.gpu_mem = gpu_mem
238
+ self.gpu_id = gpu_id
235
239
  self.ocr_weight = ocr_weight
236
240
  self.vec_weight = vec_weight
237
241
  self.ocr_version = ocr_version
@@ -279,6 +283,8 @@ class FontOCRV2:
279
283
  rec_batch_num=self.batch_size,
280
284
  use_space_char=False,
281
285
  use_gpu=gpu_available,
286
+ gpu_mem=self.gpu_mem,
287
+ gpu_id=self.gpu_id,
282
288
  )
283
289
 
284
290
  def _load_char_freq_db(self) -> bool:
@@ -106,7 +106,7 @@ def calculate_time_difference(
106
106
  """
107
107
  Calculate the difference between two datetime values.
108
108
 
109
- :param from_time_str: Date‐time string "YYYYMMDD HH:MM:SS" for the start.
109
+ :param from_time_str: Date‐time string "YYYY-MM-DD HH:MM:SS" for the start.
110
110
  :param tz_str: Timezone of from_time_str, e.g. 'UTC+8'. Defaults to 'UTC'.
111
111
  :param to_time_str: Optional date‐time string for the end; if None, uses now().
112
112
  :param to_tz_str: Timezone of to_time_str. Defaults to 'UTC'.