novel-downloader 1.3.2__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/clean.py +97 -78
  3. novel_downloader/cli/config.py +177 -0
  4. novel_downloader/cli/download.py +132 -87
  5. novel_downloader/cli/export.py +77 -0
  6. novel_downloader/cli/main.py +21 -28
  7. novel_downloader/config/__init__.py +1 -25
  8. novel_downloader/config/adapter.py +32 -31
  9. novel_downloader/config/loader.py +3 -3
  10. novel_downloader/config/site_rules.py +1 -2
  11. novel_downloader/core/__init__.py +3 -6
  12. novel_downloader/core/downloaders/__init__.py +10 -13
  13. novel_downloader/core/downloaders/base.py +233 -0
  14. novel_downloader/core/downloaders/biquge.py +27 -0
  15. novel_downloader/core/downloaders/common.py +414 -0
  16. novel_downloader/core/downloaders/esjzone.py +27 -0
  17. novel_downloader/core/downloaders/linovelib.py +27 -0
  18. novel_downloader/core/downloaders/qianbi.py +27 -0
  19. novel_downloader/core/downloaders/qidian.py +352 -0
  20. novel_downloader/core/downloaders/sfacg.py +27 -0
  21. novel_downloader/core/downloaders/yamibo.py +27 -0
  22. novel_downloader/core/exporters/__init__.py +37 -0
  23. novel_downloader/core/{savers → exporters}/base.py +73 -44
  24. novel_downloader/core/exporters/biquge.py +25 -0
  25. novel_downloader/core/exporters/common/__init__.py +12 -0
  26. novel_downloader/core/{savers → exporters}/common/epub.py +40 -52
  27. novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +36 -39
  28. novel_downloader/core/{savers → exporters}/common/txt.py +20 -24
  29. novel_downloader/core/exporters/epub_utils/__init__.py +40 -0
  30. novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -1
  31. novel_downloader/core/exporters/epub_utils/image_loader.py +131 -0
  32. novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -3
  33. novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +49 -2
  34. novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -1
  35. novel_downloader/core/exporters/esjzone.py +25 -0
  36. novel_downloader/core/exporters/linovelib/__init__.py +10 -0
  37. novel_downloader/core/exporters/linovelib/epub.py +449 -0
  38. novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
  39. novel_downloader/core/exporters/linovelib/txt.py +129 -0
  40. novel_downloader/core/exporters/qianbi.py +25 -0
  41. novel_downloader/core/{savers → exporters}/qidian.py +8 -8
  42. novel_downloader/core/exporters/sfacg.py +25 -0
  43. novel_downloader/core/exporters/yamibo.py +25 -0
  44. novel_downloader/core/factory/__init__.py +5 -17
  45. novel_downloader/core/factory/downloader.py +24 -126
  46. novel_downloader/core/factory/exporter.py +58 -0
  47. novel_downloader/core/factory/fetcher.py +96 -0
  48. novel_downloader/core/factory/parser.py +17 -12
  49. novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
  50. novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
  51. novel_downloader/core/fetchers/base/browser.py +383 -0
  52. novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
  53. novel_downloader/core/fetchers/base/session.py +419 -0
  54. novel_downloader/core/fetchers/biquge/__init__.py +14 -0
  55. novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
  56. novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
  57. novel_downloader/core/fetchers/common/__init__.py +14 -0
  58. novel_downloader/core/fetchers/common/browser.py +79 -0
  59. novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
  60. novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
  61. novel_downloader/core/fetchers/esjzone/browser.py +202 -0
  62. novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
  63. novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
  64. novel_downloader/core/fetchers/linovelib/browser.py +178 -0
  65. novel_downloader/core/fetchers/linovelib/session.py +178 -0
  66. novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
  67. novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
  68. novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
  69. novel_downloader/core/fetchers/qidian/__init__.py +14 -0
  70. novel_downloader/core/fetchers/qidian/browser.py +266 -0
  71. novel_downloader/core/fetchers/qidian/session.py +326 -0
  72. novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
  73. novel_downloader/core/fetchers/sfacg/browser.py +189 -0
  74. novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
  75. novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
  76. novel_downloader/core/fetchers/yamibo/browser.py +229 -0
  77. novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
  78. novel_downloader/core/interfaces/__init__.py +8 -12
  79. novel_downloader/core/interfaces/downloader.py +54 -0
  80. novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
  81. novel_downloader/core/interfaces/fetcher.py +162 -0
  82. novel_downloader/core/interfaces/parser.py +6 -7
  83. novel_downloader/core/parsers/__init__.py +5 -6
  84. novel_downloader/core/parsers/base.py +9 -13
  85. novel_downloader/core/parsers/biquge/main_parser.py +12 -13
  86. novel_downloader/core/parsers/common/helper.py +3 -3
  87. novel_downloader/core/parsers/common/main_parser.py +39 -34
  88. novel_downloader/core/parsers/esjzone/main_parser.py +24 -17
  89. novel_downloader/core/parsers/linovelib/__init__.py +10 -0
  90. novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
  91. novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
  92. novel_downloader/core/parsers/qidian/__init__.py +2 -11
  93. novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
  94. novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
  95. novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
  96. novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
  97. novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
  98. novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
  99. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
  100. novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
  101. novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
  102. novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
  103. novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
  104. novel_downloader/locales/en.json +18 -2
  105. novel_downloader/locales/zh.json +18 -2
  106. novel_downloader/models/__init__.py +64 -0
  107. novel_downloader/models/browser.py +21 -0
  108. novel_downloader/models/chapter.py +25 -0
  109. novel_downloader/models/config.py +100 -0
  110. novel_downloader/models/login.py +20 -0
  111. novel_downloader/models/site_rules.py +99 -0
  112. novel_downloader/models/tasks.py +33 -0
  113. novel_downloader/models/types.py +15 -0
  114. novel_downloader/resources/config/settings.toml +31 -25
  115. novel_downloader/resources/json/linovelib_font_map.json +3573 -0
  116. novel_downloader/tui/__init__.py +7 -0
  117. novel_downloader/tui/app.py +32 -0
  118. novel_downloader/tui/main.py +17 -0
  119. novel_downloader/tui/screens/__init__.py +14 -0
  120. novel_downloader/tui/screens/home.py +191 -0
  121. novel_downloader/tui/screens/login.py +74 -0
  122. novel_downloader/tui/styles/home_layout.tcss +79 -0
  123. novel_downloader/tui/widgets/richlog_handler.py +24 -0
  124. novel_downloader/utils/__init__.py +6 -0
  125. novel_downloader/utils/chapter_storage.py +25 -38
  126. novel_downloader/utils/constants.py +15 -5
  127. novel_downloader/utils/cookies.py +66 -0
  128. novel_downloader/utils/crypto_utils.py +1 -74
  129. novel_downloader/utils/file_utils/io.py +1 -1
  130. novel_downloader/utils/fontocr/ocr_v1.py +2 -1
  131. novel_downloader/utils/fontocr/ocr_v2.py +2 -2
  132. novel_downloader/utils/hash_store.py +10 -18
  133. novel_downloader/utils/hash_utils.py +3 -2
  134. novel_downloader/utils/logger.py +2 -3
  135. novel_downloader/utils/network.py +53 -39
  136. novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
  137. novel_downloader/utils/text_utils/font_mapping.py +1 -1
  138. novel_downloader/utils/text_utils/text_cleaning.py +1 -1
  139. novel_downloader/utils/time_utils/datetime_utils.py +3 -3
  140. novel_downloader/utils/time_utils/sleep_utils.py +3 -3
  141. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +72 -38
  142. novel_downloader-1.4.0.dist-info/RECORD +170 -0
  143. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
  144. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
  145. novel_downloader/cli/interactive.py +0 -66
  146. novel_downloader/cli/settings.py +0 -177
  147. novel_downloader/config/models.py +0 -187
  148. novel_downloader/core/downloaders/base/__init__.py +0 -14
  149. novel_downloader/core/downloaders/base/base_async.py +0 -153
  150. novel_downloader/core/downloaders/base/base_sync.py +0 -208
  151. novel_downloader/core/downloaders/biquge/__init__.py +0 -14
  152. novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
  153. novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
  154. novel_downloader/core/downloaders/common/__init__.py +0 -14
  155. novel_downloader/core/downloaders/common/common_async.py +0 -218
  156. novel_downloader/core/downloaders/common/common_sync.py +0 -210
  157. novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
  158. novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
  159. novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
  160. novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
  161. novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
  162. novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
  163. novel_downloader/core/downloaders/qidian/__init__.py +0 -10
  164. novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -227
  165. novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
  166. novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
  167. novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
  168. novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
  169. novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
  170. novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
  171. novel_downloader/core/factory/requester.py +0 -144
  172. novel_downloader/core/factory/saver.py +0 -56
  173. novel_downloader/core/interfaces/async_downloader.py +0 -36
  174. novel_downloader/core/interfaces/async_requester.py +0 -84
  175. novel_downloader/core/interfaces/sync_downloader.py +0 -36
  176. novel_downloader/core/interfaces/sync_requester.py +0 -82
  177. novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
  178. novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
  179. novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
  180. novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
  181. novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
  182. novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
  183. novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
  184. novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
  185. novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
  186. novel_downloader/core/requesters/base/async_session.py +0 -410
  187. novel_downloader/core/requesters/base/browser.py +0 -337
  188. novel_downloader/core/requesters/base/session.py +0 -378
  189. novel_downloader/core/requesters/biquge/__init__.py +0 -14
  190. novel_downloader/core/requesters/common/__init__.py +0 -17
  191. novel_downloader/core/requesters/common/session.py +0 -113
  192. novel_downloader/core/requesters/esjzone/__init__.py +0 -13
  193. novel_downloader/core/requesters/esjzone/session.py +0 -235
  194. novel_downloader/core/requesters/qianbi/__init__.py +0 -13
  195. novel_downloader/core/requesters/qidian/__init__.py +0 -21
  196. novel_downloader/core/requesters/qidian/broswer.py +0 -307
  197. novel_downloader/core/requesters/qidian/session.py +0 -290
  198. novel_downloader/core/requesters/sfacg/__init__.py +0 -13
  199. novel_downloader/core/requesters/sfacg/session.py +0 -242
  200. novel_downloader/core/requesters/yamibo/__init__.py +0 -13
  201. novel_downloader/core/requesters/yamibo/session.py +0 -237
  202. novel_downloader/core/savers/__init__.py +0 -34
  203. novel_downloader/core/savers/biquge.py +0 -25
  204. novel_downloader/core/savers/common/__init__.py +0 -12
  205. novel_downloader/core/savers/epub_utils/__init__.py +0 -26
  206. novel_downloader/core/savers/esjzone.py +0 -25
  207. novel_downloader/core/savers/qianbi.py +0 -25
  208. novel_downloader/core/savers/sfacg.py +0 -25
  209. novel_downloader/core/savers/yamibo.py +0 -25
  210. novel_downloader/resources/config/rules.toml +0 -196
  211. novel_downloader-1.3.2.dist-info/RECORD +0 -165
  212. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
  213. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.factory.parser_factory
4
- --------------------------------------------
3
+ novel_downloader.core.factory.parser
4
+ ------------------------------------
5
5
 
6
6
  This module implements a factory function for creating parser instances
7
7
  based on the site name and parser mode specified in the configuration.
@@ -9,45 +9,50 @@ based on the site name and parser mode specified in the configuration.
9
9
 
10
10
  from collections.abc import Callable
11
11
 
12
- from novel_downloader.config import ParserConfig, load_site_rules
12
+ from novel_downloader.config import load_site_rules
13
13
  from novel_downloader.core.interfaces import ParserProtocol
14
14
  from novel_downloader.core.parsers import (
15
15
  BiqugeParser,
16
16
  CommonParser,
17
17
  EsjzoneParser,
18
+ LinovelibParser,
18
19
  QianbiParser,
19
- QidianBrowserParser,
20
- QidianSessionParser,
20
+ QidianParser,
21
21
  SfacgParser,
22
22
  YamiboParser,
23
23
  )
24
+ from novel_downloader.models import ParserConfig
24
25
 
25
26
  ParserBuilder = Callable[[ParserConfig], ParserProtocol]
26
27
 
27
28
  _site_map: dict[str, dict[str, ParserBuilder]] = {
28
29
  "biquge": {
30
+ "browser": BiqugeParser,
29
31
  "session": BiqugeParser,
30
- "async": BiqugeParser,
31
32
  },
32
33
  "esjzone": {
34
+ "browser": EsjzoneParser,
33
35
  "session": EsjzoneParser,
34
- "async": EsjzoneParser,
36
+ },
37
+ "linovelib": {
38
+ "browser": LinovelibParser,
39
+ "session": LinovelibParser,
35
40
  },
36
41
  "qianbi": {
42
+ "browser": QianbiParser,
37
43
  "session": QianbiParser,
38
- "async": QianbiParser,
39
44
  },
40
45
  "qidian": {
41
- "browser": QidianBrowserParser,
42
- "session": QidianSessionParser,
46
+ "browser": QidianParser,
47
+ "session": QidianParser,
43
48
  },
44
49
  "sfacg": {
50
+ "browser": SfacgParser,
45
51
  "session": SfacgParser,
46
- "async": SfacgParser,
47
52
  },
48
53
  "yamibo": {
54
+ "browser": YamiboParser,
49
55
  "session": YamiboParser,
50
- "async": YamiboParser,
51
56
  },
52
57
  }
53
58
 
@@ -1,9 +1,9 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.requesters
4
- --------------------------------
3
+ novel_downloader.core.fetchers
4
+ ------------------------------
5
5
 
6
- This package provides requester implementations for different novel platforms.
6
+ This package provides fetcher implementations for different novel platforms.
7
7
  Each submodule corresponds to a specific site and encapsulates the logic needed
8
8
  to perform network interactions, such as logging in, sending requests,
9
9
  or interacting with browser/session-based sources.
@@ -11,6 +11,7 @@ or interacting with browser/session-based sources.
11
11
  Subpackages:
12
12
  - biquge (笔趣阁)
13
13
  - esjzone (ESJ Zone)
14
+ - linovelib (哔哩轻小说)
14
15
  - qianbi (铅笔小说)
15
16
  - qidian (起点中文网)
16
17
  - sfacg (SF轻小说)
@@ -19,19 +20,23 @@ Subpackages:
19
20
  """
20
21
 
21
22
  from .biquge import (
22
- BiqugeAsyncSession,
23
+ BiqugeBrowser,
23
24
  BiqugeSession,
24
25
  )
25
26
  from .common import (
26
- CommonAsyncSession,
27
+ CommonBrowser,
27
28
  CommonSession,
28
29
  )
29
30
  from .esjzone import (
30
- EsjzoneAsyncSession,
31
+ EsjzoneBrowser,
31
32
  EsjzoneSession,
32
33
  )
34
+ from .linovelib import (
35
+ LinovelibBrowser,
36
+ LinovelibSession,
37
+ )
33
38
  from .qianbi import (
34
- QianbiAsyncSession,
39
+ QianbiBrowser,
35
40
  QianbiSession,
36
41
  )
37
42
  from .qidian import (
@@ -39,27 +44,29 @@ from .qidian import (
39
44
  QidianSession,
40
45
  )
41
46
  from .sfacg import (
42
- SfacgAsyncSession,
47
+ SfacgBrowser,
43
48
  SfacgSession,
44
49
  )
45
50
  from .yamibo import (
46
- YamiboAsyncSession,
51
+ YamiboBrowser,
47
52
  YamiboSession,
48
53
  )
49
54
 
50
55
  __all__ = [
51
- "BiqugeAsyncSession",
56
+ "BiqugeBrowser",
52
57
  "BiqugeSession",
53
- "CommonAsyncSession",
58
+ "CommonBrowser",
54
59
  "CommonSession",
55
- "EsjzoneAsyncSession",
60
+ "EsjzoneBrowser",
56
61
  "EsjzoneSession",
57
- "QianbiAsyncSession",
62
+ "LinovelibBrowser",
63
+ "LinovelibSession",
64
+ "QianbiBrowser",
58
65
  "QianbiSession",
59
66
  "QidianBrowser",
60
67
  "QidianSession",
61
- "SfacgAsyncSession",
68
+ "SfacgBrowser",
62
69
  "SfacgSession",
63
- "YamiboAsyncSession",
70
+ "YamiboBrowser",
64
71
  "YamiboSession",
65
72
  ]
@@ -1,16 +1,14 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.requesters.base
4
- -------------------------------------
3
+ novel_downloader.core.fetchers.base
4
+ -----------------------------------
5
5
 
6
6
  """
7
7
 
8
- from .async_session import BaseAsyncSession
9
8
  from .browser import BaseBrowser
10
9
  from .session import BaseSession
11
10
 
12
11
  __all__ = [
13
- "BaseAsyncSession",
14
12
  "BaseBrowser",
15
13
  "BaseSession",
16
14
  ]
@@ -0,0 +1,383 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.base.browser
4
+ -------------------------------------------
5
+
6
+ """
7
+
8
+ import abc
9
+ import logging
10
+ import types
11
+ from typing import Any, Literal, Self
12
+
13
+ from playwright.async_api import (
14
+ Browser,
15
+ BrowserContext,
16
+ BrowserType,
17
+ Page,
18
+ Playwright,
19
+ ViewportSize,
20
+ async_playwright,
21
+ )
22
+
23
+ from novel_downloader.core.interfaces import FetcherProtocol
24
+ from novel_downloader.models import FetcherConfig, LoginField, NewContextOptions
25
+ from novel_downloader.utils.constants import (
26
+ DATA_DIR,
27
+ DEFAULT_USER_AGENT,
28
+ )
29
+
30
+ from .rate_limiter import TokenBucketRateLimiter
31
+
32
+ _STEALTH_SCRIPT = """
33
+ Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
34
+ Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });
35
+ Object.defineProperty(navigator, 'languages', { get: () => ['zh-CN', 'zh', 'en'] });
36
+ window.chrome = { runtime: {} };
37
+ """.strip()
38
+
39
+
40
class BaseBrowser(FetcherProtocol, abc.ABC):
    """
    Base class for Playwright-driven fetchers.

    It owns the Playwright driver, the launched browser, one browser
    context and (optionally) one reusable page, and exposes the
    configuration values of ``FetcherConfig`` as read-only properties.
    Site-specific subclasses implement :meth:`get_book_info` and
    :meth:`get_book_chapter`, and may override the login-related hooks.
    """

    def __init__(
        self,
        site: str,
        config: FetcherConfig,
        reuse_page: bool = False,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the async browser with configuration.

        :param site: Site identifier; also used as the directory name under
                     ``DATA_DIR`` where the browser state file is stored.
        :param config: Configuration object for fetcher behavior.
        :param reuse_page: If True, :meth:`fetch` keeps one page open and
                           navigates it repeatedly instead of opening a new
                           page per request.
        :param kwargs: Accepted for subclass compatibility; unused here.
        """
        self._site = site
        self._config = config

        self._state_file = DATA_DIR / site / "browser_state.cookies"
        self._state_file.parent.mkdir(parents=True, exist_ok=True)

        self._is_logged_in = False
        self._reuse_page = reuse_page
        self._pw: Playwright | None = None
        self._browser: Browser | None = None
        self._context: BrowserContext | None = None
        self._page: Page | None = None
        self._manual_page: Page | None = None
        self._rate_limiter: TokenBucketRateLimiter | None = None

        # NOTE(review): the limiter is only created here; this base class
        # never calls it, so subclasses presumably await it themselves.
        if config.max_rps is not None and config.max_rps > 0:
            self._rate_limiter = TokenBucketRateLimiter(config.max_rps)

        self.logger = logging.getLogger(self.__class__.__name__)

    async def login(
        self,
        username: str = "",
        password: str = "",
        cookies: dict[str, str] | None = None,
        attempt: int = 1,
        **kwargs: Any,
    ) -> bool:
        """
        Attempt to log in asynchronously.

        The base implementation performs no login and always reports
        failure; subclasses that support authentication override it.

        :returns: True if login succeeded.
        """
        return False

    @abc.abstractmethod
    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML (or JSON) of the book info page(s) asynchronously.

        :param book_id: The book identifier.
        :return: The fetched page contents, one string per page.
        """
        ...

    @abc.abstractmethod
    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML (or JSON) of a single chapter asynchronously.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :return: The fetched chapter contents, one string per page.
        """
        ...

    async def get_bookcase(
        self,
        **kwargs: Any,
    ) -> list[str]:
        """
        Optional: Retrieve the HTML content of the authenticated user's
        bookcase page. Subclasses that support user login/bookcase should
        override this.

        :return: The HTML of the bookcase page(s).
        :raises NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError(
            "Bookcase fetching is not supported by this session type. "
            "Override get_bookcase() in your subclass to enable it."
        )

    async def init(
        self,
        headless: bool = True,
        **kwargs: Any,
    ) -> None:
        """
        Start Playwright, launch the browser and create the browser context.

        Each step is skipped when the corresponding object already exists,
        so calling this repeatedly is harmless.

        :param headless: Requested headless mode. The browser only runs
                         headless when both this flag and the configured
                         ``headless`` option are True.
        """
        if self._pw is None:
            self._pw = await async_playwright().start()

        if self._browser is None or not self._browser.is_connected():
            browser_cls: BrowserType = getattr(self._pw, self.browser_type)

            launch_args: dict[str, Any] = {
                "headless": headless and self.headless,
            }
            if self._config.proxy:
                launch_args["proxy"] = {"server": self._config.proxy}

            self._browser = await browser_cls.launch(**launch_args)

        if self._context is None:
            self._context = await self._build_context()

    async def _build_context(
        self,
        *,
        with_saved_state: bool = False,
    ) -> BrowserContext:
        """
        Create a browser context with the shared default options.

        :param with_saved_state: If True, seed the context from the
                                 persisted state file.
        :return: The new context, with the stealth init script installed
                 and the default timeout applied.
        :raises RuntimeError: If the browser has not been launched.
        """
        context_args: NewContextOptions = {
            "user_agent": self.user_agent,
            "locale": "zh-CN",
            "viewport": ViewportSize(width=1280, height=800),
            "java_script_enabled": True,
            "ignore_https_errors": not self._config.verify_ssl,
        }
        if with_saved_state:
            context_args["storage_state"] = self._state_file
        if self._config.headers:
            context_args["extra_http_headers"] = self._config.headers

        context = await self.browser.new_context(**context_args)
        await context.add_init_script(_STEALTH_SCRIPT)
        # The configured timeout is in seconds; Playwright expects ms.
        context.set_default_timeout(self.timeout * 1000)
        return context

    async def close(self) -> None:
        """
        Shutdown and clean up the browser.

        Resources are released innermost first: pages, then the context,
        then the browser, and finally the Playwright driver.
        """
        if self._page:
            await self._page.close()
            self._page = None
        if self._manual_page:
            await self._manual_page.close()
            self._manual_page = None
        if self._context:
            await self._context.close()
            self._context = None
        if self._browser:
            await self._browser.close()
            self._browser = None
        if self._pw:
            await self._pw.stop()
            self._pw = None

    async def fetch(
        self,
        url: str,
        wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
        | None = "load",
        referer: str | None = None,
        **kwargs: Any,
    ) -> str:
        """
        Navigate to ``url`` and return the rendered page content.

        :param url: The address to load.
        :param wait_until: Playwright navigation event to wait for.
        :param referer: Optional ``Referer`` header for the navigation.
        :return: The page content as a string.
        :raises RuntimeError: If the browser context is not initialized.
        """
        if self._reuse_page:
            if not self._page:
                self._page = await self.context.new_page()
            page = self._page
        else:
            page = await self.context.new_page()

        try:
            await page.goto(url, wait_until=wait_until, referer=referer)
            content = await page.content()
        finally:
            # A one-shot page must be closed even when navigation fails,
            # otherwise every failed request leaks an open tab.
            if not self._reuse_page:
                await page.close()

        return str(content)

    async def load_state(self) -> bool:
        """
        Replace the current context with one restored from the state file.

        :return: True if the state was loaded and the login check passed;
                 False if there is no state file, no active context, or
                 any step failed.
        """
        if not self._state_file.exists() or self._context is None:
            return False
        try:
            stale_context = self._context
            # Pages die with their context, so drop every reference first;
            # a failure below then leaves the object cleanly uninitialized
            # rather than pointing at closed handles.
            self._context = None
            self._page = None
            self._manual_page = None
            await stale_context.close()

            self._context = await self._build_context(with_saved_state=True)
            self._is_logged_in = await self._check_login_status()
            return self._is_logged_in
        except Exception as e:
            # Best effort by design: callers fall back to a fresh login.
            self.logger.warning("Failed to load state: %s", e)
            return False

    async def save_state(self) -> bool:
        """
        Persist the current context's storage state to the state file.

        :return: True on success; False if there is no context or saving
                 failed.
        """
        if self._context is None:
            return False
        try:
            await self._context.storage_state(path=self._state_file)
            return True
        except Exception as e:
            self.logger.warning("Failed to save state: %s", e)
            return False

    async def set_interactive_mode(self, enable: bool) -> bool:
        """
        Enable or disable interactive mode for manual login.

        The base implementation does nothing and returns False.

        :param enable: True to enable, False to disable interactive mode.
        :return: True if operation or login check succeeded, False otherwise.
        """
        return False

    async def _check_login_status(self) -> bool:
        """
        Check whether the user is currently logged in.

        The base implementation always returns False.

        :return: True if the user is logged in, False otherwise.
        """
        return False

    async def _restart_browser(
        self,
        headless: bool = True,
    ) -> None:
        """
        Shutdown the current browser and restart it with the given headless
        setting.

        :param headless: Whether to run the browser in headless mode.
        """
        await self.close()

        # Apply new headless setting and reinitialize
        await self.init(headless=headless)
        self.logger.debug("[browser] Browser restarted (headless=%s).", headless)

    @property
    def hostname(self) -> str:
        """Host name of the target site; empty in the base class."""
        return ""

    @property
    def site(self) -> str:
        """Site identifier given at construction."""
        return self._site

    @property
    def requester_type(self) -> str:
        """Kind of fetcher; always ``"browser"`` for this class."""
        return "browser"

    @property
    def is_logged_in(self) -> bool:
        """
        Indicates whether the requester is currently authenticated.
        """
        return self._is_logged_in

    @property
    def login_fields(self) -> list[LoginField]:
        """Login form fields for the site; none in the base class."""
        return []

    @property
    def browser(self) -> Browser:
        """
        Return the active playwright.Browser.

        :raises RuntimeError: If the browser is uninitialized.
        """
        if self._browser is None:
            raise RuntimeError("Browser is not initialized or has been shut down.")
        return self._browser

    @property
    def context(self) -> BrowserContext:
        """
        Return the active playwright.BrowserContext.

        :raises RuntimeError: If the context is uninitialized.
        """
        if self._context is None:
            raise RuntimeError(
                "BrowserContext is not initialized or has been shut down."
            )
        return self._context

    @property
    def headless(self) -> bool:
        """Configured headless flag."""
        return self._config.headless

    @property
    def user_agent(self) -> str:
        """Configured user agent, or the default when blank or unset."""
        ua = self._config.user_agent or ""
        return ua.strip() or DEFAULT_USER_AGENT

    @property
    def browser_type(self) -> str:
        """Name of the Playwright browser attribute to launch."""
        return self._config.browser_type

    @property
    def disable_images(self) -> bool:
        """Configured ``disable_images`` flag."""
        return self._config.disable_images

    @property
    def retry_times(self) -> int:
        """Configured ``retry_times`` value."""
        return self._config.retry_times

    @property
    def request_interval(self) -> float:
        """Configured ``request_interval`` value."""
        return self._config.request_interval

    @property
    def backoff_factor(self) -> float:
        """Configured ``backoff_factor`` value."""
        return self._config.backoff_factor

    @property
    def timeout(self) -> float:
        """Configured timeout; multiplied by 1000 for Playwright."""
        return self._config.timeout

    @property
    def max_connections(self) -> int:
        """Configured ``max_connections`` value."""
        return self._config.max_connections

    async def __aenter__(self) -> Self:
        """Initialize the browser on entering ``async with``."""
        await self.init()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        tb: types.TracebackType | None,
    ) -> None:
        """Close the browser on leaving ``async with``."""
        await self.close()
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.fetchers.base.rate_limiter
4
+ ------------------------------------------------
5
+
6
+ """
7
+
8
+ import asyncio
9
+ import random
10
+ import time
11
+
12
+
13
class RateLimiter:
    """
    Simple async minimum-interval rate limiter.

    Ensures that requests started through :meth:`wait`, across all
    coroutines sharing this instance, are spaced at least
    ``1 / rate_per_sec`` seconds apart. It is not a token bucket: unused
    time is never banked, so no bursts are allowed.
    """

    def __init__(self, rate_per_sec: float):
        """
        :param rate_per_sec: Maximum number of requests per second;
                             must be positive.
        :raises ValueError: If ``rate_per_sec`` is not positive.
        """
        # A zero rate would divide by zero, and a negative one would give
        # a negative interval that silently disables limiting.
        if rate_per_sec <= 0:
            raise ValueError(
                f"rate_per_sec must be positive, got {rate_per_sec!r}"
            )
        self._interval = 1.0 / rate_per_sec
        self._lock = asyncio.Lock()
        self._last = time.monotonic()

    async def wait(self) -> None:
        """
        Block until the minimum interval since the previous request has
        passed, then record the current time as the new reference point.
        """
        # The lock is held while sleeping on purpose: it serializes the
        # waiters so each one is spaced after the previous one.
        async with self._lock:
            now = time.monotonic()
            elapsed = now - self._last
            delay = self._interval - elapsed
            if delay > 0:
                # Up to 0.3 s of extra random delay, added only when a
                # wait was needed anyway.
                jitter = random.uniform(0, 0.3)
                await asyncio.sleep(delay + jitter)
            self._last = time.monotonic()
34
+
35
+
36
class RateLimiterV2:
    """
    Async rate limiter that schedules evenly spaced start slots.

    Each call to :meth:`wait` reserves the next slot, placed
    ``1 / rate_per_sec`` seconds after the previous one, and sleeps until
    that slot when it lies in the future.
    """

    def __init__(self, rate_per_sec: float):
        """
        :param rate_per_sec: Maximum number of requests per second;
                             must be positive.
        :raises ValueError: If ``rate_per_sec`` is not positive.
        """
        # A zero rate would divide by zero, and a negative one would move
        # the schedule backwards and never throttle.
        if rate_per_sec <= 0:
            raise ValueError(
                f"rate_per_sec must be positive, got {rate_per_sec!r}"
            )
        self._interval = 1.0 / rate_per_sec
        self._lock = asyncio.Lock()
        self._next_allowed_time = time.monotonic()

    async def wait(self) -> None:
        """
        Sleep until the next free slot (plus a jitter of at most 5% of the
        interval), then advance the schedule by one interval.
        """
        async with self._lock:
            now = time.monotonic()
            if now < self._next_allowed_time:
                delay = self._next_allowed_time - now
                jitter = random.uniform(0, 0.05 * self._interval)
                await asyncio.sleep(delay + jitter)
            # After an idle period the schedule restarts from "now"
            # instead of replaying the missed slots as a burst.
            self._next_allowed_time = max(now, self._next_allowed_time) + self._interval
50
+
51
+
52
+ class TokenBucketRateLimiter:
53
+ def __init__(
54
+ self,
55
+ rate: float,
56
+ burst: int = 10,
57
+ jitter_strength: float = 0.3,
58
+ ):
59
+ self.rate = rate
60
+ self.capacity = burst
61
+ self.tokens = burst
62
+ self.timestamp = time.monotonic()
63
+ self.lock = asyncio.Lock()
64
+ self.jitter_strength = jitter_strength
65
+
66
+ async def wait(self) -> None:
67
+ async with self.lock:
68
+ now = time.monotonic()
69
+ elapsed = now - self.timestamp
70
+
71
+ self.tokens = min(self.capacity, int(self.tokens + elapsed * self.rate))
72
+ self.timestamp = now
73
+
74
+ if self.tokens >= 1:
75
+ self.tokens -= 1
76
+ jitter = random.uniform(-self.jitter_strength, self.jitter_strength)
77
+ if jitter > 0:
78
+ await asyncio.sleep(jitter)
79
+ return
80
+ else:
81
+ wait_time = (1 - self.tokens) / self.rate
82
+ jitter = random.uniform(-self.jitter_strength, self.jitter_strength)
83
+ total_wait = max(0.0, wait_time + jitter)
84
+ await asyncio.sleep(total_wait)
85
+ self.timestamp = time.monotonic()
86
+ self.tokens = max(0, self.tokens - 1)