novel-downloader 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/clean.py +97 -78
- novel_downloader/cli/config.py +177 -0
- novel_downloader/cli/download.py +132 -87
- novel_downloader/cli/export.py +77 -0
- novel_downloader/cli/main.py +21 -28
- novel_downloader/config/__init__.py +1 -25
- novel_downloader/config/adapter.py +32 -31
- novel_downloader/config/loader.py +3 -3
- novel_downloader/config/site_rules.py +1 -2
- novel_downloader/core/__init__.py +3 -6
- novel_downloader/core/downloaders/__init__.py +10 -13
- novel_downloader/core/downloaders/base.py +233 -0
- novel_downloader/core/downloaders/biquge.py +27 -0
- novel_downloader/core/downloaders/common.py +414 -0
- novel_downloader/core/downloaders/esjzone.py +27 -0
- novel_downloader/core/downloaders/linovelib.py +27 -0
- novel_downloader/core/downloaders/qianbi.py +27 -0
- novel_downloader/core/downloaders/qidian.py +352 -0
- novel_downloader/core/downloaders/sfacg.py +27 -0
- novel_downloader/core/downloaders/yamibo.py +27 -0
- novel_downloader/core/exporters/__init__.py +37 -0
- novel_downloader/core/{savers → exporters}/base.py +73 -39
- novel_downloader/core/exporters/biquge.py +25 -0
- novel_downloader/core/exporters/common/__init__.py +12 -0
- novel_downloader/core/{savers → exporters}/common/epub.py +22 -22
- novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +35 -40
- novel_downloader/core/{savers → exporters}/common/txt.py +20 -23
- novel_downloader/core/{savers → exporters}/epub_utils/__init__.py +8 -3
- novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -2
- novel_downloader/core/{savers → exporters}/epub_utils/image_loader.py +46 -4
- novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -4
- novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +3 -3
- novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -2
- novel_downloader/core/exporters/esjzone.py +25 -0
- novel_downloader/core/exporters/linovelib/__init__.py +10 -0
- novel_downloader/core/exporters/linovelib/epub.py +449 -0
- novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
- novel_downloader/core/exporters/linovelib/txt.py +129 -0
- novel_downloader/core/exporters/qianbi.py +25 -0
- novel_downloader/core/{savers → exporters}/qidian.py +8 -8
- novel_downloader/core/exporters/sfacg.py +25 -0
- novel_downloader/core/exporters/yamibo.py +25 -0
- novel_downloader/core/factory/__init__.py +5 -17
- novel_downloader/core/factory/downloader.py +24 -126
- novel_downloader/core/factory/exporter.py +58 -0
- novel_downloader/core/factory/fetcher.py +96 -0
- novel_downloader/core/factory/parser.py +17 -12
- novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
- novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
- novel_downloader/core/fetchers/base/browser.py +383 -0
- novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
- novel_downloader/core/fetchers/base/session.py +419 -0
- novel_downloader/core/fetchers/biquge/__init__.py +14 -0
- novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
- novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
- novel_downloader/core/fetchers/common/__init__.py +14 -0
- novel_downloader/core/fetchers/common/browser.py +79 -0
- novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
- novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
- novel_downloader/core/fetchers/esjzone/browser.py +202 -0
- novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
- novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
- novel_downloader/core/fetchers/linovelib/browser.py +178 -0
- novel_downloader/core/fetchers/linovelib/session.py +178 -0
- novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
- novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
- novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
- novel_downloader/core/fetchers/qidian/__init__.py +14 -0
- novel_downloader/core/fetchers/qidian/browser.py +266 -0
- novel_downloader/core/fetchers/qidian/session.py +326 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
- novel_downloader/core/fetchers/sfacg/browser.py +189 -0
- novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
- novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
- novel_downloader/core/fetchers/yamibo/browser.py +229 -0
- novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
- novel_downloader/core/interfaces/__init__.py +8 -12
- novel_downloader/core/interfaces/downloader.py +54 -0
- novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
- novel_downloader/core/interfaces/fetcher.py +162 -0
- novel_downloader/core/interfaces/parser.py +6 -7
- novel_downloader/core/parsers/__init__.py +5 -6
- novel_downloader/core/parsers/base.py +9 -13
- novel_downloader/core/parsers/biquge/main_parser.py +12 -13
- novel_downloader/core/parsers/common/helper.py +3 -3
- novel_downloader/core/parsers/common/main_parser.py +39 -34
- novel_downloader/core/parsers/esjzone/main_parser.py +20 -14
- novel_downloader/core/parsers/linovelib/__init__.py +10 -0
- novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
- novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
- novel_downloader/core/parsers/qidian/__init__.py +2 -11
- novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
- novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
- novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
- novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
- novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
- novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
- novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
- novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
- novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
- novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
- novel_downloader/locales/en.json +18 -2
- novel_downloader/locales/zh.json +18 -2
- novel_downloader/models/__init__.py +64 -0
- novel_downloader/models/browser.py +21 -0
- novel_downloader/models/chapter.py +25 -0
- novel_downloader/models/config.py +100 -0
- novel_downloader/models/login.py +20 -0
- novel_downloader/models/site_rules.py +99 -0
- novel_downloader/models/tasks.py +33 -0
- novel_downloader/models/types.py +15 -0
- novel_downloader/resources/config/settings.toml +31 -25
- novel_downloader/resources/json/linovelib_font_map.json +3573 -0
- novel_downloader/tui/__init__.py +7 -0
- novel_downloader/tui/app.py +32 -0
- novel_downloader/tui/main.py +17 -0
- novel_downloader/tui/screens/__init__.py +14 -0
- novel_downloader/tui/screens/home.py +191 -0
- novel_downloader/tui/screens/login.py +74 -0
- novel_downloader/tui/styles/home_layout.tcss +79 -0
- novel_downloader/tui/widgets/richlog_handler.py +24 -0
- novel_downloader/utils/__init__.py +6 -0
- novel_downloader/utils/chapter_storage.py +25 -38
- novel_downloader/utils/constants.py +11 -5
- novel_downloader/utils/cookies.py +66 -0
- novel_downloader/utils/crypto_utils.py +1 -74
- novel_downloader/utils/fontocr/ocr_v1.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +2 -2
- novel_downloader/utils/hash_store.py +10 -18
- novel_downloader/utils/hash_utils.py +3 -2
- novel_downloader/utils/logger.py +2 -3
- novel_downloader/utils/network.py +2 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -1
- novel_downloader/utils/text_utils/text_cleaning.py +1 -1
- novel_downloader/utils/time_utils/datetime_utils.py +3 -3
- novel_downloader/utils/time_utils/sleep_utils.py +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +69 -35
- novel_downloader-1.4.0.dist-info/RECORD +170 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
- novel_downloader/cli/interactive.py +0 -66
- novel_downloader/cli/settings.py +0 -177
- novel_downloader/config/models.py +0 -187
- novel_downloader/core/downloaders/base/__init__.py +0 -14
- novel_downloader/core/downloaders/base/base_async.py +0 -153
- novel_downloader/core/downloaders/base/base_sync.py +0 -208
- novel_downloader/core/downloaders/biquge/__init__.py +0 -14
- novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
- novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
- novel_downloader/core/downloaders/common/__init__.py +0 -14
- novel_downloader/core/downloaders/common/common_async.py +0 -210
- novel_downloader/core/downloaders/common/common_sync.py +0 -202
- novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
- novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
- novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
- novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
- novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
- novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
- novel_downloader/core/downloaders/qidian/__init__.py +0 -10
- novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -219
- novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
- novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
- novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
- novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
- novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
- novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
- novel_downloader/core/factory/requester.py +0 -144
- novel_downloader/core/factory/saver.py +0 -56
- novel_downloader/core/interfaces/async_downloader.py +0 -36
- novel_downloader/core/interfaces/async_requester.py +0 -84
- novel_downloader/core/interfaces/sync_downloader.py +0 -36
- novel_downloader/core/interfaces/sync_requester.py +0 -82
- novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
- novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
- novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
- novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
- novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
- novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
- novel_downloader/core/requesters/base/async_session.py +0 -410
- novel_downloader/core/requesters/base/browser.py +0 -337
- novel_downloader/core/requesters/base/session.py +0 -378
- novel_downloader/core/requesters/biquge/__init__.py +0 -14
- novel_downloader/core/requesters/common/__init__.py +0 -17
- novel_downloader/core/requesters/common/session.py +0 -113
- novel_downloader/core/requesters/esjzone/__init__.py +0 -13
- novel_downloader/core/requesters/esjzone/session.py +0 -235
- novel_downloader/core/requesters/qianbi/__init__.py +0 -13
- novel_downloader/core/requesters/qidian/__init__.py +0 -21
- novel_downloader/core/requesters/qidian/broswer.py +0 -307
- novel_downloader/core/requesters/qidian/session.py +0 -290
- novel_downloader/core/requesters/sfacg/__init__.py +0 -13
- novel_downloader/core/requesters/sfacg/session.py +0 -242
- novel_downloader/core/requesters/yamibo/__init__.py +0 -13
- novel_downloader/core/requesters/yamibo/session.py +0 -237
- novel_downloader/core/savers/__init__.py +0 -34
- novel_downloader/core/savers/biquge.py +0 -25
- novel_downloader/core/savers/common/__init__.py +0 -12
- novel_downloader/core/savers/esjzone.py +0 -25
- novel_downloader/core/savers/qianbi.py +0 -25
- novel_downloader/core/savers/sfacg.py +0 -25
- novel_downloader/core/savers/yamibo.py +0 -25
- novel_downloader/resources/config/rules.toml +0 -196
- novel_downloader-1.3.3.dist-info/RECORD +0 -166
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,383 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.base.browser
|
4
|
+
-------------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import abc
|
9
|
+
import logging
|
10
|
+
import types
|
11
|
+
from typing import Any, Literal, Self
|
12
|
+
|
13
|
+
from playwright.async_api import (
|
14
|
+
Browser,
|
15
|
+
BrowserContext,
|
16
|
+
BrowserType,
|
17
|
+
Page,
|
18
|
+
Playwright,
|
19
|
+
ViewportSize,
|
20
|
+
async_playwright,
|
21
|
+
)
|
22
|
+
|
23
|
+
from novel_downloader.core.interfaces import FetcherProtocol
|
24
|
+
from novel_downloader.models import FetcherConfig, LoginField, NewContextOptions
|
25
|
+
from novel_downloader.utils.constants import (
|
26
|
+
DATA_DIR,
|
27
|
+
DEFAULT_USER_AGENT,
|
28
|
+
)
|
29
|
+
|
30
|
+
from .rate_limiter import TokenBucketRateLimiter
|
31
|
+
|
32
|
+
_STEALTH_SCRIPT = """
|
33
|
+
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
|
34
|
+
Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });
|
35
|
+
Object.defineProperty(navigator, 'languages', { get: () => ['zh-CN', 'zh', 'en'] });
|
36
|
+
window.chrome = { runtime: {} };
|
37
|
+
""".strip()
|
38
|
+
|
39
|
+
|
40
|
+
class BaseBrowser(FetcherProtocol, abc.ABC):
    """
    Abstract fetcher that wraps basic browser operations using playwright.

    Subclasses must implement :meth:`get_book_info` and
    :meth:`get_book_chapter`; login, bookcase and interactive-mode hooks
    have no-op / "unsupported" defaults that may be overridden.

    The instance can be used as an async context manager: entering calls
    :meth:`init`, leaving calls :meth:`close`.
    """

    def __init__(
        self,
        site: str,
        config: FetcherConfig,
        reuse_page: bool = False,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the async browser with configuration.

        No browser is launched here; call :meth:`init` (or use
        ``async with``) before fetching.

        :param site: Site identifier; also used as the sub-directory of
            ``DATA_DIR`` in which the browser state file is stored.
        :param config: Configuration object for session behavior.
        :param reuse_page: If True, :meth:`fetch` keeps a single page open
            and reuses it; otherwise a new page is opened per fetch.
        :param kwargs: Accepted for interface compatibility; unused here.
        """
        self._site = site
        self._config = config

        self._state_file = DATA_DIR / site / "browser_state.cookies"
        self._state_file.parent.mkdir(parents=True, exist_ok=True)

        self._is_logged_in = False
        self._reuse_page = reuse_page
        self._pw: Playwright | None = None
        self._browser: Browser | None = None
        self._context: BrowserContext | None = None
        self._page: Page | None = None
        self._manual_page: Page | None = None
        self._rate_limiter: TokenBucketRateLimiter | None = None

        # A limiter is only created for a positive ``max_rps``; this base
        # class does not call it itself.
        if config.max_rps is not None and config.max_rps > 0:
            self._rate_limiter = TokenBucketRateLimiter(config.max_rps)

        self.logger = logging.getLogger(f"{self.__class__.__name__}")

    async def login(
        self,
        username: str = "",
        password: str = "",
        cookies: dict[str, str] | None = None,
        attempt: int = 1,
        **kwargs: Any,
    ) -> bool:
        """
        Attempt to log in asynchronously.

        The base implementation performs no login and always returns False;
        subclasses that support authentication should override it.

        :param username: Account name.
        :param password: Account password.
        :param cookies: Optional cookies to authenticate with.
        :param attempt: Number of attempts.
        :returns: True if login succeeded.
        """
        return False

    @abc.abstractmethod
    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML (or JSON) of the book info page asynchronously.

        :param book_id: The book identifier.
        :return: The fetched page contents as a list of strings.
        """
        ...

    @abc.abstractmethod
    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML (or JSON) of a single chapter asynchronously.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :return: The fetched chapter contents as a list of strings.
        """
        ...

    async def get_bookcase(
        self,
        **kwargs: Any,
    ) -> list[str]:
        """
        Optional: Retrieve the HTML content of the authenticated user's bookcase page.
        Subclasses that support user login/bookcase should override this.

        :return: The HTML of the bookcase page.
        :raises NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError(
            "Bookcase fetching is not supported by this session type. "
            "Override get_bookcase() in your subclass to enable it."
        )

    async def init(
        self,
        headless: bool = True,
        **kwargs: Any,
    ) -> None:
        """
        Set up playwright, the browser and the browser context.

        Each layer is only created when missing, so calling this method
        repeatedly is safe.

        :param headless: Requested headless mode. The browser runs headless
            only if both this flag and the configured ``headless`` are True.
        :param kwargs: Accepted for interface compatibility; unused here.
        """
        if self._pw is None:
            self._pw = await async_playwright().start()

        if self._browser is None or not self._browser.is_connected():
            browser_cls: BrowserType = getattr(self._pw, self.browser_type)

            launch_args: dict[str, Any] = {
                "headless": headless and self.headless,
            }
            if self._config.proxy:
                launch_args["proxy"] = {"server": self._config.proxy}

            self._browser = await browser_cls.launch(**launch_args)

            # Contexts and pages are bound to the browser that created
            # them; after a (re)launch any leftovers are unusable.
            self._context = None
            self._page = None
            self._manual_page = None

        if self._context is None:
            self._context = await self._create_context()

    async def close(self) -> None:
        """
        Shutdown and clean up the browser.

        Closes, in order, the reused page, the manual page, the context,
        the browser and playwright itself, resetting each handle to None.
        """
        if self._page:
            await self._page.close()
            self._page = None
        if self._manual_page:
            await self._manual_page.close()
            self._manual_page = None
        if self._context:
            await self._context.close()
            self._context = None
        if self._browser:
            await self._browser.close()
            self._browser = None
        if self._pw:
            await self._pw.stop()
            self._pw = None

    async def fetch(
        self,
        url: str,
        wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
        | None = "load",
        referer: str | None = None,
        **kwargs: Any,
    ) -> str:
        """
        Navigate to ``url`` and return the resulting page content.

        :param url: Address to open.
        :param wait_until: Navigation event to wait for (passed to
            ``page.goto``).
        :param referer: Optional referer passed to ``page.goto``.
        :param kwargs: Accepted for interface compatibility; unused here.
        :return: The page content as a string.
        :raises RuntimeError: If the browser context is not initialized.
        """
        if self._reuse_page:
            # Re-open the shared page if it was never created or has been
            # closed in the meantime.
            if self._page is None or self._page.is_closed():
                self._page = await self.context.new_page()
            page = self._page
        else:
            page = await self.context.new_page()

        try:
            await page.goto(url, wait_until=wait_until, referer=referer)
            content = await page.content()
        finally:
            # A one-off page must be released even when navigation fails,
            # otherwise every failed fetch leaks an open page.
            if not self._reuse_page:
                await page.close()

        return str(content)

    async def load_state(self) -> bool:
        """
        Recreate the browser context from the saved state file.

        The replacement context is built before the current one is closed,
        so a failure to load the state leaves the current context usable.

        :return: True if the state was loaded and the login check passed;
            False if there is no state file, no active context, or an
            error occurred.
        """
        if self._context is None or not self._state_file.exists():
            return False

        previous_context = self._context
        try:
            restored_context = await self._create_context(
                storage_state=self._state_file
            )
        except Exception as e:
            self.logger.warning("Failed to load state: %s", e)
            return False

        self._context = restored_context
        # Pages opened from the previous context are closed along with it.
        self._page = None
        self._manual_page = None

        try:
            await previous_context.close()
            self._is_logged_in = await self._check_login_status()
        except Exception as e:
            self.logger.warning("Failed to load state: %s", e)
            return False
        return self._is_logged_in

    async def save_state(self) -> bool:
        """
        Write the current context's storage state to the state file.

        :return: True on success; False if there is no active context or
            saving failed.
        """
        if self._context is None:
            return False
        try:
            await self._context.storage_state(path=self._state_file)
            return True
        except Exception as e:
            self.logger.warning("Failed to save state: %s", e)
            return False

    async def set_interactive_mode(self, enable: bool) -> bool:
        """
        Enable or disable interactive mode for manual login.

        The base implementation does nothing and returns False.

        :param enable: True to enable, False to disable interactive mode.
        :return: True if operation or login check succeeded, False otherwise.
        """
        return False

    async def _check_login_status(self) -> bool:
        """
        Check whether the user is currently logged in.

        The base implementation always reports False.

        :return: True if the user is logged in, False otherwise.
        """
        return False

    async def _create_context(self, storage_state: Any = None) -> BrowserContext:
        """
        Create a configured browser context on the active browser.

        :param storage_state: Optional saved state to load into the new
            context; omitted from the options when None.
        :return: The new context, with the stealth script and the default
            timeout already applied.
        :raises RuntimeError: If the browser is not initialized.
        """
        context_args: NewContextOptions = {
            "user_agent": self.user_agent,
            "locale": "zh-CN",
            "viewport": ViewportSize(width=1280, height=800),
            "java_script_enabled": True,
            "ignore_https_errors": not self._config.verify_ssl,
        }
        if storage_state is not None:
            context_args["storage_state"] = storage_state
        if self._config.headers:
            context_args["extra_http_headers"] = self._config.headers

        context = await self.browser.new_context(**context_args)
        await context.add_init_script(_STEALTH_SCRIPT)
        # ``timeout`` is multiplied by 1000 before being handed to playwright.
        context.set_default_timeout(self.timeout * 1000)
        return context

    async def _restart_browser(
        self,
        headless: bool = True,
    ) -> None:
        """
        Shutdown the current browser and restart it with the given headless setting.

        :param headless: Whether to run the browser in headless mode.
        """
        await self.close()

        # Apply new headless setting and reinitialize
        await self.init(headless=headless)
        self.logger.debug("[browser] Browser restarted (headless=%s).", headless)

    @property
    def hostname(self) -> str:
        """Host name of the site; empty in the base class."""
        return ""

    @property
    def site(self) -> str:
        """Site identifier passed to the constructor."""
        return self._site

    @property
    def requester_type(self) -> str:
        """Kind of fetcher; always ``"browser"`` here."""
        return "browser"

    @property
    def is_logged_in(self) -> bool:
        """
        Indicates whether the requester is currently authenticated.
        """
        return self._is_logged_in

    @property
    def login_fields(self) -> list[LoginField]:
        """Fields required for login; empty in the base class."""
        return []

    @property
    def browser(self) -> Browser:
        """
        Return the active playwright.Browser.

        :raises RuntimeError: If the browser is uninitialized.
        """
        if self._browser is None:
            raise RuntimeError("Browser is not initialized or has been shut down.")
        return self._browser

    @property
    def context(self) -> BrowserContext:
        """
        Return the active playwright.BrowserContext.

        :raises RuntimeError: If the context is uninitialized.
        """
        if self._context is None:
            raise RuntimeError(
                "BrowserContext is not initialized or has been shut down."
            )
        return self._context

    @property
    def headless(self) -> bool:
        """Configured headless flag."""
        return self._config.headless

    @property
    def user_agent(self) -> str:
        """Configured user agent, or ``DEFAULT_USER_AGENT`` if blank/unset."""
        ua = self._config.user_agent or ""
        return ua.strip() or DEFAULT_USER_AGENT

    @property
    def browser_type(self) -> str:
        """Configured browser type; used as an attribute name on playwright."""
        return self._config.browser_type

    @property
    def disable_images(self) -> bool:
        """Configured ``disable_images`` flag."""
        return self._config.disable_images

    @property
    def retry_times(self) -> int:
        """Configured ``retry_times`` value."""
        return self._config.retry_times

    @property
    def request_interval(self) -> float:
        """Configured ``request_interval`` value."""
        return self._config.request_interval

    @property
    def backoff_factor(self) -> float:
        """Configured ``backoff_factor`` value."""
        return self._config.backoff_factor

    @property
    def timeout(self) -> float:
        """Configured ``timeout`` value."""
        return self._config.timeout

    @property
    def max_connections(self) -> int:
        """Configured ``max_connections`` value."""
        return self._config.max_connections

    async def __aenter__(self) -> Self:
        """Initialize the browser and return this instance."""
        await self.init()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        tb: types.TracebackType | None,
    ) -> None:
        """Close the browser on leaving the ``async with`` block."""
        await self.close()
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.base.rate_limiter
|
4
|
+
------------------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import asyncio
|
9
|
+
import random
|
10
|
+
import time
|
11
|
+
|
12
|
+
|
13
|
+
class RateLimiter:
    """
    Simple async fixed-interval rate limiter.

    Callers are serialized through a lock; each call to :meth:`wait`
    returns no sooner than ``1 / rate_per_sec`` seconds after the previous
    call finished, so no more than ``rate_per_sec`` requests are started
    per second across all coroutines sharing the instance.
    """

    def __init__(self, rate_per_sec: float):
        """
        :param rate_per_sec: Maximum number of calls released per second.
        """
        self._interval = 1.0 / rate_per_sec
        self._lock = asyncio.Lock()
        self._last = time.monotonic()

    async def wait(self) -> None:
        """
        Block until the next call may proceed.

        When the minimum interval has not yet passed, sleeps for the
        remainder plus a random jitter of up to 0.3 seconds.
        """
        async with self._lock:
            remaining = self._interval - (time.monotonic() - self._last)
            if remaining > 0:
                await asyncio.sleep(remaining + random.uniform(0, 0.3))
            # Measured after any sleep, so the next gap starts from here.
            self._last = time.monotonic()
|
34
|
+
|
35
|
+
|
36
|
+
class RateLimiterV2:
    """
    Async rate limiter that schedules slots on a fixed timeline.

    Each call reserves the next slot, ``1 / rate_per_sec`` seconds after
    the later of "now" and the previously reserved slot.
    """

    def __init__(self, rate_per_sec: float):
        """
        :param rate_per_sec: Maximum number of calls released per second.
        """
        self._interval = 1.0 / rate_per_sec
        self._lock = asyncio.Lock()
        self._next_allowed_time = time.monotonic()

    async def wait(self) -> None:
        """
        Block until the reserved slot is reached, then reserve the next one.

        When a sleep is needed it is extended by a random jitter of up to
        5% of the interval.
        """
        async with self._lock:
            now = time.monotonic()
            slot = self._next_allowed_time
            if slot > now:
                pause = slot - now
                pause += random.uniform(0, 0.05 * self._interval)
                await asyncio.sleep(pause)
            # ``now`` is the pre-sleep reading, so the next slot is based on
            # the scheduled time rather than on when the sleep ended.
            self._next_allowed_time = max(now, slot) + self._interval
|
50
|
+
|
51
|
+
|
52
|
+
class TokenBucketRateLimiter:
    """
    Async token-bucket rate limiter with random jitter.

    The bucket holds at most ``burst`` tokens and refills at ``rate``
    tokens per second. Each :meth:`wait` consumes one token, sleeping
    first when none is available.
    """

    def __init__(
        self,
        rate: float,
        burst: int = 10,
        jitter_strength: float = 0.3,
    ):
        """
        :param rate: Refill rate in tokens per second.
        :param burst: Bucket capacity; the bucket starts full.
        :param jitter_strength: Jitter is drawn uniformly from
            ``[-jitter_strength, +jitter_strength]`` seconds.
        """
        self.rate = rate
        self.capacity = burst
        # Kept as a float so partial refills accumulate between calls.
        self.tokens: float = float(burst)
        self.timestamp = time.monotonic()
        self.lock = asyncio.Lock()
        self.jitter_strength = jitter_strength

    async def wait(self) -> None:
        """
        Consume one token, sleeping until one is available if necessary.

        With a token available, only a positive jitter causes a sleep.
        Otherwise the call sleeps for the time needed to refill the missing
        fraction of a token, adjusted by jitter and clamped at zero.
        """
        async with self.lock:
            now = time.monotonic()
            elapsed = now - self.timestamp

            # Do not truncate here: ``timestamp`` is advanced on every call,
            # so rounding the refill down would discard the fractional
            # tokens earned since the last call and throttle callers below
            # the configured rate.
            self.tokens = min(self.capacity, self.tokens + elapsed * self.rate)
            self.timestamp = now

            jitter = random.uniform(-self.jitter_strength, self.jitter_strength)

            if self.tokens >= 1:
                self.tokens -= 1
                if jitter > 0:
                    await asyncio.sleep(jitter)
                return

            # Only the missing fraction of a token has to be waited for.
            wait_time = (1 - self.tokens) / self.rate
            await asyncio.sleep(max(0.0, wait_time + jitter))
            # The token refilled during the sleep is consumed by this call.
            self.timestamp = time.monotonic()
            self.tokens = 0.0
|