novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,403 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.fetchers.base.browser
|
4
|
-
-------------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
import abc
|
9
|
-
import logging
|
10
|
-
import types
|
11
|
-
from typing import Any, Literal, Self
|
12
|
-
|
13
|
-
from playwright.async_api import (
|
14
|
-
Browser,
|
15
|
-
BrowserContext,
|
16
|
-
BrowserType,
|
17
|
-
Page,
|
18
|
-
Playwright,
|
19
|
-
ViewportSize,
|
20
|
-
async_playwright,
|
21
|
-
)
|
22
|
-
|
23
|
-
from novel_downloader.core.interfaces import FetcherProtocol
|
24
|
-
from novel_downloader.models import FetcherConfig, LoginField, NewContextOptions
|
25
|
-
from novel_downloader.utils.constants import (
|
26
|
-
DATA_DIR,
|
27
|
-
DEFAULT_USER_AGENT,
|
28
|
-
)
|
29
|
-
|
30
|
-
from .rate_limiter import TokenBucketRateLimiter
|
31
|
-
|
32
|
-
_STEALTH_SCRIPT = """
|
33
|
-
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
|
34
|
-
Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });
|
35
|
-
Object.defineProperty(navigator, 'languages', { get: () => ['zh-CN', 'zh', 'en'] });
|
36
|
-
window.chrome = { runtime: {} };
|
37
|
-
""".strip()
|
38
|
-
|
39
|
-
|
40
|
-
class BaseBrowser(FetcherProtocol, abc.ABC):
|
41
|
-
"""
|
42
|
-
BaseBrowser wraps basic browser operations using playwright
|
43
|
-
"""
|
44
|
-
|
45
|
-
def __init__(
|
46
|
-
self,
|
47
|
-
site: str,
|
48
|
-
config: FetcherConfig,
|
49
|
-
reuse_page: bool = False,
|
50
|
-
**kwargs: Any,
|
51
|
-
) -> None:
|
52
|
-
"""
|
53
|
-
Initialize the async browser with configuration.
|
54
|
-
|
55
|
-
:param config: Configuration object for session behavior
|
56
|
-
"""
|
57
|
-
self._site = site
|
58
|
-
self._config = config
|
59
|
-
|
60
|
-
self._state_file = DATA_DIR / site / "browser_state.cookies"
|
61
|
-
self._state_file.parent.mkdir(parents=True, exist_ok=True)
|
62
|
-
|
63
|
-
self._is_logged_in = False
|
64
|
-
self._reuse_page = reuse_page
|
65
|
-
self._pw: Playwright | None = None
|
66
|
-
self._browser: Browser | None = None
|
67
|
-
self._context: BrowserContext | None = None
|
68
|
-
self._page: Page | None = None
|
69
|
-
self._manual_page: Page | None = None
|
70
|
-
self._rate_limiter: TokenBucketRateLimiter | None = None
|
71
|
-
|
72
|
-
if config.max_rps is not None and config.max_rps > 0:
|
73
|
-
self._rate_limiter = TokenBucketRateLimiter(config.max_rps)
|
74
|
-
|
75
|
-
self.logger = logging.getLogger(f"{self.__class__.__name__}")
|
76
|
-
|
77
|
-
async def login(
|
78
|
-
self,
|
79
|
-
username: str = "",
|
80
|
-
password: str = "",
|
81
|
-
cookies: dict[str, str] | None = None,
|
82
|
-
attempt: int = 1,
|
83
|
-
**kwargs: Any,
|
84
|
-
) -> bool:
|
85
|
-
"""
|
86
|
-
Attempt to log in asynchronously.
|
87
|
-
|
88
|
-
:returns: True if login succeeded.
|
89
|
-
"""
|
90
|
-
return False
|
91
|
-
|
92
|
-
@abc.abstractmethod
|
93
|
-
async def get_book_info(
|
94
|
-
self,
|
95
|
-
book_id: str,
|
96
|
-
**kwargs: Any,
|
97
|
-
) -> list[str]:
|
98
|
-
"""
|
99
|
-
Fetch the raw HTML (or JSON) of the book info page asynchronously.
|
100
|
-
|
101
|
-
:param book_id: The book identifier.
|
102
|
-
:return: The page content as a string.
|
103
|
-
"""
|
104
|
-
...
|
105
|
-
|
106
|
-
@abc.abstractmethod
|
107
|
-
async def get_book_chapter(
|
108
|
-
self,
|
109
|
-
book_id: str,
|
110
|
-
chapter_id: str,
|
111
|
-
**kwargs: Any,
|
112
|
-
) -> list[str]:
|
113
|
-
"""
|
114
|
-
Fetch the raw HTML (or JSON) of a single chapter asynchronously.
|
115
|
-
|
116
|
-
:param book_id: The book identifier.
|
117
|
-
:param chapter_id: The chapter identifier.
|
118
|
-
:return: The chapter content as a string.
|
119
|
-
"""
|
120
|
-
...
|
121
|
-
|
122
|
-
async def get_bookcase(
|
123
|
-
self,
|
124
|
-
**kwargs: Any,
|
125
|
-
) -> list[str]:
|
126
|
-
"""
|
127
|
-
Optional: Retrieve the HTML content of the authenticated user's bookcase page.
|
128
|
-
Subclasses that support user login/bookcase should override this.
|
129
|
-
|
130
|
-
:return: The HTML of the bookcase page.
|
131
|
-
"""
|
132
|
-
raise NotImplementedError(
|
133
|
-
"Bookcase fetching is not supported by this session type. "
|
134
|
-
"Override get_bookcase() in your subclass to enable it."
|
135
|
-
)
|
136
|
-
|
137
|
-
async def init(
|
138
|
-
self,
|
139
|
-
headless: bool = True,
|
140
|
-
**kwargs: Any,
|
141
|
-
) -> None:
|
142
|
-
"""
|
143
|
-
Set up the playwright.
|
144
|
-
"""
|
145
|
-
if self._pw is None:
|
146
|
-
self._pw = await async_playwright().start()
|
147
|
-
|
148
|
-
if self._browser is None or not self._browser.is_connected():
|
149
|
-
browser_cls: BrowserType = getattr(self._pw, self.browser_type)
|
150
|
-
|
151
|
-
launch_args: dict[str, Any] = {
|
152
|
-
"headless": headless and self.headless,
|
153
|
-
}
|
154
|
-
if self._config.proxy:
|
155
|
-
launch_args["proxy"] = {"server": self._config.proxy}
|
156
|
-
|
157
|
-
self._browser = await browser_cls.launch(**launch_args)
|
158
|
-
|
159
|
-
if self._context is None:
|
160
|
-
context_args: NewContextOptions = {
|
161
|
-
"user_agent": self.user_agent,
|
162
|
-
"locale": "zh-CN",
|
163
|
-
"viewport": ViewportSize(width=1280, height=800),
|
164
|
-
"java_script_enabled": True,
|
165
|
-
"ignore_https_errors": not self._config.verify_ssl,
|
166
|
-
}
|
167
|
-
|
168
|
-
if self._config.headers:
|
169
|
-
context_args["extra_http_headers"] = self._config.headers
|
170
|
-
|
171
|
-
self._context = await self._browser.new_context(**context_args)
|
172
|
-
await self._context.add_init_script(_STEALTH_SCRIPT)
|
173
|
-
self._context.set_default_timeout(self.timeout * 1000)
|
174
|
-
|
175
|
-
async def close(self) -> None:
|
176
|
-
"""
|
177
|
-
Shutdown and clean up the broswer.
|
178
|
-
"""
|
179
|
-
if self._page:
|
180
|
-
await self._page.close()
|
181
|
-
self._page = None
|
182
|
-
if self._manual_page:
|
183
|
-
await self._manual_page.close()
|
184
|
-
self._manual_page = None
|
185
|
-
if self._context:
|
186
|
-
await self._context.close()
|
187
|
-
self._context = None
|
188
|
-
if self._browser:
|
189
|
-
await self._browser.close()
|
190
|
-
self._browser = None
|
191
|
-
if self._pw:
|
192
|
-
await self._pw.stop()
|
193
|
-
self._pw = None
|
194
|
-
|
195
|
-
async def fetch(
|
196
|
-
self,
|
197
|
-
url: str,
|
198
|
-
wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
|
199
|
-
| None = "load",
|
200
|
-
referer: str | None = None,
|
201
|
-
**kwargs: Any,
|
202
|
-
) -> str:
|
203
|
-
if self._reuse_page:
|
204
|
-
return await self._fetch_with_reuse(url, wait_until, referer, **kwargs)
|
205
|
-
else:
|
206
|
-
return await self._fetch_with_new(url, wait_until, referer, **kwargs)
|
207
|
-
|
208
|
-
async def load_state(self) -> bool:
|
209
|
-
""" """
|
210
|
-
if not self._state_file.exists() or self._context is None:
|
211
|
-
return False
|
212
|
-
try:
|
213
|
-
if self._context is not None:
|
214
|
-
await self._context.close()
|
215
|
-
context_args: NewContextOptions = {
|
216
|
-
"user_agent": self.user_agent,
|
217
|
-
"locale": "zh-CN",
|
218
|
-
"viewport": ViewportSize(width=1280, height=800),
|
219
|
-
"java_script_enabled": True,
|
220
|
-
"ignore_https_errors": not self._config.verify_ssl,
|
221
|
-
"storage_state": self._state_file,
|
222
|
-
}
|
223
|
-
|
224
|
-
if self._config.headers:
|
225
|
-
context_args["extra_http_headers"] = self._config.headers
|
226
|
-
|
227
|
-
self._context = await self.browser.new_context(**context_args)
|
228
|
-
self._context.set_default_timeout(self.timeout * 1000)
|
229
|
-
await self._context.add_init_script(_STEALTH_SCRIPT)
|
230
|
-
self._is_logged_in = await self._check_login_status()
|
231
|
-
return self._is_logged_in
|
232
|
-
except Exception as e:
|
233
|
-
self.logger.warning("Failed to load state: %s", e)
|
234
|
-
return False
|
235
|
-
|
236
|
-
async def save_state(self) -> bool:
|
237
|
-
""" """
|
238
|
-
if self._context is None:
|
239
|
-
return False
|
240
|
-
try:
|
241
|
-
await self._context.storage_state(path=self._state_file)
|
242
|
-
return True
|
243
|
-
except Exception as e:
|
244
|
-
self.logger.warning("Failed to save state: %s", e)
|
245
|
-
return False
|
246
|
-
|
247
|
-
async def set_interactive_mode(self, enable: bool) -> bool:
|
248
|
-
"""
|
249
|
-
Enable or disable interactive mode for manual login.
|
250
|
-
|
251
|
-
:param enable: True to enable, False to disable interactive mode.
|
252
|
-
:return: True if operation or login check succeeded, False otherwise.
|
253
|
-
"""
|
254
|
-
return False
|
255
|
-
|
256
|
-
async def _check_login_status(self) -> bool:
|
257
|
-
"""
|
258
|
-
Check whether the user is currently logged in
|
259
|
-
|
260
|
-
:return: True if the user is logged in, False otherwise.
|
261
|
-
"""
|
262
|
-
return False
|
263
|
-
|
264
|
-
async def _restart_browser(
|
265
|
-
self,
|
266
|
-
headless: bool = True,
|
267
|
-
) -> None:
|
268
|
-
"""
|
269
|
-
Shutdown the current browser and restart it with the given headless setting.
|
270
|
-
|
271
|
-
:param headless: Whether to run the browser in headless mode.
|
272
|
-
"""
|
273
|
-
await self.close()
|
274
|
-
|
275
|
-
# Apply new headless setting and reinitialize
|
276
|
-
await self.init(headless=headless)
|
277
|
-
self.logger.debug("[browser] Browser restarted (headless=%s).", headless)
|
278
|
-
|
279
|
-
async def _fetch_with_new(
|
280
|
-
self,
|
281
|
-
url: str,
|
282
|
-
wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
|
283
|
-
| None = "load",
|
284
|
-
referer: str | None = None,
|
285
|
-
**kwargs: Any,
|
286
|
-
) -> str:
|
287
|
-
page = await self.context.new_page()
|
288
|
-
try:
|
289
|
-
await page.goto(url, wait_until=wait_until, referer=referer, **kwargs)
|
290
|
-
html: str = await page.content()
|
291
|
-
return html
|
292
|
-
finally:
|
293
|
-
await page.close()
|
294
|
-
|
295
|
-
async def _fetch_with_reuse(
|
296
|
-
self,
|
297
|
-
url: str,
|
298
|
-
wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
|
299
|
-
| None = "load",
|
300
|
-
referer: str | None = None,
|
301
|
-
**kwargs: Any,
|
302
|
-
) -> str:
|
303
|
-
if not self._page:
|
304
|
-
self._page = await self.context.new_page()
|
305
|
-
await self._page.goto(url, wait_until=wait_until, referer=referer, **kwargs)
|
306
|
-
html: str = await self._page.content()
|
307
|
-
return html
|
308
|
-
|
309
|
-
@property
|
310
|
-
def hostname(self) -> str:
|
311
|
-
return ""
|
312
|
-
|
313
|
-
@property
|
314
|
-
def site(self) -> str:
|
315
|
-
return self._site
|
316
|
-
|
317
|
-
@property
|
318
|
-
def requester_type(self) -> str:
|
319
|
-
return "browser"
|
320
|
-
|
321
|
-
@property
|
322
|
-
def is_logged_in(self) -> bool:
|
323
|
-
"""
|
324
|
-
Indicates whether the requester is currently authenticated.
|
325
|
-
"""
|
326
|
-
return self._is_logged_in
|
327
|
-
|
328
|
-
@property
|
329
|
-
def login_fields(self) -> list[LoginField]:
|
330
|
-
return []
|
331
|
-
|
332
|
-
@property
|
333
|
-
def browser(self) -> Browser:
|
334
|
-
"""
|
335
|
-
Return the active playwright.Browser.
|
336
|
-
|
337
|
-
:raises RuntimeError: If the browser is uninitialized.
|
338
|
-
"""
|
339
|
-
if self._browser is None:
|
340
|
-
raise RuntimeError("Browser is not initialized or has been shut down.")
|
341
|
-
return self._browser
|
342
|
-
|
343
|
-
@property
|
344
|
-
def context(self) -> BrowserContext:
|
345
|
-
"""
|
346
|
-
Return the active playwright.BrowserContext.
|
347
|
-
|
348
|
-
:raises RuntimeError: If the context is uninitialized.
|
349
|
-
"""
|
350
|
-
if self._context is None:
|
351
|
-
raise RuntimeError(
|
352
|
-
"BrowserContext is not initialized or has been shut down."
|
353
|
-
)
|
354
|
-
return self._context
|
355
|
-
|
356
|
-
@property
|
357
|
-
def headless(self) -> bool:
|
358
|
-
return self._config.headless
|
359
|
-
|
360
|
-
@property
|
361
|
-
def user_agent(self) -> str:
|
362
|
-
ua = self._config.user_agent or ""
|
363
|
-
return ua.strip() or DEFAULT_USER_AGENT
|
364
|
-
|
365
|
-
@property
|
366
|
-
def browser_type(self) -> str:
|
367
|
-
return self._config.browser_type
|
368
|
-
|
369
|
-
@property
|
370
|
-
def disable_images(self) -> bool:
|
371
|
-
return self._config.disable_images
|
372
|
-
|
373
|
-
@property
|
374
|
-
def retry_times(self) -> int:
|
375
|
-
return self._config.retry_times
|
376
|
-
|
377
|
-
@property
|
378
|
-
def request_interval(self) -> float:
|
379
|
-
return self._config.request_interval
|
380
|
-
|
381
|
-
@property
|
382
|
-
def backoff_factor(self) -> float:
|
383
|
-
return self._config.backoff_factor
|
384
|
-
|
385
|
-
@property
|
386
|
-
def timeout(self) -> float:
|
387
|
-
return self._config.timeout
|
388
|
-
|
389
|
-
@property
|
390
|
-
def max_connections(self) -> int:
|
391
|
-
return self._config.max_connections
|
392
|
-
|
393
|
-
async def __aenter__(self) -> Self:
|
394
|
-
await self.init()
|
395
|
-
return self
|
396
|
-
|
397
|
-
async def __aexit__(
|
398
|
-
self,
|
399
|
-
exc_type: type[BaseException] | None,
|
400
|
-
exc_val: BaseException | None,
|
401
|
-
tb: types.TracebackType | None,
|
402
|
-
) -> None:
|
403
|
-
await self.close()
|
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.fetchers.biquge
|
4
|
-
-------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from .browser import BiqugeBrowser
|
9
|
-
from .session import BiqugeSession
|
10
|
-
|
11
|
-
__all__ = [
|
12
|
-
"BiqugeBrowser",
|
13
|
-
"BiqugeSession",
|
14
|
-
]
|
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.fetchers.common
|
4
|
-
-------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from .browser import CommonBrowser
|
9
|
-
from .session import CommonSession
|
10
|
-
|
11
|
-
__all__ = [
|
12
|
-
"CommonBrowser",
|
13
|
-
"CommonSession",
|
14
|
-
]
|
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.fetchers.esjzone
|
4
|
-
--------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from .browser import EsjzoneBrowser
|
9
|
-
from .session import EsjzoneSession
|
10
|
-
|
11
|
-
__all__ = [
|
12
|
-
"EsjzoneBrowser",
|
13
|
-
"EsjzoneSession",
|
14
|
-
]
|
@@ -1,204 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.fetchers.esjzone.browser
|
4
|
-
----------------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from typing import Any
|
9
|
-
|
10
|
-
from novel_downloader.core.fetchers.base import BaseBrowser
|
11
|
-
from novel_downloader.models import FetcherConfig, LoginField
|
12
|
-
|
13
|
-
|
14
|
-
class EsjzoneBrowser(BaseBrowser):
|
15
|
-
"""
|
16
|
-
A browser class for interacting with the Esjzone (www.esjzone.cc) novel website.
|
17
|
-
"""
|
18
|
-
|
19
|
-
BOOKCASE_URL = "https://www.esjzone.cc/my/favorite"
|
20
|
-
BOOK_INFO_URL = "https://www.esjzone.cc/detail/{book_id}.html"
|
21
|
-
CHAPTER_URL = "https://www.esjzone.cc/forum/{book_id}/{chapter_id}.html"
|
22
|
-
|
23
|
-
API_LOGIN_URL_1 = "https://www.esjzone.cc/my/login"
|
24
|
-
API_LOGIN_URL_2 = "https://www.esjzone.cc/inc/mem_login.php"
|
25
|
-
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
config: FetcherConfig,
|
29
|
-
reuse_page: bool = False,
|
30
|
-
**kwargs: Any,
|
31
|
-
) -> None:
|
32
|
-
super().__init__("esjzone", config, reuse_page, **kwargs)
|
33
|
-
|
34
|
-
async def login(
|
35
|
-
self,
|
36
|
-
username: str = "",
|
37
|
-
password: str = "",
|
38
|
-
cookies: dict[str, str] | None = None,
|
39
|
-
attempt: int = 1,
|
40
|
-
**kwargs: Any,
|
41
|
-
) -> bool:
|
42
|
-
self._is_logged_in = await self._check_login_status()
|
43
|
-
if self._is_logged_in:
|
44
|
-
return True
|
45
|
-
|
46
|
-
if not (username and password):
|
47
|
-
self.logger.warning("[auth] No credentials provided.")
|
48
|
-
return False
|
49
|
-
|
50
|
-
login_page = await self.context.new_page()
|
51
|
-
|
52
|
-
try:
|
53
|
-
await login_page.goto(self.API_LOGIN_URL_1, wait_until="networkidle")
|
54
|
-
|
55
|
-
await login_page.fill('input[name="email"]', username)
|
56
|
-
await login_page.fill('input[name="pwd"]', password)
|
57
|
-
|
58
|
-
await login_page.click('a.btn-send[data-send="mem_login"]')
|
59
|
-
|
60
|
-
await login_page.wait_for_load_state("networkidle")
|
61
|
-
finally:
|
62
|
-
await login_page.close()
|
63
|
-
|
64
|
-
self._is_logged_in = await self._check_login_status()
|
65
|
-
|
66
|
-
return self._is_logged_in
|
67
|
-
|
68
|
-
async def get_book_info(
|
69
|
-
self,
|
70
|
-
book_id: str,
|
71
|
-
**kwargs: Any,
|
72
|
-
) -> list[str]:
|
73
|
-
"""
|
74
|
-
Fetch the raw HTML of the book info page asynchronously.
|
75
|
-
|
76
|
-
:param book_id: The book identifier.
|
77
|
-
:return: The page content as a string.
|
78
|
-
"""
|
79
|
-
url = self.book_info_url(book_id=book_id)
|
80
|
-
return [await self.fetch(url, **kwargs)]
|
81
|
-
|
82
|
-
async def get_book_chapter(
|
83
|
-
self,
|
84
|
-
book_id: str,
|
85
|
-
chapter_id: str,
|
86
|
-
**kwargs: Any,
|
87
|
-
) -> list[str]:
|
88
|
-
"""
|
89
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
90
|
-
|
91
|
-
:param book_id: The book identifier.
|
92
|
-
:param chapter_id: The chapter identifier.
|
93
|
-
:return: The chapter content as a string.
|
94
|
-
"""
|
95
|
-
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
96
|
-
return [await self.fetch(url, **kwargs)]
|
97
|
-
|
98
|
-
async def get_bookcase(
|
99
|
-
self,
|
100
|
-
**kwargs: Any,
|
101
|
-
) -> list[str]:
|
102
|
-
"""
|
103
|
-
Retrieve the user's *bookcase* page.
|
104
|
-
|
105
|
-
:return: The HTML markup of the bookcase page.
|
106
|
-
"""
|
107
|
-
url = self.bookcase_url()
|
108
|
-
return [await self.fetch(url, **kwargs)]
|
109
|
-
|
110
|
-
async def set_interactive_mode(self, enable: bool) -> bool:
|
111
|
-
"""
|
112
|
-
Enable or disable interactive mode for manual login.
|
113
|
-
|
114
|
-
:param enable: True to enable, False to disable interactive mode.
|
115
|
-
:return: True if operation or login check succeeded, False otherwise.
|
116
|
-
"""
|
117
|
-
if enable:
|
118
|
-
if self.headless:
|
119
|
-
await self._restart_browser(headless=False)
|
120
|
-
if self._manual_page is None:
|
121
|
-
self._manual_page = await self.context.new_page()
|
122
|
-
await self._manual_page.goto(self.API_LOGIN_URL_1)
|
123
|
-
return True
|
124
|
-
|
125
|
-
# restore
|
126
|
-
if self._manual_page:
|
127
|
-
await self._manual_page.close()
|
128
|
-
self._manual_page = None
|
129
|
-
if self.headless:
|
130
|
-
await self._restart_browser(headless=True)
|
131
|
-
self._is_logged_in = await self._check_login_status()
|
132
|
-
return self.is_logged_in
|
133
|
-
|
134
|
-
@property
|
135
|
-
def login_fields(self) -> list[LoginField]:
|
136
|
-
return [
|
137
|
-
LoginField(
|
138
|
-
name="username",
|
139
|
-
label="用户名",
|
140
|
-
type="text",
|
141
|
-
required=True,
|
142
|
-
placeholder="请输入你的用户名",
|
143
|
-
description="用于登录 esjzone.cc 的用户名",
|
144
|
-
),
|
145
|
-
LoginField(
|
146
|
-
name="password",
|
147
|
-
label="密码",
|
148
|
-
type="password",
|
149
|
-
required=True,
|
150
|
-
placeholder="请输入你的密码",
|
151
|
-
description="用于登录 esjzone.cc 的密码",
|
152
|
-
),
|
153
|
-
]
|
154
|
-
|
155
|
-
@classmethod
|
156
|
-
def bookcase_url(cls) -> str:
|
157
|
-
"""
|
158
|
-
Construct the URL for the user's bookcase page.
|
159
|
-
|
160
|
-
:return: Fully qualified URL of the bookcase.
|
161
|
-
"""
|
162
|
-
return cls.BOOKCASE_URL
|
163
|
-
|
164
|
-
@classmethod
|
165
|
-
def book_info_url(cls, book_id: str) -> str:
|
166
|
-
"""
|
167
|
-
Construct the URL for fetching a book's info page.
|
168
|
-
|
169
|
-
:param book_id: The identifier of the book.
|
170
|
-
:return: Fully qualified URL for the book info page.
|
171
|
-
"""
|
172
|
-
return cls.BOOK_INFO_URL.format(book_id=book_id)
|
173
|
-
|
174
|
-
@classmethod
|
175
|
-
def chapter_url(cls, book_id: str, chapter_id: str) -> str:
|
176
|
-
"""
|
177
|
-
Construct the URL for fetching a specific chapter.
|
178
|
-
|
179
|
-
:param book_id: The identifier of the book.
|
180
|
-
:param chapter_id: The identifier of the chapter.
|
181
|
-
:return: Fully qualified chapter URL.
|
182
|
-
"""
|
183
|
-
return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
|
184
|
-
|
185
|
-
async def _check_login_status(self) -> bool:
|
186
|
-
"""
|
187
|
-
Check whether the user is currently logged in by
|
188
|
-
inspecting the bookcase page content.
|
189
|
-
|
190
|
-
:return: True if the user is logged in, False otherwise.
|
191
|
-
"""
|
192
|
-
keywords = [
|
193
|
-
"window.location.href='/my/login'",
|
194
|
-
"會員登入",
|
195
|
-
"會員註冊 SIGN UP",
|
196
|
-
]
|
197
|
-
resp_text = await self.get_bookcase()
|
198
|
-
if not resp_text:
|
199
|
-
return False
|
200
|
-
return not any(kw in resp_text[0] for kw in keywords)
|
201
|
-
|
202
|
-
@property
|
203
|
-
def hostname(self) -> str:
|
204
|
-
return "www.esjzone.cc"
|
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.fetchers.linovelib
|
4
|
-
----------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from .browser import LinovelibBrowser
|
9
|
-
from .session import LinovelibSession
|
10
|
-
|
11
|
-
__all__ = [
|
12
|
-
"LinovelibBrowser",
|
13
|
-
"LinovelibSession",
|
14
|
-
]
|