novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,168 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.base
|
4
|
+
------------------------------------
|
5
|
+
|
6
|
+
Abstract base class providing common utilities for site-specific searchers.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import abc
|
10
|
+
from typing import Any, ClassVar
|
11
|
+
from urllib.parse import quote_plus, urljoin
|
12
|
+
|
13
|
+
import aiohttp
|
14
|
+
|
15
|
+
from novel_downloader.core.interfaces import SearcherProtocol
|
16
|
+
from novel_downloader.models import SearchResult
|
17
|
+
from novel_downloader.utils.constants import DEFAULT_USER_HEADERS
|
18
|
+
|
19
|
+
|
20
|
+
class BaseSearcher(abc.ABC, SearcherProtocol):
    """
    Abstract base class providing common utilities for site-specific searchers.

    Subclasses implement :meth:`_fetch_html` and :meth:`_parse_html`; a shared
    aiohttp session must be injected once via :meth:`configure` before use.
    """

    site_name: str
    BASE_URL: str = ""
    _session: ClassVar[aiohttp.ClientSession | None] = None

    @classmethod
    def configure(cls, session: aiohttp.ClientSession) -> None:
        """Attach the shared aiohttp session used by the HTTP helpers."""
        cls._session = session

    @classmethod
    async def search(cls, keyword: str, limit: int | None = None) -> list[SearchResult]:
        """
        Fetch the search page for *keyword* and parse it into results.

        :param keyword: The search term to query.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """
        html = await cls._fetch_html(keyword)
        return cls._parse_html(html, limit)

    @classmethod
    @abc.abstractmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML from search API or page.

        :param keyword: The search term to query.
        :return: HTML text of the search results page, or an empty string on fail.
        """

    @classmethod
    @abc.abstractmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse raw HTML from search API or page into list of SearchResult.

        :param html_str: Raw HTML string from search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """

    @classmethod
    async def _http_get(
        cls,
        url: str,
        *,
        params: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> aiohttp.ClientResponse:
        """
        Helper for GET requests with default headers.

        :raises aiohttp.ClientResponseError: on non-2xx status (the body is
            drained and the connection released before re-raising).
        """
        session = cls._ensure_session()
        hdrs = {**DEFAULT_USER_HEADERS, **(headers or {})}
        resp = await session.get(url, params=params, headers=hdrs, **kwargs)
        return await cls._check_response(resp)

    @classmethod
    async def _http_post(
        cls,
        url: str,
        *,
        data: dict[str, str] | str | None = None,
        headers: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> aiohttp.ClientResponse:
        """
        Helper for POST requests with default headers.

        :raises aiohttp.ClientResponseError: on non-2xx status (the body is
            drained and the connection released before re-raising).
        """
        session = cls._ensure_session()
        hdrs = {**DEFAULT_USER_HEADERS, **(headers or {})}
        resp = await session.post(url, data=data, headers=hdrs, **kwargs)
        return await cls._check_response(resp)

    @staticmethod
    async def _check_response(
        resp: aiohttp.ClientResponse,
    ) -> aiohttp.ClientResponse:
        """
        Raise for a bad HTTP status, draining and releasing the connection
        first so it can be returned to the pool.

        Shared by :meth:`_http_get` and :meth:`_http_post` (previously the
        drain-and-release logic was duplicated in both).
        """
        try:
            resp.raise_for_status()
        except aiohttp.ClientResponseError:
            # Drain the body so the underlying connection is reusable,
            # release it even if reading fails, then propagate the error.
            try:
                await resp.read()
            finally:
                resp.release()
            raise
        return resp

    @classmethod
    def _ensure_session(cls) -> aiohttp.ClientSession:
        """Return the configured session, or raise if none was injected."""
        if cls._session is None:
            raise RuntimeError(
                f"{cls.__name__} has no aiohttp session. "
                "Call .configure(session) first."
            )
        return cls._session

    @staticmethod
    def _quote(q: str, encoding: str | None = None, errors: str | None = None) -> str:
        """URL-encode a query string safely."""
        return quote_plus(q, encoding=encoding, errors=errors)

    @staticmethod
    async def _response_to_str(
        resp: aiohttp.ClientResponse,
        encoding: str | None = None,
    ) -> str:
        """
        Read the full body of resp as text. First try the declared charset,
        then on UnicodeDecodeError fall back to a lenient utf-8 decode.
        """
        data: bytes = await resp.read()
        # Explicit encoding wins, then the response charset, then common
        # Chinese encodings, then utf-8; skip falsy entries.
        candidates = [encoding, resp.charset, "gb2312", "gb18030", "gbk", "utf-8"]
        for enc in (e for e in candidates if e):
            try:
                return data.decode(enc)
            except UnicodeDecodeError:
                continue
        # Last resort: lenient decode that drops undecodable bytes.
        return data.decode(encoding or "utf-8", errors="ignore")

    @staticmethod
    def _first_str(xs: list[str], replaces: list[tuple[str, str]] | None = None) -> str:
        """
        Return the first string in *xs* stripped of whitespace, after applying
        each (old, new) replacement pair; "" when *xs* is empty.
        """
        value: str = xs[0].strip() if xs else ""
        for old, new in replaces or []:
            value = value.replace(old, new)
        return value

    @staticmethod
    def _build_url(base: str, params: dict[str, str]) -> str:
        """
        Join *base* with a naive query string.

        NOTE: values are interpolated as-is (not URL-encoded); callers must
        pre-encode values that need escaping.
        """
        query_string = "&".join(f"{k}={v}" for k, v in params.items())
        return f"{base}?{query_string}"

    @classmethod
    def _abs_url(cls, url: str) -> str:
        """Resolve a possibly-relative URL against the site's BASE_URL."""
        return (
            url
            if url.startswith(("http://", "https://"))
            else urljoin(cls.BASE_URL, url)
        )
|
@@ -0,0 +1,105 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.dxmwx
|
4
|
+
-------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
13
|
+
from novel_downloader.core.searchers.registry import register_searcher
|
14
|
+
from novel_downloader.models import SearchResult
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
@register_searcher(
    site_keys=["dxmwx"],
)
class DxmwxSearcher(BaseSearcher):
    """Searcher for www.dxmwx.org (results served as a static listing page)."""

    site_name = "dxmwx"
    priority = 30
    BASE_URL = "https://www.dxmwx.org"
    SEARCH_URL = "https://www.dxmwx.org/list/{query}.html"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the search listing page for *keyword*.

        :return: HTML text, or "" on any fetch error.
        """
        url = cls.SEARCH_URL.format(query=cls._quote(keyword))
        try:
            async with (await cls._http_get(url)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            # Log the concrete URL that failed, not the URL template.
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                url,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the listing page into SearchResult entries.

        :param limit: maximum number of results to return, or None for all.
            Counted against actual results, so rows skipped for a missing
            link no longer consume the limit.
        """
        doc = html.fromstring(html_str)
        rows = doc.xpath(
            "//div[@id='ListContents']/div[contains(@style,'position: relative')]"
        )
        results: list[SearchResult] = []

        for row in rows:
            if limit is not None and len(results) >= limit:
                break

            href = cls._first_str(
                row.xpath(".//div[contains(@class,'margin0h5')]//a[1]/@href")
            )
            if not href:
                continue

            book_url = cls._abs_url(href)
            # "/book/10409.html" -> "10409"
            book_id = href.split("/")[-1].split(".", 1)[0]

            title = cls._first_str(
                row.xpath(".//div[contains(@class,'margin0h5')]//a[1]/text()")
            )
            author = cls._first_str(
                row.xpath(".//div[contains(@class,'margin0h5')]//a[2]/text()")
            )

            cover_src = cls._first_str(
                row.xpath(".//div[contains(@class,'imgwidth')]//img/@src")
            )
            cover_url = cls._abs_url(cover_src) if cover_src else ""

            latest_chapter = cls._first_str(
                row.xpath(
                    ".//a[span and span[contains(normalize-space(.),'最新章节')]]"
                    "/span/following-sibling::text()[1]"
                )
            )
            update_date = cls._first_str(
                row.xpath(".//span[contains(@class,'lefth5')]/text()")
            )

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter=latest_chapter,
                    update_date=update_date,
                    word_count="-",
                    # Priority grows with result position so earlier rows rank higher.
                    priority=cls.priority + len(results),
                )
            )
        return results
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.eightnovel
|
4
|
+
------------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
13
|
+
from novel_downloader.core.searchers.registry import register_searcher
|
14
|
+
from novel_downloader.models import SearchResult
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
@register_searcher(
    site_keys=["eightnovel", "8novel"],
)
class EightnovelSearcher(BaseSearcher):
    """Searcher for www.8novel.com (search endpoint takes a ``key`` param)."""

    site_name = "8novel"
    priority = 20
    BASE_URL = "https://www.8novel.com"
    SEARCH_URL = "https://www.8novel.com/search/"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the search results page for *keyword*.

        :return: HTML text, or "" on any fetch error.
        """
        params = {"key": keyword}
        try:
            async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse search result anchors into SearchResult entries.

        :param limit: maximum number of results to return, or None for all.
            Counted against actual results, so anchors skipped for a missing
            href no longer consume the limit.
        """
        doc = html.fromstring(html_str)
        anchors = doc.xpath("//div[contains(@class,'picsize')]/a")
        results: list[SearchResult] = []

        for a in anchors:
            if limit is not None and len(results) >= limit:
                break

            href = cls._first_str(a.xpath("./@href"))
            if not href:
                continue

            # '/novelbooks/6045' -> "6045"
            book_id = href.rstrip("/").split("/")[-1]
            book_url = cls._abs_url(href)

            cover_rel = cls._first_str(a.xpath(".//img/@src"))
            cover_url = cls._abs_url(cover_rel) if cover_rel else ""

            title = cls._first_str(a.xpath("./@title"))

            # Word count lives in a custom <eps> element; "-" when absent.
            word_count = cls._first_str(a.xpath(".//eps//text()")) or "-"

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author="-",
                    latest_chapter="-",
                    update_date="-",
                    word_count=word_count,
                    # Priority grows with result position so earlier rows rank higher.
                    priority=cls.priority + len(results),
                )
            )
        return results
|
@@ -0,0 +1,102 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.esjzone
|
4
|
+
---------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
13
|
+
from novel_downloader.core.searchers.registry import register_searcher
|
14
|
+
from novel_downloader.models import SearchResult
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
@register_searcher(
    site_keys=["esjzone"],
)
class EsjzoneSearcher(BaseSearcher):
    """Searcher for www.esjzone.cc (tag pages double as keyword search)."""

    site_name = "esjzone"
    priority = 30
    BASE_URL = "https://www.esjzone.cc"
    SEARCH_URL = "https://www.esjzone.cc/tags/{query}/"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the tag/search page for *keyword*.

        :return: HTML text, or "" on any fetch error.
        """
        url = cls.SEARCH_URL.format(query=cls._quote(keyword))
        try:
            async with (await cls._http_get(url)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                url,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse result cards into SearchResult entries.

        :param limit: maximum number of results to return, or None for all.
            Counted against actual results, so cards skipped for a missing
            link no longer consume the limit.
        """
        doc = html.fromstring(html_str)
        cards = doc.xpath('//div[contains(@class,"card-body")]')
        results: list[SearchResult] = []

        for card in cards:
            if limit is not None and len(results) >= limit:
                break

            href = cls._first_str(
                card.xpath(".//h5[contains(@class,'card-title')]/a[1]/@href")
            )
            if not href:
                continue

            # href format: /detail/<book_id>.html
            book_id = href.split("/")[-1].split(".")[0]
            book_url = cls._abs_url(href)

            title = cls._first_str(
                card.xpath(".//h5[contains(@class,'card-title')]/a[1]//text()")
            )

            latest_chapter = (
                cls._first_str(
                    card.xpath(".//div[contains(@class,'card-ep')]//a[1]//text()")
                )
                or "-"
            )

            # Author: prefer the linked name, fall back to any text in the div.
            author = cls._first_str(
                card.xpath(".//div[contains(@class,'card-author')]//a[1]//text()")
            ) or cls._first_str(
                card.xpath(".//div[contains(@class,'card-author')]//text()")
            )

            # Cover image sits on the sibling anchor preceding the card body.
            cover_data = card.xpath(
                './preceding-sibling::a[contains(@class,"card-img-tiles")]'
                '//div[contains(@class,"lazyload")]/@data-src'
            )
            cover_url = cover_data[0].strip() if cover_data else ""

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter=latest_chapter,
                    update_date="-",
                    word_count="-",
                    # Priority grows with result position so earlier rows rank higher.
                    priority=cls.priority + len(results),
                )
            )
        return results
|
@@ -0,0 +1,92 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.hetushu
|
4
|
+
---------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
13
|
+
from novel_downloader.core.searchers.registry import register_searcher
|
14
|
+
from novel_downloader.models import SearchResult
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
@register_searcher(
    site_keys=["hetushu"],
)
class HetushuSearcher(BaseSearcher):
    """Searcher for www.hetushu.com (requires a Referer header)."""

    site_name = "hetushu"
    priority = 5
    SEARCH_URL = "https://www.hetushu.com/search/"
    BASE_URL = "https://www.hetushu.com"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the search results page for *keyword*.

        :return: HTML text, or "" on any fetch error.
        """
        params = {"keyword": keyword}
        headers = {
            "Referer": "https://www.hetushu.com/",
        }
        try:
            async with (
                await cls._http_get(cls.SEARCH_URL, params=params, headers=headers)
            ) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the result list into SearchResult entries.

        :param limit: maximum number of results to return, or None for all.
            Counted against actual results, so rows skipped for a missing
            link no longer consume the limit.
        """
        doc = html.fromstring(html_str)
        rows = doc.xpath('//dl[@class="list" and @id="body"]/dd')
        results: list[SearchResult] = []

        for row in rows:
            if limit is not None and len(results) >= limit:
                break

            href = cls._first_str(row.xpath(".//h4/a/@href"))
            if not href:
                continue

            # "/book/7631/index.html" -> "7631"
            # (removesuffix, not rstrip: rstrip("/index.html") strips a
            # character *set* from the end, which only worked by accident
            # because book ids are all digits)
            book_id = href.removesuffix("/index.html").split("/")[-1]
            book_url = cls._abs_url(href)

            title = cls._first_str(row.xpath(".//h4/a/text()"))

            # Author from the adjacent <span>, strip "/" delimiters
            # e.x. " / 风行云亦行 / "
            author_raw = cls._first_str(row.xpath(".//h4/span/text()"))
            author = author_raw.strip("/").strip()

            cover_rel = cls._first_str(row.xpath(".//a/img/@src"))
            cover_url = cls._abs_url(cover_rel) if cover_rel else ""

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter="-",
                    update_date="-",
                    word_count="-",
                    # Priority grows with result position so earlier rows rank higher.
                    priority=cls.priority + len(results),
                )
            )
        return results
|
@@ -0,0 +1,93 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.i25zw
|
4
|
+
-------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
13
|
+
from novel_downloader.core.searchers.registry import register_searcher
|
14
|
+
from novel_downloader.models import SearchResult
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
@register_searcher(
    site_keys=["i25zw"],
)
class I25zwSearcher(BaseSearcher):
    """Keyword search provider for the i25zw.com novel site."""

    site_name = "i25zw"
    priority = 30
    SEARCH_URL = "https://www.i25zw.com/search.html"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        POST the search form for ``keyword`` and return the response HTML.

        Any failure is logged and mapped to an empty string so the caller
        sees "no results" rather than an exception.
        """
        payload = {
            "searchkey": keyword,
            "searchtype": "all",
            "Submit": "",
        }
        try:
            resp_cm = await cls._http_post(cls.SEARCH_URL, data=payload)
            async with resp_cm as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Convert search-results HTML into ``SearchResult`` records.

        :param html_str: Raw HTML from ``_fetch_html``.
        :param limit: Maximum number of results to keep (``None`` = no cap).
        """
        tree = html.fromstring(html_str)
        entries = tree.xpath("//div[@id='alistbox']")
        found: list[SearchResult] = []

        for rank, entry in enumerate(entries):
            book_url = cls._first_str(entry.xpath(".//div[@class='pic']/a/@href"))
            if not book_url:
                continue

            if limit is not None and rank >= limit:
                break

            # 'https://www.i25zw.com/book/309209.html' -> "309209"
            book_id = book_url.rsplit("/", 1)[-1].split(".", 1)[0]

            title = cls._first_str(entry.xpath(".//div[@class='title']/h2/a/text()"))

            author = cls._first_str(
                entry.xpath(".//div[@class='title']/span/text()"),
                replaces=[("作者:", "")],
            )

            cover_rel = cls._first_str(entry.xpath(".//div[@class='pic']//img/@src"))
            cover_url = cls._abs_url(cover_rel) if cover_rel else ""

            # Latest chapter; fall back to "-" when the cell is absent/empty.
            chapter = cls._first_str(entry.xpath(".//div[@class='sys']//li[1]/a/text()"))
            latest_chapter = chapter or "-"

            found.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter=latest_chapter,
                    update_date="-",
                    word_count="-",
                    # Later rows rank lower: offset the site priority by position.
                    priority=cls.priority + rank,
                )
            )
        return found
|