novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,144 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.quanben5
|
4
|
+
----------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import json
|
9
|
+
import logging
|
10
|
+
import random
|
11
|
+
import time
|
12
|
+
|
13
|
+
from lxml import html
|
14
|
+
|
15
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
16
|
+
from novel_downloader.core.searchers.registry import register_searcher
|
17
|
+
from novel_downloader.models import SearchResult
|
18
|
+
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
|
22
|
+
@register_searcher(
    site_keys=["quanben5"],
)
class Quanben5Searcher(BaseSearcher):
    """Search implementation for quanben5.com (JSONP search endpoint)."""

    site_name = "quanben5"
    priority = 30
    BASE_URL = "https://quanben5.com"
    SEARCH_URL = "https://quanben5.com/"

    # Character table used by the site's JS to obfuscate the `b` parameter.
    STATIC_CHARS = "PXhw7UT1B0a9kQDKZsjIASmOezxYG4CHo5Jyfg2b8FLpEvRr3WtVnlqMidu6cN"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the raw JSONP search response for ``keyword``.

        :param keyword: The search term.
        :return: Response body as text, or "" on any failure.
        """
        t = str(int(time.time() * 1000))  # cache-busting timestamp in ms
        uri_keyword = cls._quote(keyword)
        # Site-specific obfuscation of the quoted keyword (not real base64).
        b_raw = cls._base64(uri_keyword)
        b = cls._quote(b_raw)

        params = {
            "c": "book",
            "a": "search.json",
            "callback": "search",
            "t": t,
            "keywords": uri_keyword,
            "b": b,
        }
        full_url = cls._build_url(cls.SEARCH_URL, params)

        headers = {
            "Host": "quanben5.com",
            "Referer": "https://quanben5.com/search.html",
        }

        try:
            async with (await cls._http_get(full_url, headers=headers)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the JSONP search payload into ``SearchResult`` entries.

        :param html_str: Raw JSONP text, e.g. ``search({...});``.
        :param limit: Maximum number of results to return; None for all.
        :return: Parsed results, best first (lower priority value = better).
        """
        # Unwrap JSONP: search({...});
        prefix, suffix = "search(", ");"
        json_str = (
            html_str[len(prefix) : -len(suffix)]
            if html_str.startswith(prefix) and html_str.endswith(suffix)
            else html_str
        )

        try:
            data = json.loads(json_str)
        except json.JSONDecodeError:
            return []

        content_html = data.get("content", "")
        if not content_html:
            return []

        doc = html.fromstring(content_html)
        rows = doc.xpath('//div[@class="pic_txt_list"]')
        results: list[SearchResult] = []

        for idx, row in enumerate(rows):
            # Fix: bound by collected results, not the raw row index, so rows
            # skipped for a missing link do not eat into the limit.
            if limit is not None and len(results) >= limit:
                break

            href = cls._first_str(row.xpath(".//h3/a/@href"))
            if not href:
                continue

            # '/n/douposanqian/' -> "douposanqian"
            book_id = href.rstrip("/").split("/")[-1]
            book_url = cls._abs_url(href)

            cover_rel = cls._first_str(row.xpath(".//div[@class='pic']//img/@src"))
            cover_url = cls._abs_url(cover_rel) if cover_rel else ""

            title = "".join(
                t.strip()
                for t in row.xpath(".//h3/a/span[@class='name']//text()")
                if t and t.strip()
            )

            author = cls._first_str(
                row.xpath(".//p[@class='info']//span[contains(@class,'author')]/text()")
            )

            # Bump priority by result index so earlier hits rank higher.
            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter="-",
                    update_date="-",
                    word_count="-",
                    priority=cls.priority + idx,
                )
            )
        return results

    @classmethod
    def _base64(cls, s: str) -> str:
        """
        Reproduce the site's JS "base64" obfuscation (not real base64).

        Each char found in ``STATIC_CHARS`` is shifted forward by 3 within the
        table; unknown chars pass through. Every output char is padded with one
        random table char on each side, so the result is non-deterministic.
        """
        out = []
        for ch in s:
            idx = cls.STATIC_CHARS.find(ch)
            code = cls.STATIC_CHARS[(idx + 3) % 62] if idx != -1 else ch
            n1 = int(random.random() * 62)
            n2 = int(random.random() * 62)
            out.append(cls.STATIC_CHARS[n1])
            out.append(code)
            out.append(cls.STATIC_CHARS[n2])
        return "".join(out)
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.registry
|
4
|
+
----------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
__all__ = ["register_searcher", "search"]
|
9
|
+
|
10
|
+
import asyncio
|
11
|
+
from collections.abc import Callable, Sequence
|
12
|
+
from typing import TypeVar
|
13
|
+
|
14
|
+
import aiohttp
|
15
|
+
|
16
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
17
|
+
from novel_downloader.models import SearchResult
|
18
|
+
|
19
|
+
# Type variable bound to BaseSearcher so the decorator preserves the exact
# class type it receives (type[S] in, type[S] out).
S = TypeVar("S", bound=BaseSearcher)

# Global mapping of site key -> searcher class, populated at import time by
# the `register_searcher` decorator below.
_SEARCHER_REGISTRY: dict[str, type[BaseSearcher]] = {}
|
22
|
+
|
23
|
+
|
24
|
+
def register_searcher(
    site_keys: Sequence[str],
) -> Callable[[type[S]], type[S]]:
    """
    Build a class decorator that registers a searcher under each site key.

    :param site_keys: Site identifiers that should map to the decorated class.
    :return: A decorator that records the class and returns it unchanged.
    """

    def _register(searcher_cls: type[S]) -> type[S]:
        # Map every requested key onto the same searcher class.
        _SEARCHER_REGISTRY.update({key: searcher_cls for key in site_keys})
        return searcher_cls

    return _register
|
37
|
+
|
38
|
+
|
39
|
+
async def search(
    keyword: str,
    sites: Sequence[str] | None = None,
    limit: int | None = None,
    per_site_limit: int = 5,
    timeout: float = 5.0,
) -> list[SearchResult]:
    """
    Perform a search for the given keyword across one or more registered sites,
    then aggregate and sort the results by their `priority` value.

    :param keyword: The search term or keyword to query.
    :param sites: An optional sequence of site keys to limit which searchers.
    :param limit: Maximum total number of results to return; if None, return all.
    :param per_site_limit: Maximum number of search results per site.
    :param timeout: Per-request time budget (seconds)
    :return: A flat list of `SearchResult` objects.
    """
    keys = list(sites or _SEARCHER_REGISTRY.keys())
    # Use a set: several keys may map to the same searcher class.
    to_call = {_SEARCHER_REGISTRY[key] for key in keys if key in _SEARCHER_REGISTRY}

    site_timeout = aiohttp.ClientTimeout(total=timeout)

    results: list[SearchResult] = []
    async with aiohttp.ClientSession(timeout=site_timeout) as session:
        # Give all searchers the same session
        for cls in to_call:
            cls.configure(session)

        # Kick off all sites in parallel
        coros = [cls.search(keyword, limit=per_site_limit) for cls in to_call]
        site_lists = await asyncio.gather(*coros, return_exceptions=True)

        # Collect successful results; skip failures.
        for item in site_lists:
            # Fix: `Exception | BaseException` was redundant -- BaseException
            # already covers every raisable object gather can hand back.
            if isinstance(item, BaseException):
                continue
            results.extend(item)

    # Lower priority value = better ranking.
    results.sort(key=lambda res: res["priority"])
    return results[:limit] if limit is not None else results
|
@@ -0,0 +1,124 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.shuhaige
|
4
|
+
----------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
import time
|
10
|
+
|
11
|
+
from lxml import html
|
12
|
+
|
13
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
14
|
+
from novel_downloader.core.searchers.registry import register_searcher
|
15
|
+
from novel_downloader.models import SearchResult
|
16
|
+
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
|
20
|
+
@register_searcher(
    site_keys=["shuhaige"],
)
class ShuhaigeSearcher(BaseSearcher):
    """Search implementation for shuhaige.net (POST-based site search)."""

    site_name = "shuhaige"
    priority = 30
    BASE_URL = "https://www.shuhaige.net"
    SEARCH_URL = "https://www.shuhaige.net/search.html"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        POST the search form and return the result page HTML.

        :param keyword: The search term.
        :return: Response body as text, or "" on any failure.
        """
        data = {
            "searchtype": "all",
            "searchkey": keyword,
        }
        ts = int(time.time())
        # baidu cookie format: f"Hm_lpvt_{site_id}={timestamp}"
        cookie_str = (
            f"Hm_lpvt_3094b20ed277f38e8f9ac2b2b29d6263={ts}; "
            f"Hm_lpvt_c3da01855456ad902664af23cc3254cb={ts}"
        )
        headers = {
            "Origin": "https://www.shuhaige.net",
            "Referer": "https://www.shuhaige.net/",
            "Cookie": cookie_str,
        }
        try:
            async with (
                await cls._http_post(cls.SEARCH_URL, data=data, headers=headers)
            ) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the search results page into ``SearchResult`` entries.

        :param html_str: Raw HTML of the search results page.
        :param limit: Maximum number of results to return; None for all.
        :return: Parsed results, best first (lower priority value = better).
        """
        doc = html.fromstring(html_str)
        rows = doc.xpath('//div[@id="sitembox"]/dl')
        results: list[SearchResult] = []

        for idx, row in enumerate(rows):
            # Fix: bound by collected results, not the raw row index, so rows
            # skipped for a missing link do not eat into the limit.
            if limit is not None and len(results) >= limit:
                break

            href = cls._first_str(row.xpath("./dt/a[1]/@href")) or cls._first_str(
                row.xpath("./dd/h3/a[1]/@href")
            )
            if not href:
                continue

            book_id = href.strip("/").split("/")[0]
            book_url = cls._abs_url(href)

            # Prefer the visible title; fall back to the cover image alt text.
            title = cls._first_str(row.xpath("./dd/h3/a[1]//text()")) or cls._first_str(
                row.xpath("./dt/a[1]/img[1]/@alt")
            )

            cover_rel = cls._first_str(row.xpath("./dt/a[1]/img[1]/@src"))
            cover_url = cls._abs_url(cover_rel) if cover_rel else ""

            author = (
                cls._first_str(row.xpath("./dd[@class='book_other'][1]/span[1]/text()"))
                or "-"
            )
            word_count = (
                cls._first_str(row.xpath("./dd[@class='book_other'][1]/span[4]/text()"))
                or "-"
            )

            latest_chapter = (
                cls._first_str(
                    row.xpath("./dd[@class='book_other'][last()]/a[1]//text()")
                )
                or "-"
            )
            update_date = (
                cls._first_str(
                    row.xpath("./dd[@class='book_other'][last()]/span[1]//text()")
                )
                or "-"
            )

            # Bump priority by result index so earlier hits rank higher.
            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter=latest_chapter,
                    update_date=update_date,
                    word_count=word_count,
                    priority=cls.priority + idx,
                )
            )
        return results
|
@@ -0,0 +1,110 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.tongrenquan
|
4
|
+
-------------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
13
|
+
from novel_downloader.core.searchers.registry import register_searcher
|
14
|
+
from novel_downloader.models import SearchResult
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
@register_searcher(
    site_keys=["tongrenquan"],
)
class TongrenquanSearcher(BaseSearcher):
    """Search implementation for tongrenquan.org (GBK form POST search)."""

    site_name = "tongrenquan"
    priority = 30
    SEARCH_URL = "https://www.tongrenquan.org/e/search/indexstart.php"
    BASE_URL = "https://www.tongrenquan.org"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        POST the GBK-encoded search form and return the result page HTML.

        :param keyword: The search term.
        :return: Response body as text, or "" on any failure.
        """
        # The site expects the keyword percent-encoded in GBK.
        keyboard = cls._quote(keyword, encoding="gbk", errors="replace")
        show = "title"
        classid = "0"
        body = f"keyboard={keyboard}&show={show}&classid={classid}"
        # NOTE(review): Origin/Referer point at tongrenquan.cc while the
        # endpoint is tongrenquan.org -- confirm this mismatch is intentional.
        headers = {
            "Origin": "https://www.tongrenquan.cc",
            "Referer": "https://www.tongrenquan.cc/",
            "Content-Type": "application/x-www-form-urlencoded",
        }
        try:
            async with (
                await cls._http_post(cls.SEARCH_URL, data=body, headers=headers)
            ) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the search results page into ``SearchResult`` entries.

        :param html_str: Raw HTML of the search results page.
        :param limit: Maximum number of results to return; None for all.
        :return: Parsed results, best first (lower priority value = better).
        """
        doc = html.fromstring(html_str)
        rows = doc.xpath('//div[@class="books m-cols"]/div[@class="bk"]')
        results: list[SearchResult] = []

        for idx, row in enumerate(rows):
            # Fix: bound by collected results, not the raw row index, so rows
            # skipped for a missing link do not eat into the limit.
            if limit is not None and len(results) >= limit:
                break

            href = cls._first_str(row.xpath(".//h3/a[1]/@href"))
            if not href:
                continue

            # '/tongren/9302.html' -> "9302"
            book_id = href.split("/")[-1].split(".")[0]
            book_url = cls._abs_url(href)

            cover_rel = cls._first_str(
                row.xpath("./div[@class='pic']/a[1]/img[1]/@src")
            )
            cover_url = cls._abs_url(cover_rel) if cover_rel else ""

            title = cls._first_str(
                row.xpath("./div[@class='bk_right']/h3/a[1]//text()")
            )

            author = (
                cls._first_str(
                    row.xpath("./div[@class='bk_right']/div[@class='booknews']/text()"),
                    replaces=[("作者:", "")],
                )
                or "-"
            )

            update_date = cls._first_str(
                row.xpath(
                    "./div[@class='bk_right']/div[@class='booknews']/label[@class='date']/text()"
                )
            )

            # Bump priority by result index so earlier hits rank higher.
            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter="-",
                    update_date=update_date,
                    word_count="-",
                    priority=cls.priority + idx,
                )
            )
        return results
|
@@ -0,0 +1,92 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.ttkan
|
4
|
+
-------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
13
|
+
from novel_downloader.core.searchers.registry import register_searcher
|
14
|
+
from novel_downloader.models import SearchResult
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
@register_searcher(
    site_keys=["ttkan"],
)
class TtkanSearcher(BaseSearcher):
    """Search implementation for ttkan.co."""

    site_name = "ttkan"
    priority = 100
    BASE_URL = "https://www.ttkan.co"
    SEARCH_URL = "https://www.ttkan.co/novel/search"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        GET the search page for ``keyword``.

        :param keyword: The search term.
        :return: Response body as text, or "" on any failure.
        """
        try:
            resp_ctx = await cls._http_get(cls.SEARCH_URL, params={"q": keyword})
            async with resp_ctx as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Extract ``SearchResult`` entries from the rendered search page.

        :param html_str: Raw HTML of the search results page.
        :param limit: Maximum number of results to return; None for all.
        """
        doc = html.fromstring(html_str)
        # Primary layout first; fall back to a looser match when absent.
        cells = doc.xpath(
            '//div[contains(@class,"frame_body")]//div[@class="pure-g"]/div[contains(@class,"novel_cell")]'
        ) or doc.xpath('//div[contains(@class,"novel_cell")]')

        found: list[SearchResult] = []
        for pos, cell in enumerate(cells):
            link = cls._first_str(cell.xpath(".//a[@href][1]/@href"))
            if not link:
                continue

            if limit is not None and len(found) >= limit:
                break

            # link -> /novel/chapters/<book_id>
            novel_id = link.strip("/").split("/")[-1]
            page_url = cls._abs_url(link)

            img_src = cls._first_str(cell.xpath(".//amp-img/@src"))
            img_url = cls._abs_url(img_src) if img_src else ""

            name = cls._first_str(cell.xpath(".//h3/text()"))

            writer = (
                cls._first_str(
                    cell.xpath(".//li[starts-with(normalize-space(.),'作者')]/text()"),
                    replaces=[("作者:", "")],
                )
                or "-"
            )

            found.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=novel_id,
                    book_url=page_url,
                    cover_url=img_url,
                    title=name,
                    author=writer,
                    latest_chapter="-",
                    update_date="-",
                    word_count="-",
                    priority=cls.priority + pos,
                )
            )
        return found
|
@@ -0,0 +1,122 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.xiaoshuowu
|
4
|
+
------------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
13
|
+
from novel_downloader.core.searchers.registry import register_searcher
|
14
|
+
from novel_downloader.models import SearchResult
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
@register_searcher(
    site_keys=["xiaoshuowu", "xiaoshuoge"],
)
class XiaoshuowuSearcher(BaseSearcher):
    """Search provider for the xiaoshuoge.info (Xiaoshuowu) novel site."""

    site_name = "xiaoshuowu"
    priority = 30
    SEARCH_URL = "http://www.xiaoshuoge.info/modules/article/search.php"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the raw search-results page for *keyword*.

        :param keyword: Search term to query.
        :return: HTML text of the results page, or "" when the request fails.
        """
        params = {"q": keyword}
        try:
            async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse raw HTML from Xiaoshuowu search results into list of SearchResult.

        :param html_str: Raw HTML string from Xiaoshuowu search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """
        doc = html.fromstring(html_str)
        rows = doc.xpath('//div[@class="c_row"]')
        results: list[SearchResult] = []

        for idx, row in enumerate(rows):
            href = cls._first_str(row.xpath(".//span[@class='c_subject']/a/@href"))
            if not href:
                # Rows without a subject link cannot identify a book; skip.
                continue

            # Fix: count collected results, not raw row index; otherwise rows
            # skipped above would shrink the effective limit.
            if limit is not None and len(results) >= limit:
                break

            # 'http://www.xiaoshuoge.info/book/374339/' -> "374339"
            book_id = href.split("book/")[-1].strip("/")
            book_url = cls._abs_url(href)

            cover_rel = cls._first_str(row.xpath(".//div[@class='fl']//img/@src"))
            cover_url = cls._abs_url(cover_rel) if cover_rel else ""

            title = cls._first_str(row.xpath(".//span[@class='c_subject']/a/text()"))

            # Metadata rows are "<label>作者</label><value>…</value>" pairs;
            # each field falls back to "-" when the page omits it.
            author = (
                cls._first_str(
                    row.xpath(
                        ".//div[@class='c_tag'][1]/span[@class='c_label'][contains(.,'作者')]/following-sibling::span[@class='c_value'][1]/text()"
                    )
                )
                or "-"
            )
            word_count = (
                cls._first_str(
                    row.xpath(
                        ".//div[@class='c_tag'][1]/span[@class='c_label'][contains(.,'字数')]/following-sibling::span[@class='c_value'][1]/text()"
                    )
                )
                or "-"
            )

            latest_chapter = (
                cls._first_str(
                    row.xpath(
                        ".//div[@class='c_tag'][last()]/span[@class='c_label'][contains(.,'最新')]/following-sibling::span[@class='c_value'][1]//a//text()"
                    )
                )
                or "-"
            )
            update_date = (
                cls._first_str(
                    row.xpath(
                        ".//div[@class='c_tag'][last()]/span[@class='c_label'][contains(.,'更新')]/following-sibling::span[@class='c_value'][1]/text()"
                    )
                )
                or "-"
            )

            # Later rows rank lower: offset site priority by row position.
            prio = cls.priority + idx

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter=latest_chapter,
                    update_date=update_date,
                    word_count=word_count,
                    priority=prio,
                )
            )
        return results
|