novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +1 -3
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +26 -21
- novel_downloader/cli/download.py +77 -64
- novel_downloader/cli/export.py +16 -20
- novel_downloader/cli/main.py +1 -1
- novel_downloader/cli/search.py +62 -65
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +8 -5
- novel_downloader/config/adapter.py +65 -105
- novel_downloader/config/{loader.py → file_io.py} +53 -26
- novel_downloader/core/__init__.py +1 -0
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +3 -24
- novel_downloader/core/downloaders/base.py +49 -23
- novel_downloader/core/downloaders/common.py +191 -137
- novel_downloader/core/downloaders/qianbi.py +187 -146
- novel_downloader/core/downloaders/qidian.py +187 -141
- novel_downloader/core/downloaders/registry.py +4 -2
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +3 -20
- novel_downloader/core/exporters/base.py +33 -37
- novel_downloader/core/exporters/common/__init__.py +1 -2
- novel_downloader/core/exporters/common/epub.py +15 -10
- novel_downloader/core/exporters/common/main_exporter.py +19 -12
- novel_downloader/core/exporters/common/txt.py +14 -9
- novel_downloader/core/exporters/epub_util.py +59 -29
- novel_downloader/core/exporters/linovelib/__init__.py +1 -0
- novel_downloader/core/exporters/linovelib/epub.py +23 -25
- novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
- novel_downloader/core/exporters/linovelib/txt.py +17 -11
- novel_downloader/core/exporters/qidian.py +2 -8
- novel_downloader/core/exporters/registry.py +4 -2
- novel_downloader/core/exporters/txt_util.py +7 -7
- novel_downloader/core/fetchers/__init__.py +54 -48
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
- novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
- novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/lewenn.py +83 -0
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +5 -16
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/shuhaige.py +84 -0
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/wanbengo.py +83 -0
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +1 -9
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +9 -1
- novel_downloader/core/parsers/__init__.py +49 -12
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/esjzone.py +61 -66
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/linovelib.py +48 -64
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/qianbi.py +48 -50
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
- novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
- novel_downloader/core/parsers/qidian/main_parser.py +11 -38
- novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +5 -16
- novel_downloader/core/parsers/sfacg.py +38 -45
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +87 -131
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +34 -3
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
- novel_downloader/core/searchers/base.py +112 -36
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +43 -25
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +74 -40
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +24 -8
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +31 -82
- novel_downloader/locales/zh.json +32 -83
- novel_downloader/models/__init__.py +21 -22
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +4 -37
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +5 -0
- novel_downloader/resources/config/settings.toml +8 -70
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +13 -22
- novel_downloader/utils/chapter_storage.py +3 -2
- novel_downloader/utils/constants.py +4 -29
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +1 -1
- novel_downloader/utils/epub/constants.py +57 -16
- novel_downloader/utils/epub/documents.py +88 -194
- novel_downloader/utils/epub/models.py +0 -14
- novel_downloader/utils/epub/utils.py +63 -96
- novel_downloader/utils/file_utils/__init__.py +2 -23
- novel_downloader/utils/file_utils/io.py +3 -113
- novel_downloader/utils/file_utils/sanitize.py +0 -4
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/logger.py +8 -16
- novel_downloader/utils/network.py +2 -2
- novel_downloader/utils/state.py +4 -90
- novel_downloader/utils/text_utils/__init__.py +1 -7
- novel_downloader/utils/text_utils/diff_display.py +5 -7
- novel_downloader/utils/time_utils/__init__.py +5 -11
- novel_downloader/utils/time_utils/datetime_utils.py +20 -29
- novel_downloader/utils/time_utils/sleep_utils.py +4 -8
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/core/downloaders/biquge.py +0 -29
- novel_downloader/core/downloaders/esjzone.py +0 -29
- novel_downloader/core/downloaders/linovelib.py +0 -29
- novel_downloader/core/downloaders/sfacg.py +0 -29
- novel_downloader/core/downloaders/yamibo.py +0 -29
- novel_downloader/core/exporters/biquge.py +0 -22
- novel_downloader/core/exporters/esjzone.py +0 -22
- novel_downloader/core/exporters/qianbi.py +0 -22
- novel_downloader/core/exporters/sfacg.py +0 -22
- novel_downloader/core/exporters/yamibo.py +0 -22
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -422
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -209
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -198
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -326
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -194
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -234
- novel_downloader/core/parsers/biquge.py +0 -139
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/types.py +0 -13
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/hash_store.py +0 -280
- novel_downloader/utils/fontocr/hash_utils.py +0 -103
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -315
- novel_downloader/utils/fontocr/ocr_v2.py +0 -764
- novel_downloader/utils/fontocr/ocr_v3.py +0 -744
- novel_downloader-1.5.0.dist-info/METADATA +0 -196
- novel_downloader-1.5.0.dist-info/RECORD +0 -164
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -65,17 +65,15 @@ def parse_normal_chapter(
|
|
65
65
|
seq = chapter_info.get("seq", None)
|
66
66
|
volume = chapter_info.get("extra", {}).get("volumeName", "")
|
67
67
|
|
68
|
-
chapter_text =
|
68
|
+
chapter_text = _parse_paragraph(
|
69
|
+
html_str=raw_html,
|
70
|
+
is_vip=vip_status(ssr_data),
|
71
|
+
chapter_id=chapter_id,
|
72
|
+
fkp=fkp,
|
73
|
+
fuid=parser._fuid,
|
74
|
+
)
|
69
75
|
if not chapter_text:
|
70
|
-
|
71
|
-
html_str=raw_html,
|
72
|
-
is_vip=vip_status(ssr_data),
|
73
|
-
chapter_id=chapter_id,
|
74
|
-
fkp=fkp,
|
75
|
-
fuid=parser._fuid,
|
76
|
-
)
|
77
|
-
if not chapter_text:
|
78
|
-
return None
|
76
|
+
return None
|
79
77
|
|
80
78
|
if parser._use_truncation and duplicated:
|
81
79
|
chapter_text = truncate_half_lines(chapter_text)
|
@@ -103,55 +101,26 @@ def parse_normal_chapter(
|
|
103
101
|
return None
|
104
102
|
|
105
103
|
|
106
|
-
def
|
107
|
-
try:
|
108
|
-
tree = html.fromstring(html_str)
|
109
|
-
main = tree.xpath('//div[@id="app"]//div[@id="reader-content"]//main')
|
110
|
-
if not main:
|
111
|
-
return ""
|
112
|
-
main = main[0]
|
113
|
-
|
114
|
-
content_spans = main.xpath('.//span[contains(@class, "content-text")]')
|
115
|
-
|
116
|
-
paragraph_texts = [
|
117
|
-
span.text_content().strip()
|
118
|
-
for span in content_spans
|
119
|
-
if span.text_content().strip()
|
120
|
-
]
|
121
|
-
|
122
|
-
chapter_text = "\n\n".join(paragraph_texts)
|
123
|
-
return chapter_text
|
124
|
-
|
125
|
-
except Exception as e:
|
126
|
-
logger.error("[Parser] _parse_paragraph failed: %s", e)
|
127
|
-
return ""
|
128
|
-
|
129
|
-
|
130
|
-
def _parse_session_paragraph(
|
104
|
+
def _parse_paragraph(
|
131
105
|
html_str: str,
|
132
106
|
is_vip: bool,
|
133
107
|
chapter_id: str,
|
134
108
|
fkp: str,
|
135
109
|
fuid: str,
|
136
110
|
) -> str:
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
return ""
|
147
|
-
|
148
|
-
tree = html.fromstring(raw_html)
|
149
|
-
paras = tree.xpath(".//p")
|
150
|
-
paragraph_texts = [
|
151
|
-
p.text_content().strip() for p in paras if p.text_content().strip()
|
152
|
-
]
|
153
|
-
return "\n\n".join(paragraph_texts)
|
111
|
+
raw_html = html_str
|
112
|
+
|
113
|
+
if is_vip:
|
114
|
+
try:
|
115
|
+
decryptor = get_decryptor()
|
116
|
+
raw_html = decryptor.decrypt(raw_html, chapter_id, fkp, fuid)
|
117
|
+
except Exception as e:
|
118
|
+
logger.error("[Parser] decryption failed for '%s': %s", chapter_id, e)
|
119
|
+
return ""
|
154
120
|
|
155
|
-
|
156
|
-
|
157
|
-
|
121
|
+
tree = html.fromstring(raw_html)
|
122
|
+
paras = tree.xpath(".//p")
|
123
|
+
paragraph_texts = [
|
124
|
+
p.text_content().strip() for p in paras if p.text_content().strip()
|
125
|
+
]
|
126
|
+
return "\n".join(paragraph_texts)
|
@@ -10,13 +10,17 @@ from __future__ import annotations
|
|
10
10
|
|
11
11
|
import logging
|
12
12
|
from pathlib import Path
|
13
|
-
from typing import
|
13
|
+
from typing import Any
|
14
14
|
|
15
15
|
from novel_downloader.core.parsers.base import BaseParser
|
16
16
|
from novel_downloader.core.parsers.registry import register_parser
|
17
|
-
from novel_downloader.models import
|
18
|
-
|
17
|
+
from novel_downloader.models import (
|
18
|
+
BookInfoDict,
|
19
|
+
ChapterDict,
|
20
|
+
ParserConfig,
|
21
|
+
)
|
19
22
|
from novel_downloader.utils.constants import DATA_DIR
|
23
|
+
from novel_downloader.utils.cookies import get_cookie_value
|
20
24
|
|
21
25
|
from .book_info_parser import parse_book_info
|
22
26
|
from .chapter_router import parse_chapter
|
@@ -24,17 +28,13 @@ from .utils import is_encrypted
|
|
24
28
|
|
25
29
|
logger = logging.getLogger(__name__)
|
26
30
|
|
27
|
-
if TYPE_CHECKING:
|
28
|
-
from novel_downloader.utils.fontocr import FontOCR
|
29
|
-
|
30
31
|
|
31
32
|
@register_parser(
|
32
33
|
site_keys=["qidian", "qd"],
|
33
|
-
backends=["session", "browser"],
|
34
34
|
)
|
35
35
|
class QidianParser(BaseParser):
|
36
36
|
"""
|
37
|
-
Parser for
|
37
|
+
Parser for 起点中文网 site.
|
38
38
|
"""
|
39
39
|
|
40
40
|
def __init__(
|
@@ -49,47 +49,20 @@ class QidianParser(BaseParser):
|
|
49
49
|
"""
|
50
50
|
super().__init__(config)
|
51
51
|
|
52
|
-
# Extract and store parser flags from config
|
53
|
-
self._use_truncation = config.use_truncation
|
54
|
-
self._decode_font: bool = config.decode_font
|
55
|
-
|
56
52
|
self._fixed_font_dir: Path = self._base_cache_dir / "fixed_fonts"
|
57
53
|
self._fixed_font_dir.mkdir(parents=True, exist_ok=True)
|
58
54
|
self._debug_dir: Path = Path.cwd() / "debug"
|
59
55
|
|
60
56
|
state_files = [
|
61
|
-
DATA_DIR / "qidian" / "browser_state.cookies",
|
62
57
|
DATA_DIR / "qidian" / "session_state.cookies",
|
63
58
|
]
|
64
|
-
self._fuid: str = fuid or
|
65
|
-
|
66
|
-
self._font_ocr: FontOCR | None = None
|
67
|
-
if self._decode_font:
|
68
|
-
try:
|
69
|
-
from novel_downloader.utils.fontocr import FontOCR
|
70
|
-
except ImportError:
|
71
|
-
logger.warning(
|
72
|
-
"[QidianParser] FontOCR not available, font decoding will skip"
|
73
|
-
)
|
74
|
-
else:
|
75
|
-
self._font_ocr = FontOCR(
|
76
|
-
cache_dir=self._base_cache_dir,
|
77
|
-
use_freq=config.use_freq,
|
78
|
-
use_ocr=config.use_ocr,
|
79
|
-
use_vec=config.use_vec,
|
80
|
-
batch_size=config.batch_size,
|
81
|
-
gpu_mem=config.gpu_mem,
|
82
|
-
gpu_id=config.gpu_id,
|
83
|
-
ocr_weight=config.ocr_weight,
|
84
|
-
vec_weight=config.vec_weight,
|
85
|
-
font_debug=config.save_font_debug,
|
86
|
-
)
|
59
|
+
self._fuid: str = fuid or get_cookie_value(state_files, "ywguid")
|
87
60
|
|
88
61
|
def parse_book_info(
|
89
62
|
self,
|
90
63
|
html_list: list[str],
|
91
64
|
**kwargs: Any,
|
92
|
-
) ->
|
65
|
+
) -> BookInfoDict | None:
|
93
66
|
"""
|
94
67
|
Parse a book info page and extract metadata and chapter structure.
|
95
68
|
|
@@ -97,7 +70,7 @@ class QidianParser(BaseParser):
|
|
97
70
|
:return: Parsed metadata and chapter structure as a dictionary.
|
98
71
|
"""
|
99
72
|
if not html_list:
|
100
|
-
return
|
73
|
+
return None
|
101
74
|
return parse_book_info(html_list[0])
|
102
75
|
|
103
76
|
def parse_chapter(
|
@@ -25,7 +25,7 @@ import requests
|
|
25
25
|
from novel_downloader.utils.constants import JS_SCRIPT_DIR
|
26
26
|
|
27
27
|
DEST_ROOT: Final[Path] = JS_SCRIPT_DIR
|
28
|
-
GITHUB_OWNER: Final = "
|
28
|
+
GITHUB_OWNER: Final = "saudadez21"
|
29
29
|
GITHUB_REPO: Final = "qidian-decryptor"
|
30
30
|
RELEASE_VERSION: Final = "v1.0.1"
|
31
31
|
BASE_URL: Final = f"https://github.com/{GITHUB_OWNER}/{GITHUB_REPO}/releases/download/{RELEASE_VERSION}"
|
@@ -0,0 +1,143 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.parsers.qidian.utils.fontmap_recover
|
4
|
+
----------------------------------------------------------
|
5
|
+
|
6
|
+
Tools for generating and applying font character mappings
|
7
|
+
to recover obfuscated Qidian text.
|
8
|
+
"""
|
9
|
+
|
10
|
+
__all__ = [
|
11
|
+
"generate_font_map",
|
12
|
+
"apply_font_mapping",
|
13
|
+
]
|
14
|
+
|
15
|
+
import json
|
16
|
+
import logging
|
17
|
+
from pathlib import Path
|
18
|
+
|
19
|
+
import numpy as np
|
20
|
+
from fontTools.ttLib import TTFont
|
21
|
+
from PIL import ImageFont
|
22
|
+
|
23
|
+
logger = logging.getLogger(__name__)
|
24
|
+
CHAR_FONT_SIZE = 52
|
25
|
+
|
26
|
+
|
27
|
+
def generate_font_map(
|
28
|
+
fixed_font_path: Path,
|
29
|
+
random_font_path: Path,
|
30
|
+
char_set: set[str],
|
31
|
+
refl_set: set[str],
|
32
|
+
cache_dir: Path,
|
33
|
+
batch_size: int = 32,
|
34
|
+
) -> dict[str, str]:
|
35
|
+
"""
|
36
|
+
Build a mapping from scrambled font chars to real chars.
|
37
|
+
|
38
|
+
Uses OCR to compare rendered glyphs from a known (fixed) font and an
|
39
|
+
obfuscated (random) font. Results are cached in JSON so repeated runs
|
40
|
+
are faster.
|
41
|
+
|
42
|
+
:param fixed_font_path: fixed font file.
|
43
|
+
:param random_font_path: random font file.
|
44
|
+
:param char_set: Characters to match directly.
|
45
|
+
:param refl_set: Characters to match in flipped form.
|
46
|
+
:param cache_dir: Directory to save/load cached results.
|
47
|
+
:param batch_size: How many chars to OCR per batch.
|
48
|
+
|
49
|
+
:return: { obf_char: real_char, ... }
|
50
|
+
"""
|
51
|
+
try:
|
52
|
+
from novel_downloader.utils.fontocr import get_font_ocr
|
53
|
+
|
54
|
+
font_ocr = get_font_ocr(batch_size=batch_size)
|
55
|
+
except ImportError:
|
56
|
+
logger.warning("[QidianParser] FontOCR not available, font decoding will skip")
|
57
|
+
return {}
|
58
|
+
|
59
|
+
mapping_result: dict[str, str] = {}
|
60
|
+
fixed_map_file = cache_dir / "fixed_font_map" / f"{Path(fixed_font_path).stem}.json"
|
61
|
+
fixed_map_file.parent.mkdir(parents=True, exist_ok=True)
|
62
|
+
|
63
|
+
# load existing cache
|
64
|
+
try:
|
65
|
+
with open(fixed_map_file, encoding="utf-8") as f:
|
66
|
+
fixed_map = json.load(f)
|
67
|
+
cached_chars = set(fixed_map.keys())
|
68
|
+
mapping_result.update({ch: fixed_map[ch] for ch in char_set if ch in fixed_map})
|
69
|
+
mapping_result.update({ch: fixed_map[ch] for ch in refl_set if ch in fixed_map})
|
70
|
+
char_set = set(char_set) - cached_chars
|
71
|
+
refl_set = set(refl_set) - cached_chars
|
72
|
+
except Exception:
|
73
|
+
fixed_map = {}
|
74
|
+
cached_chars = set()
|
75
|
+
|
76
|
+
# prepare font renderers and cmap sets
|
77
|
+
try:
|
78
|
+
fixed_ttf = TTFont(fixed_font_path)
|
79
|
+
fixed_chars = {chr(c) for c in fixed_ttf.getBestCmap()}
|
80
|
+
fixed_font = ImageFont.truetype(str(fixed_font_path), CHAR_FONT_SIZE)
|
81
|
+
|
82
|
+
random_ttf = TTFont(random_font_path)
|
83
|
+
random_chars = {chr(c) for c in random_ttf.getBestCmap()}
|
84
|
+
random_font = ImageFont.truetype(str(random_font_path), CHAR_FONT_SIZE)
|
85
|
+
except Exception as e:
|
86
|
+
logger.error("[FontOCR] Failed to load TTF fonts: %s", e)
|
87
|
+
return mapping_result
|
88
|
+
|
89
|
+
def _render_batch(chars: list[tuple[str, bool]]) -> list[tuple[str, np.ndarray]]:
|
90
|
+
out = []
|
91
|
+
for ch, reflect in chars:
|
92
|
+
if ch in fixed_chars:
|
93
|
+
font = fixed_font
|
94
|
+
elif ch in random_chars:
|
95
|
+
font = random_font
|
96
|
+
else:
|
97
|
+
continue
|
98
|
+
img = font_ocr.render_char_image_array(ch, font, reflect)
|
99
|
+
if img is not None:
|
100
|
+
out.append((ch, img))
|
101
|
+
return out
|
102
|
+
|
103
|
+
# process normal and reflected sets together
|
104
|
+
for chars, reflect in [(list(char_set), False), (list(refl_set), True)]:
|
105
|
+
for batch_chars in font_ocr._chunked(chars, font_ocr._batch_size):
|
106
|
+
# render all images in this batch
|
107
|
+
to_render = [(ch, reflect) for ch in batch_chars]
|
108
|
+
rendered = _render_batch(to_render)
|
109
|
+
if not rendered:
|
110
|
+
continue
|
111
|
+
|
112
|
+
# query OCR+vec simultaneously
|
113
|
+
imgs_to_query = [img for (ch, img) in rendered]
|
114
|
+
fused = font_ocr.predict(imgs_to_query, top_k=1)
|
115
|
+
|
116
|
+
# pick best per char, apply threshold + cache
|
117
|
+
for (ch, _), preds in zip(rendered, fused, strict=False):
|
118
|
+
if not preds:
|
119
|
+
continue
|
120
|
+
real_char, _ = preds[0]
|
121
|
+
mapping_result[ch] = real_char
|
122
|
+
fixed_map[ch] = real_char
|
123
|
+
|
124
|
+
# persist updated fixed_map
|
125
|
+
try:
|
126
|
+
with open(fixed_map_file, "w", encoding="utf-8") as f:
|
127
|
+
json.dump(fixed_map, f, ensure_ascii=False, indent=2)
|
128
|
+
except Exception as e:
|
129
|
+
logger.error("[FontOCR] Failed to save fixed map: %s", e)
|
130
|
+
|
131
|
+
return mapping_result
|
132
|
+
|
133
|
+
|
134
|
+
def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
|
135
|
+
"""
|
136
|
+
Replace each character in `text` using `font_map`,
|
137
|
+
leaving unmapped characters unchanged.
|
138
|
+
|
139
|
+
:param text: The input string, possibly containing obfuscated font chars.
|
140
|
+
:param font_map: A dict mapping obfuscated chars to real chars.
|
141
|
+
:return: The de-obfuscated text.
|
142
|
+
"""
|
143
|
+
return "".join(font_map.get(ch, ch) for ch in text)
|
@@ -4,10 +4,6 @@ novel_downloader.core.parsers.qidian.utils.helpers
|
|
4
4
|
--------------------------------------------------
|
5
5
|
|
6
6
|
Shared utility functions for parsing Qidian pages.
|
7
|
-
|
8
|
-
This module provides reusable helpers to:
|
9
|
-
- Extract SSR-rendered JSON page context and structured chapter metadata.
|
10
|
-
- Identify VIP chapters, encrypted content, and viewability conditions.
|
11
7
|
"""
|
12
8
|
|
13
9
|
import json
|
@@ -0,0 +1,103 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.parsers.quanben5
|
4
|
+
--------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from datetime import datetime
|
9
|
+
from typing import Any
|
10
|
+
|
11
|
+
from lxml import html
|
12
|
+
|
13
|
+
from novel_downloader.core.parsers.base import BaseParser
|
14
|
+
from novel_downloader.core.parsers.registry import register_parser
|
15
|
+
from novel_downloader.models import (
|
16
|
+
BookInfoDict,
|
17
|
+
ChapterDict,
|
18
|
+
ChapterInfoDict,
|
19
|
+
VolumeInfoDict,
|
20
|
+
)
|
21
|
+
|
22
|
+
|
23
|
+
@register_parser(
|
24
|
+
site_keys=["quanben5"],
|
25
|
+
)
|
26
|
+
class Quanben5Parser(BaseParser):
|
27
|
+
"""
|
28
|
+
Parser for 全本小说网 book pages.
|
29
|
+
"""
|
30
|
+
|
31
|
+
def parse_book_info(
|
32
|
+
self,
|
33
|
+
html_list: list[str],
|
34
|
+
**kwargs: Any,
|
35
|
+
) -> BookInfoDict | None:
|
36
|
+
if not html_list:
|
37
|
+
return None
|
38
|
+
|
39
|
+
tree = html.fromstring(html_list[0])
|
40
|
+
book_name = self._first_str(tree.xpath("//h3/span/text()"))
|
41
|
+
author = self._first_str(
|
42
|
+
tree.xpath(
|
43
|
+
'//p[@class="info"][contains(., "作者")]/span[@class="author"]/text()'
|
44
|
+
)
|
45
|
+
)
|
46
|
+
cover_url = self._first_str(tree.xpath('//div[@class="pic"]/img/@src'))
|
47
|
+
category = self._first_str(
|
48
|
+
tree.xpath('//p[@class="info"][contains(., "类别")]/span/text()')
|
49
|
+
)
|
50
|
+
tags = [category] if category else []
|
51
|
+
update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
52
|
+
summary = self._first_str(tree.xpath('//p[@class="description"]/text()'))
|
53
|
+
|
54
|
+
chapters: list[ChapterInfoDict] = []
|
55
|
+
for li in tree.xpath('//ul[@class="list"]/li'):
|
56
|
+
link = li.xpath(".//a")[0]
|
57
|
+
href = link.get("href", "").strip()
|
58
|
+
title = self._first_str(link.xpath(".//span/text()"))
|
59
|
+
# '/n/toutian/83840.html' -> '83840'
|
60
|
+
chapter_id = href.rstrip(".html").split("/")[-1]
|
61
|
+
chapters.append({"title": title, "url": href, "chapterId": chapter_id})
|
62
|
+
|
63
|
+
volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
|
64
|
+
|
65
|
+
return {
|
66
|
+
"book_name": book_name,
|
67
|
+
"author": author,
|
68
|
+
"cover_url": cover_url,
|
69
|
+
"update_time": update_time,
|
70
|
+
"tags": tags,
|
71
|
+
"summary": summary,
|
72
|
+
"volumes": volumes,
|
73
|
+
"extra": {},
|
74
|
+
}
|
75
|
+
|
76
|
+
def parse_chapter(
|
77
|
+
self,
|
78
|
+
html_list: list[str],
|
79
|
+
chapter_id: str,
|
80
|
+
**kwargs: Any,
|
81
|
+
) -> ChapterDict | None:
|
82
|
+
if not html_list:
|
83
|
+
return None
|
84
|
+
|
85
|
+
tree = html.fromstring(html_list[0])
|
86
|
+
|
87
|
+
# Extract the chapter title
|
88
|
+
title = self._first_str(tree.xpath('//h1[@class="title1"]/text()'))
|
89
|
+
|
90
|
+
# Extract all <p> text within the content container
|
91
|
+
paragraphs = tree.xpath('//div[@id="content"]/p/text()')
|
92
|
+
# Clean whitespace and join with double newlines
|
93
|
+
content = "\n".join(p.strip() for p in paragraphs if p.strip())
|
94
|
+
|
95
|
+
if not content:
|
96
|
+
return None
|
97
|
+
|
98
|
+
return {
|
99
|
+
"id": chapter_id,
|
100
|
+
"title": title,
|
101
|
+
"content": content,
|
102
|
+
"extra": {"site": "quanben5"},
|
103
|
+
}
|
@@ -3,6 +3,7 @@
|
|
3
3
|
novel_downloader.core.parsers.registry
|
4
4
|
--------------------------------------
|
5
5
|
|
6
|
+
Registry and factory helpers for creating site-specific parsers.
|
6
7
|
"""
|
7
8
|
|
8
9
|
__all__ = ["register_parser", "get_parser"]
|
@@ -16,27 +17,24 @@ from novel_downloader.models import ParserConfig
|
|
16
17
|
ParserBuilder = Callable[[ParserConfig], ParserProtocol]
|
17
18
|
|
18
19
|
P = TypeVar("P", bound=ParserProtocol)
|
19
|
-
_PARSER_MAP: dict[str,
|
20
|
+
_PARSER_MAP: dict[str, ParserBuilder] = {}
|
20
21
|
|
21
22
|
|
22
23
|
def register_parser(
|
23
24
|
site_keys: Sequence[str],
|
24
|
-
backends: Sequence[str],
|
25
25
|
) -> Callable[[type[P]], type[P]]:
|
26
26
|
"""
|
27
27
|
Decorator to register a parser class under given keys.
|
28
28
|
|
29
29
|
:param site_keys: Sequence of site identifiers
|
30
|
-
:param backends:
|
30
|
+
:param backends: Sequence of backend types
|
31
31
|
:return: A class decorator that populates _PARSER_MAP.
|
32
32
|
"""
|
33
33
|
|
34
34
|
def decorator(cls: type[P]) -> type[P]:
|
35
35
|
for site in site_keys:
|
36
36
|
site_lower = site.lower()
|
37
|
-
|
38
|
-
for backend in backends:
|
39
|
-
bucket[backend] = cls
|
37
|
+
_PARSER_MAP[site_lower] = cls
|
40
38
|
return cls
|
41
39
|
|
42
40
|
return decorator
|
@@ -52,17 +50,8 @@ def get_parser(site: str, config: ParserConfig) -> ParserProtocol:
|
|
52
50
|
"""
|
53
51
|
site_key = site.lower()
|
54
52
|
try:
|
55
|
-
|
53
|
+
parser_cls = _PARSER_MAP[site_key]
|
56
54
|
except KeyError as err:
|
57
55
|
raise ValueError(f"Unsupported site: {site!r}") from err
|
58
56
|
|
59
|
-
mode = config.mode
|
60
|
-
try:
|
61
|
-
parser_cls = backend_map[mode]
|
62
|
-
except KeyError as err:
|
63
|
-
raise ValueError(
|
64
|
-
f"Unsupported parser mode {mode!r} for site {site!r}. "
|
65
|
-
f"Available modes: {list(backend_map)}"
|
66
|
-
) from err
|
67
|
-
|
68
57
|
return parser_cls(config)
|
@@ -11,22 +11,32 @@ from lxml import html
|
|
11
11
|
|
12
12
|
from novel_downloader.core.parsers.base import BaseParser
|
13
13
|
from novel_downloader.core.parsers.registry import register_parser
|
14
|
-
from novel_downloader.models import
|
14
|
+
from novel_downloader.models import (
|
15
|
+
BookInfoDict,
|
16
|
+
ChapterDict,
|
17
|
+
ChapterInfoDict,
|
18
|
+
VolumeInfoDict,
|
19
|
+
)
|
15
20
|
|
16
21
|
|
17
22
|
@register_parser(
|
18
23
|
site_keys=["sfacg"],
|
19
|
-
backends=["session", "browser"],
|
20
24
|
)
|
21
25
|
class SfacgParser(BaseParser):
|
22
|
-
"""
|
26
|
+
"""
|
27
|
+
Parser for sfacg book pages.
|
28
|
+
"""
|
23
29
|
|
24
30
|
# Book info XPaths
|
25
31
|
_BOOK_NAME_XPATH = '//ul[@class="book_info"]//span[@class="book_newtitle"]/text()'
|
26
32
|
_AUTHOR_INFO_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/text()'
|
27
33
|
_UPDATE_TIME_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/br/following-sibling::text()' # noqa: E501
|
28
34
|
_COVER_URL_XPATH = '//ul[@class="book_info"]//li/img/@src'
|
29
|
-
_STATUS_XPATH = '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
|
35
|
+
# _STATUS_XPATH = '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
|
36
|
+
_STATUS_XPATH = (
|
37
|
+
'//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
|
38
|
+
' and (contains(., "完结") or contains(., "连载"))]/text()'
|
39
|
+
)
|
30
40
|
_SUMMARY_XPATH = '//ul[@class="book_profile"]/li[@class="book_bk_qs1"]/text()'
|
31
41
|
|
32
42
|
# Catalog XPaths
|
@@ -47,54 +57,35 @@ class SfacgParser(BaseParser):
|
|
47
57
|
self,
|
48
58
|
html_list: list[str],
|
49
59
|
**kwargs: Any,
|
50
|
-
) ->
|
51
|
-
"""
|
52
|
-
Parse a book info page and extract metadata and chapter structure.
|
53
|
-
|
54
|
-
:param html_list: Raw HTML of the book info page.
|
55
|
-
:return: Parsed metadata and chapter structure as a dictionary.
|
56
|
-
"""
|
60
|
+
) -> BookInfoDict | None:
|
57
61
|
if len(html_list) < 2:
|
58
|
-
return
|
62
|
+
return None
|
59
63
|
|
60
64
|
info_tree = html.fromstring(html_list[0])
|
61
65
|
catalog_tree = html.fromstring(html_list[1])
|
62
66
|
|
63
|
-
result: dict[str, Any] = {}
|
64
|
-
|
65
67
|
# Book metadata
|
66
|
-
book_name = info_tree.xpath(self._BOOK_NAME_XPATH)
|
67
|
-
result["book_name"] = book_name[0].strip() if book_name else ""
|
68
|
+
book_name = self._first_str(info_tree.xpath(self._BOOK_NAME_XPATH))
|
68
69
|
|
69
|
-
|
70
|
-
|
71
|
-
result["word_count"] = (
|
72
|
-
book_info3[0].split("/")[1].strip()
|
73
|
-
if book_info3 and len(book_info3[0].split("/")) > 1
|
74
|
-
else ""
|
75
|
-
)
|
70
|
+
book_info3_str = self._first_str(info_tree.xpath(self._AUTHOR_INFO_XPATH))
|
71
|
+
author, _, word_count = (p.strip() for p in book_info3_str.partition("/"))
|
76
72
|
|
77
|
-
|
78
|
-
result["update_time"] = book_info3_br[0].strip() if book_info3_br else ""
|
73
|
+
update_time = self._first_str(info_tree.xpath(self._UPDATE_TIME_XPATH))
|
79
74
|
|
80
|
-
cover_url = info_tree.xpath(self._COVER_URL_XPATH)
|
81
|
-
result["cover_url"] = "https:" + cover_url[0] if cover_url else ""
|
75
|
+
cover_url = "https:" + self._first_str(info_tree.xpath(self._COVER_URL_XPATH))
|
82
76
|
|
83
|
-
serial_status = info_tree.xpath(self._STATUS_XPATH)
|
84
|
-
result["serial_status"] = next(
|
85
|
-
(s for s in serial_status if "完结" in s or "连载" in s), ""
|
86
|
-
)
|
77
|
+
serial_status = self._first_str(info_tree.xpath(self._STATUS_XPATH))
|
87
78
|
|
88
|
-
|
89
|
-
|
79
|
+
summary_elem = info_tree.xpath(self._SUMMARY_XPATH)
|
80
|
+
summary = "".join(summary_elem).strip()
|
90
81
|
|
91
82
|
# Chapter structure
|
92
83
|
volume_titles = catalog_tree.xpath(self._VOLUME_TITLE_XPATH)
|
93
84
|
volume_blocks = catalog_tree.xpath(self._VOLUME_CONTENT_XPATH)
|
94
85
|
|
95
|
-
volumes = []
|
86
|
+
volumes: list[VolumeInfoDict] = []
|
96
87
|
for vol_title, vol_block in zip(volume_titles, volume_blocks, strict=False):
|
97
|
-
chapters = []
|
88
|
+
chapters: list[ChapterInfoDict] = []
|
98
89
|
for a in vol_block.xpath(self._CHAPTER_LIST_XPATH):
|
99
90
|
href = a.xpath("./@href")[0] if a.xpath("./@href") else ""
|
100
91
|
title = "".join(a.xpath(".//li//text()")).strip()
|
@@ -112,9 +103,18 @@ class SfacgParser(BaseParser):
|
|
112
103
|
"chapters": chapters,
|
113
104
|
}
|
114
105
|
)
|
115
|
-
result["volumes"] = volumes
|
116
106
|
|
117
|
-
return
|
107
|
+
return {
|
108
|
+
"book_name": book_name,
|
109
|
+
"author": author,
|
110
|
+
"cover_url": cover_url,
|
111
|
+
"update_time": update_time,
|
112
|
+
"word_count": word_count,
|
113
|
+
"serial_status": serial_status,
|
114
|
+
"summary": summary,
|
115
|
+
"volumes": volumes,
|
116
|
+
"extra": {},
|
117
|
+
}
|
118
118
|
|
119
119
|
def parse_chapter(
|
120
120
|
self,
|
@@ -122,13 +122,6 @@ class SfacgParser(BaseParser):
|
|
122
122
|
chapter_id: str,
|
123
123
|
**kwargs: Any,
|
124
124
|
) -> ChapterDict | None:
|
125
|
-
"""
|
126
|
-
Parse a single chapter page and extract clean text or simplified HTML.
|
127
|
-
|
128
|
-
:param html_list: Raw HTML of the chapter page.
|
129
|
-
:param chapter_id: Identifier of the chapter being parsed.
|
130
|
-
:return: Cleaned chapter content as plain text or minimal HTML.
|
131
|
-
"""
|
132
125
|
if not html_list:
|
133
126
|
return None
|
134
127
|
keywords = [
|
@@ -156,7 +149,7 @@ class SfacgParser(BaseParser):
|
|
156
149
|
raw_text_parts = tree.xpath(self._CHAPTER_TEXT_XPATH)
|
157
150
|
content_lines = [txt.strip() for txt in raw_text_parts if txt.strip()]
|
158
151
|
|
159
|
-
content = "\n
|
152
|
+
content = "\n".join(content_lines).strip()
|
160
153
|
if not content:
|
161
154
|
return None
|
162
155
|
|