novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +14 -11
- novel_downloader/cli/export.py +19 -19
- novel_downloader/cli/ui.py +35 -8
- novel_downloader/config/adapter.py +216 -153
- novel_downloader/core/__init__.py +5 -6
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +27 -68
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +23 -49
- novel_downloader/core/interfaces/parser.py +2 -2
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +5 -39
- novel_downloader/core/parsers/esjzone.py +3 -3
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
- novel_downloader/core/parsers/qidian.py +717 -0
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +8 -15
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/locales/en.json +3 -3
- novel_downloader/locales/zh.json +3 -3
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/__init__.py +0 -2
- novel_downloader/utils/chapter_storage.py +2 -3
- novel_downloader/utils/constants.py +7 -3
- novel_downloader/utils/cookies.py +32 -17
- novel_downloader/utils/crypto_utils/__init__.py +0 -6
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/crypto_utils/rc4.py +40 -50
- novel_downloader/utils/epub/__init__.py +2 -3
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/epub/documents.py +7 -7
- novel_downloader/utils/epub/models.py +8 -8
- novel_downloader/utils/epub/utils.py +10 -10
- novel_downloader/utils/file_utils/io.py +48 -73
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -11
- novel_downloader/utils/fontocr/__init__.py +13 -0
- novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
- novel_downloader/utils/fontocr/loader.py +52 -0
- novel_downloader/utils/logger.py +80 -56
- novel_downloader/utils/network.py +16 -40
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/sleep_utils.py +53 -43
- novel_downloader/web/main.py +1 -1
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +4 -4
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
- novel_downloader-2.0.2.dist-info/RECORD +203 -0
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/main_parser.py +0 -101
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- novel_downloader-2.0.0.dist-info/RECORD +0 -210
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -1,68 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.parsers.qidian.chapter_router
|
4
|
-
---------------------------------------------------
|
5
|
-
|
6
|
-
Routing logic for selecting the correct chapter parser for Qidian pages.
|
7
|
-
"""
|
8
|
-
|
9
|
-
from __future__ import annotations
|
10
|
-
|
11
|
-
import logging
|
12
|
-
from typing import TYPE_CHECKING
|
13
|
-
|
14
|
-
from novel_downloader.models import ChapterDict
|
15
|
-
|
16
|
-
from .chapter_normal import parse_normal_chapter
|
17
|
-
from .utils import (
|
18
|
-
can_view_chapter,
|
19
|
-
find_ssr_page_context,
|
20
|
-
is_encrypted,
|
21
|
-
)
|
22
|
-
|
23
|
-
if TYPE_CHECKING:
|
24
|
-
from .main_parser import QidianParser
|
25
|
-
|
26
|
-
logger = logging.getLogger(__name__)
|
27
|
-
|
28
|
-
|
29
|
-
def parse_chapter(
    parser: QidianParser,
    html_str: str,
    chapter_id: str,
) -> ChapterDict | None:
    """
    Dispatch a Qidian chapter page to the matching chapter parser.

    The SSR page context is extracted from the HTML first. Chapters that
    cannot be viewed are skipped with a warning, encrypted chapters go to
    the encrypted-chapter parser (only when font decoding is enabled on the
    parser), and everything else goes to the plain-text chapter parser.

    :param parser: Instance of QidianParser.
    :param html_str: Raw HTML content of the chapter page.
    :param chapter_id: Identifier of the chapter being parsed.
    :return: The parsed chapter, or None when the chapter is inaccessible,
             cannot be decoded, or any error occurs while parsing.
    """
    try:
        page_ctx = find_ssr_page_context(html_str)

        if not can_view_chapter(page_ctx):
            logger.warning(
                "[Parser] Chapter '%s' is not purchased or inaccessible.", chapter_id
            )
            return None

        # Plain chapters need no optional dependencies.
        if not is_encrypted(page_ctx):
            return parse_normal_chapter(parser, html_str, chapter_id)

        # Encrypted chapter, but font decoding was not enabled.
        if not parser._decode_font:
            return None

        try:
            # Imported lazily so the optional decoding dependencies are only
            # required when an encrypted chapter is actually encountered.
            from .chapter_encrypted import parse_encrypted_chapter

            chapter = parse_encrypted_chapter(parser, html_str, chapter_id)
        except ImportError:
            logger.warning(
                "[Parser] Encrypted chapter '%s' requires extra dependencies.",
                chapter_id,
            )
            chapter = None
        return chapter
    except Exception as exc:
        logger.warning("[Parser] parse error for chapter '%s': %s", chapter_id, exc)
        return None
|
@@ -1,101 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.parsers.qidian.main_parser
|
4
|
-
------------------------------------------------
|
5
|
-
|
6
|
-
Main parser class for handling Qidian HTML
|
7
|
-
"""
|
8
|
-
|
9
|
-
from __future__ import annotations
|
10
|
-
|
11
|
-
import logging
|
12
|
-
from pathlib import Path
|
13
|
-
from typing import Any
|
14
|
-
|
15
|
-
from novel_downloader.core.parsers.base import BaseParser
|
16
|
-
from novel_downloader.core.parsers.registry import register_parser
|
17
|
-
from novel_downloader.models import (
|
18
|
-
BookInfoDict,
|
19
|
-
ChapterDict,
|
20
|
-
ParserConfig,
|
21
|
-
)
|
22
|
-
from novel_downloader.utils.constants import DATA_DIR
|
23
|
-
from novel_downloader.utils.cookies import get_cookie_value
|
24
|
-
|
25
|
-
from .book_info_parser import parse_book_info
|
26
|
-
from .chapter_router import parse_chapter
|
27
|
-
from .utils import is_encrypted
|
28
|
-
|
29
|
-
logger = logging.getLogger(__name__)
|
30
|
-
|
31
|
-
|
32
|
-
@register_parser(
    site_keys=["qidian", "qd"],
)
class QidianParser(BaseParser):
    """
    Parser for the Qidian (起点中文网) site.

    Book-info and chapter parsing are delegated to the module-level
    ``parse_book_info`` and ``parse_chapter`` helpers imported by this module.
    """

    def __init__(
        self,
        config: ParserConfig,
        fuid: str = "",
    ):
        """
        Set up cache/debug directories and resolve the user id.

        :param config: ParserConfig object forwarded to ``BaseParser``.
        :param fuid: User id to use; when empty, the value of the ``ywguid``
                     cookie is read from the saved Qidian session state file.
        """
        super().__init__(config)

        font_dir: Path = self._base_cache_dir / "fixed_fonts"
        self._fixed_font_dir: Path = font_dir
        font_dir.mkdir(parents=True, exist_ok=True)
        self._debug_dir: Path = Path.cwd() / "debug"

        cookie_file = DATA_DIR / "qidian" / "session_state.cookies"
        if fuid:
            self._fuid: str = fuid
        else:
            self._fuid = get_cookie_value([cookie_file], "ywguid")

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse a book info page and extract metadata and chapter structure.

        :param html_list: Raw HTML pages; only the first entry is parsed.
        :return: Parsed book info, or None when ``html_list`` is empty.
        """
        return parse_book_info(html_list[0]) if html_list else None

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse a single chapter page.

        :param html_list: Raw HTML pages; only the first entry is parsed.
        :param chapter_id: Identifier of the chapter being parsed.
        :return: Parsed chapter, or None when ``html_list`` is empty or the
                 chapter could not be parsed.
        """
        return parse_chapter(self, html_list[0], chapter_id) if html_list else None

    def is_encrypted(self, html_str: str) -> bool:
        """
        Return True if content is encrypted.

        :param html_str: Raw HTML of the chapter page.
        """
        return is_encrypted(html_str)

    @property
    def save_font_debug(self) -> bool:
        """Expose the ``save_font_debug`` flag of the parser configuration."""
        return self._config.save_font_debug
|
@@ -1,30 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.parsers.qidian.utils
|
4
|
-
------------------------------------------
|
5
|
-
|
6
|
-
Utility functions and helpers for parsing and decrypting Qidian novel pages
|
7
|
-
"""
|
8
|
-
|
9
|
-
__all__ = [
|
10
|
-
"find_ssr_page_context",
|
11
|
-
"extract_chapter_info",
|
12
|
-
"is_restricted_page",
|
13
|
-
"vip_status",
|
14
|
-
"can_view_chapter",
|
15
|
-
"is_encrypted",
|
16
|
-
"is_duplicated",
|
17
|
-
"QidianNodeDecryptor",
|
18
|
-
"get_decryptor",
|
19
|
-
]
|
20
|
-
|
21
|
-
from .helpers import (
|
22
|
-
can_view_chapter,
|
23
|
-
extract_chapter_info,
|
24
|
-
find_ssr_page_context,
|
25
|
-
is_duplicated,
|
26
|
-
is_encrypted,
|
27
|
-
is_restricted_page,
|
28
|
-
vip_status,
|
29
|
-
)
|
30
|
-
from .node_decryptor import QidianNodeDecryptor, get_decryptor
|
@@ -1,143 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.parsers.qidian.utils.fontmap_recover
|
4
|
-
----------------------------------------------------------
|
5
|
-
|
6
|
-
Tools for generating and applying font character mappings
|
7
|
-
to recover obfuscated Qidian text.
|
8
|
-
"""
|
9
|
-
|
10
|
-
__all__ = [
|
11
|
-
"generate_font_map",
|
12
|
-
"apply_font_mapping",
|
13
|
-
]
|
14
|
-
|
15
|
-
import json
|
16
|
-
import logging
|
17
|
-
from pathlib import Path
|
18
|
-
|
19
|
-
import numpy as np
|
20
|
-
from fontTools.ttLib import TTFont
|
21
|
-
from PIL import ImageFont
|
22
|
-
|
23
|
-
logger = logging.getLogger(__name__)
|
24
|
-
CHAR_FONT_SIZE = 52
|
25
|
-
|
26
|
-
|
27
|
-
def generate_font_map(
    fixed_font_path: Path,
    random_font_path: Path,
    char_set: set[str],
    refl_set: set[str],
    cache_dir: Path,
    batch_size: int = 32,
) -> dict[str, str]:
    """
    Build a mapping from scrambled font chars to real chars.

    Each requested character is rendered with whichever font contains it
    (the fixed font is preferred, then the random font) and the rendered
    glyph is passed to FontOCR. Results are cached in a JSON file named
    after the fixed font so repeated runs are faster.

    :param fixed_font_path: fixed font file.
    :param random_font_path: random font file.
    :param char_set: Characters to match directly.
    :param refl_set: Characters to match in flipped form.
    :param cache_dir: Directory to save/load cached results.
    :param batch_size: How many chars to OCR per batch.

    :return: { obf_char: real_char, ... }. Empty when FontOCR is not
             available; only the cached entries when the fonts fail to load.
    """
    try:
        from novel_downloader.utils.fontocr import get_font_ocr

        font_ocr = get_font_ocr(batch_size=batch_size)
    except ImportError:
        logger.warning("[QidianParser] FontOCR not available, font decoding will skip")
        return {}

    mapping_result: dict[str, str] = {}
    fixed_map_file = cache_dir / "fixed_font_map" / f"{Path(fixed_font_path).stem}.json"
    fixed_map_file.parent.mkdir(parents=True, exist_ok=True)

    # load existing cache; a missing, unreadable, or malformed file simply
    # means we start from an empty cache
    fixed_map: dict[str, str] = {}
    try:
        with open(fixed_map_file, encoding="utf-8") as f:
            loaded = json.load(f)
        if isinstance(loaded, dict):
            fixed_map = loaded
    except (OSError, ValueError):
        pass

    if fixed_map:
        cached_chars = set(fixed_map)
        mapping_result.update({ch: fixed_map[ch] for ch in char_set if ch in fixed_map})
        mapping_result.update({ch: fixed_map[ch] for ch in refl_set if ch in fixed_map})
        char_set = set(char_set) - cached_chars
        refl_set = set(refl_set) - cached_chars

    # everything was served from the cache: skip font loading and OCR
    if not char_set and not refl_set:
        return mapping_result

    # prepare font renderers and cmap sets
    try:
        # TTFont keeps the font file open; only the cmap is needed, so the
        # handle is released as soon as the code points are collected.
        with TTFont(fixed_font_path) as fixed_ttf:
            fixed_chars = {chr(c) for c in fixed_ttf.getBestCmap()}
        fixed_font = ImageFont.truetype(str(fixed_font_path), CHAR_FONT_SIZE)

        with TTFont(random_font_path) as random_ttf:
            random_chars = {chr(c) for c in random_ttf.getBestCmap()}
        random_font = ImageFont.truetype(str(random_font_path), CHAR_FONT_SIZE)
    except Exception as e:
        logger.error("[FontOCR] Failed to load TTF fonts: %s", e)
        return mapping_result

    def _render_batch(chars: list[tuple[str, bool]]) -> list[tuple[str, np.ndarray]]:
        # Render each char with the font that contains it; chars found in
        # neither font, or that fail to render, are dropped.
        out = []
        for ch, reflect in chars:
            if ch in fixed_chars:
                font = fixed_font
            elif ch in random_chars:
                font = random_font
            else:
                continue
            img = font_ocr.render_char_image_array(ch, font, reflect)
            if img is not None:
                out.append((ch, img))
        return out

    cache_dirty = False

    # process normal and reflected sets together
    for chars, reflect in ((list(char_set), False), (list(refl_set), True)):
        for batch_chars in font_ocr._chunked(chars, font_ocr._batch_size):
            # render all images in this batch
            rendered = _render_batch([(ch, reflect) for ch in batch_chars])
            if not rendered:
                continue

            # one prediction list per rendered image
            fused = font_ocr.predict([img for _, img in rendered], top_k=1)

            for (ch, _), preds in zip(rendered, fused, strict=False):
                if not preds:
                    continue
                real_char, _ = preds[0]
                mapping_result[ch] = real_char
                # NOTE(review): the cache file is keyed by the fixed font's
                # name, so only glyphs rendered from the fixed font are
                # persisted. Random-font glyphs are presumably specific to
                # one page and would go stale in this cache - confirm.
                if ch in fixed_chars:
                    fixed_map[ch] = real_char
                    cache_dirty = True

    # persist updated fixed_map (only when something new was learned)
    if cache_dirty:
        try:
            with open(fixed_map_file, "w", encoding="utf-8") as f:
                json.dump(fixed_map, f, ensure_ascii=False, indent=2)
        except (OSError, TypeError, ValueError) as e:
            logger.error("[FontOCR] Failed to save fixed map: %s", e)

    return mapping_result
|
132
|
-
|
133
|
-
|
134
|
-
def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
    """
    Translate `text` character by character through `font_map`.

    Characters without an entry in `font_map` are copied through unchanged.

    :param text: The input string, possibly containing obfuscated font chars.
    :param font_map: A dict mapping obfuscated chars to real chars.
    :return: The de-obfuscated text.
    """
    pieces: list[str] = []
    for obf_char in text:
        pieces.append(font_map.get(obf_char, obf_char))
    return "".join(pieces)
|
@@ -1,110 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.parsers.qidian.utils.helpers
|
4
|
-
--------------------------------------------------
|
5
|
-
|
6
|
-
Shared utility functions for parsing Qidian pages.
|
7
|
-
"""
|
8
|
-
|
9
|
-
import json
|
10
|
-
import logging
|
11
|
-
from typing import Any
|
12
|
-
|
13
|
-
from lxml import html
|
14
|
-
|
15
|
-
logger = logging.getLogger(__name__)
|
16
|
-
|
17
|
-
|
18
|
-
def find_ssr_page_context(html_str: str) -> dict[str, Any]:
    """
    Extract SSR JSON from <script id="vite-plugin-ssr_pageContext">.

    :param html_str: Raw HTML of the page.
    :return: The decoded JSON object, or an empty dict when the script tag
             is missing, the HTML/JSON cannot be parsed, or the JSON value
             is not an object.
    """
    try:
        tree = html.fromstring(html_str)
        script = tree.xpath('//script[@id="vite-plugin-ssr_pageContext"]/text()')
        if script:
            data = json.loads(script[0].strip())
            # json.loads may legally yield a list/str/number/None; callers
            # rely on the dict contract, so anything else is rejected here.
            if isinstance(data, dict):
                return data
            logger.warning(
                "[Parser] SSR JSON is not an object: %s", type(data).__name__
            )
    except Exception as e:
        logger.warning("[Parser] SSR JSON parse error: %s", e)
    return {}
|
31
|
-
|
32
|
-
|
33
|
-
def extract_chapter_info(ssr_data: dict[str, Any]) -> dict[str, Any]:
    """
    Extract the 'chapterInfo' dictionary from the SSR page context.

    Walks ``pageContext -> pageProps -> pageData -> chapterInfo`` and
    returns an empty dict as soon as a level is missing or is not a dict.

    :param ssr_data: The full SSR data object from find_ssr_page_context().
    :return: The 'chapterInfo' dict, or {} when it cannot be reached.
    """
    node: Any = ssr_data
    for key in ("pageContext", "pageProps", "pageData", "chapterInfo"):
        # Explicit type check instead of `assert`, which is stripped when
        # Python runs with -O and would let non-dict values through.
        if not isinstance(node, dict):
            return {}
        node = node.get(key, {})
    return node if isinstance(node, dict) else {}
|
52
|
-
|
53
|
-
|
54
|
-
def is_restricted_page(html_str: str) -> bool:
    """
    Tell whether the page text contains one of the known access-restriction
    markers (VIP chapter / subscription required notices).

    :param html_str: Raw HTML string.
    :return: True if any marker occurs in ``html_str``, else False.
    """
    for marker in ("这是VIP章节", "需要订阅", "订阅后才能阅读"):
        if marker in html_str:
            return True
    return False
|
63
|
-
|
64
|
-
|
65
|
-
def vip_status(ssr_data: dict[str, Any]) -> bool:
    """
    Check the VIP flags stored in the chapter info.

    :param ssr_data: SSR page context of the chapter page.
    :return: True if 'vipStatus' equals 1 and 'fEnS' is not 0, False otherwise.
    """
    info = extract_chapter_info(ssr_data)
    if info.get("vipStatus", 0) != 1:
        return False
    return bool(info.get("fEnS", 0) != 0)
|
73
|
-
|
74
|
-
|
75
|
-
def can_view_chapter(ssr_data: dict[str, Any]) -> bool:
    """
    Decide whether the chapter content is accessible.

    A chapter is not viewable only when it is flagged as VIP
    ('vipStatus' == 1) and has not been bought ('isBuy' == 0).

    :param ssr_data: SSR page context of the chapter page.
    :return: True if viewable, False otherwise.
    """
    info = extract_chapter_info(ssr_data)
    bought_flag = info.get("isBuy", 0)
    vip_flag = info.get("vipStatus", 0)
    # De Morgan form of: not (vip and not bought)
    return bool(vip_flag != 1 or bought_flag != 0)
|
86
|
-
|
87
|
-
|
88
|
-
def is_duplicated(ssr_data: dict[str, Any]) -> bool:
    """
    Report whether the chapter info carries the duplicated marker (eFW = 1).

    :param ssr_data: SSR page context of the chapter page.
    :return: True when 'eFW' equals 1, otherwise False.
    """
    info = extract_chapter_info(ssr_data)
    return bool(info.get("eFW", 0) == 1)
|
95
|
-
|
96
|
-
|
97
|
-
def is_encrypted(content: str | dict[str, Any]) -> bool:
    """
    Return True if content is encrypted.

    Chapter Encryption Status (cES):
      - 0: content is plain text
      - 2: font encryption

    :param content: Either the raw HTML string of the chapter page or an
                    already extracted SSR page-context dict.
    :return: True if 'cES' equals 2, else False (also when 'cES' is null
             or not numeric).
    """
    ssr_data = find_ssr_page_context(content) if isinstance(content, str) else content
    chapter_info = extract_chapter_info(ssr_data)
    try:
        return int(chapter_info.get("cES", 0)) == 2
    except (TypeError, ValueError):
        # 'cES' present but null / non-numeric: treat as not encrypted
        # instead of letting the conversion error escape to the caller.
        return False
|
@@ -1,175 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.parsers.qidian.utils.node_decryptor
|
4
|
-
---------------------------------------------------------
|
5
|
-
|
6
|
-
Provides QidianNodeDecryptor, which ensures a Node.js environment,
|
7
|
-
downloads or installs the required JS modules (Fock + decrypt script),
|
8
|
-
and invokes a Node.js subprocess to decrypt Qidian chapter content.
|
9
|
-
"""
|
10
|
-
|
11
|
-
import json
|
12
|
-
import logging
|
13
|
-
import shutil
|
14
|
-
import subprocess
|
15
|
-
import uuid
|
16
|
-
from pathlib import Path
|
17
|
-
|
18
|
-
from novel_downloader.utils.constants import (
|
19
|
-
JS_SCRIPT_DIR,
|
20
|
-
QD_DECRYPT_SCRIPT_PATH,
|
21
|
-
)
|
22
|
-
|
23
|
-
from .decryptor_fetcher import ensure_decryptor
|
24
|
-
|
25
|
-
logger = logging.getLogger(__name__)
|
26
|
-
|
27
|
-
|
28
|
-
class QidianNodeDecryptor:
    """
    A decryptor that uses Node.js plus Qidian's Fock JavaScript module
    to decrypt encrypted chapter payloads.

    On initialization, this class will:
      1. Verify that `node` is on PATH.
      2. Copy our bundled `qidian_decrypt_node.js` into `JS_SCRIPT_DIR`.
      3. Download the remote Fock module JS if not already present.

    If any of these steps fails, it falls back to the executable returned
    by `ensure_decryptor()`.

    Calling `decrypt()` will:
      - Write a temp JSON input file with [ciphertext, chapter_id, fkp, fuid].
      - Spawn the selected command with `<in> <out>` file names.
      - Read and return the decrypted text.
      - Clean up the temp files.
    """

    QIDIAN_FOCK_JS_URL: str = (
        "https://cococdn.qidian.com/coco/s12062024/4819793b.qeooxh.js"
    )
    QIDIAN_FOCK_JS_PATH: Path = JS_SCRIPT_DIR / "4819793b.qeooxh.js"
    QIDIAN_DECRYPT_SCRIPT_FILE: str = "qidian_decrypt_node.js"
    QIDIAN_DECRYPT_SCRIPT_PATH: Path = JS_SCRIPT_DIR / QIDIAN_DECRYPT_SCRIPT_FILE

    def __init__(self) -> None:
        """
        Initialise the decryptor environment and decide which executable will be
        used (`node` script or the pre-built binary).

        :raises OSError: if neither backend can be prepared.
        """
        self.script_dir: Path = JS_SCRIPT_DIR
        self.script_dir.mkdir(parents=True, exist_ok=True)

        # Command prefix used by decrypt(); set by _check_environment().
        self._script_cmd: list[str] | None = None
        self._check_environment()

    def _check_environment(self) -> None:
        """
        Decide which decryptor backend to use and make sure it is ready.

        The Node.js backend is tried first; any failure while preparing it
        switches to the binary provided by `ensure_decryptor()`.

        :raises OSError: if the fallback binary is unavailable as well.
        """
        try:
            # 1) Check Node.js
            if not shutil.which("node"):
                raise OSError("Node.js is not installed or not in PATH.")

            # 2) Copy bundled decrypt script into place if missing
            if not self.QIDIAN_DECRYPT_SCRIPT_PATH.exists():
                try:
                    shutil.copyfile(
                        str(QD_DECRYPT_SCRIPT_PATH),
                        str(self.QIDIAN_DECRYPT_SCRIPT_PATH),
                    )
                except Exception as e:
                    logger.error("[decryptor] Failed to copy decrypt script: %s", e)
                    raise

            # 3) Download the Fock JS module from Qidian CDN if missing
            if not self.QIDIAN_FOCK_JS_PATH.exists():
                from novel_downloader.utils.network import download

                try:
                    download(
                        self.QIDIAN_FOCK_JS_URL,
                        self.script_dir,
                        on_exist="overwrite",
                    )
                except Exception as e:
                    logger.error("[decryptor] Failed to download Fock JS module: %s", e)
                    raise

            self._script_cmd = ["node", str(self.QIDIAN_DECRYPT_SCRIPT_PATH)]
        except Exception as node_exc:
            # Keep the reason for the fallback visible when debugging.
            logger.debug(
                "[decryptor] Node.js backend unavailable (%s); trying fallback binary",
                node_exc,
            )
            try:
                self._script_cmd = [str(ensure_decryptor(self.script_dir))]
            except Exception as exc:
                raise OSError(
                    "Neither Node.js nor fallback binary is available."
                ) from exc

    def decrypt(
        self,
        ciphertext: str | bytes,
        chapter_id: str,
        fkp: str,
        fuid: str,
    ) -> str:
        """
        Decrypt a chapter payload via the selected decrypt command.

        :param ciphertext: Encrypted content (str, or UTF-8 encoded bytes).
        :param chapter_id: The chapter's ID.
        :param fkp: Fock key param from the page.
        :param fuid: Fock user ID param.
        :return: The decrypted content with surrounding whitespace stripped,
                 or "" when no command is configured or any argument is empty.
        :raises RuntimeError: if the subprocess exits with a non-zero code.
        """
        if not self._script_cmd:
            return ""
        if not (ciphertext and chapter_id and fkp and fuid):
            return ""

        # Normalize inputs
        if isinstance(ciphertext, (bytes, bytearray)):
            cipher_str = ciphertext.decode("utf-8")
        else:
            cipher_str = str(ciphertext)
        chapter_str = str(chapter_id)

        # Create unique temp file names so concurrent calls do not collide
        task_id = uuid.uuid4().hex
        input_path = self.script_dir / f"input_{task_id}.json"
        output_path = self.script_dir / f"output_{task_id}.txt"

        try:
            # Write arguments as JSON array
            input_path.write_text(
                json.dumps([cipher_str, chapter_str, fkp, fuid]),
                encoding="utf-8",
            )

            # File names are passed relative to cwd (= script_dir).
            cmd = self._script_cmd + [input_path.name, output_path.name]
            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                # Decode child output explicitly instead of relying on the
                # locale encoding, and never fail on undecodable bytes.
                encoding="utf-8",
                errors="replace",
                cwd=str(self.script_dir),
            )

            if proc.returncode != 0:
                raise RuntimeError(f"Node error: {proc.stderr.strip()}")

            # Return decrypted content
            return output_path.read_text(encoding="utf-8").strip()

        finally:
            # Clean up temp files
            input_path.unlink(missing_ok=True)
            output_path.unlink(missing_ok=True)
|
163
|
-
|
164
|
-
|
165
|
-
_decryptor: QidianNodeDecryptor | None = None
|
166
|
-
|
167
|
-
|
168
|
-
def get_decryptor() -> QidianNodeDecryptor:
    """
    Provide the module-wide QidianNodeDecryptor instance.

    The instance is created on the first call and stored in the module
    global ``_decryptor``; later calls return that same object.
    """
    global _decryptor
    instance = _decryptor
    if instance is None:
        instance = QidianNodeDecryptor()
        _decryptor = instance
    return instance
|