novel-downloader 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +3 -3
  3. novel_downloader/cli/export.py +1 -1
  4. novel_downloader/cli/ui.py +7 -7
  5. novel_downloader/config/adapter.py +191 -154
  6. novel_downloader/core/__init__.py +5 -6
  7. novel_downloader/core/exporters/common/txt.py +9 -9
  8. novel_downloader/core/exporters/linovelib/txt.py +9 -9
  9. novel_downloader/core/fetchers/qidian.py +20 -35
  10. novel_downloader/core/interfaces/fetcher.py +2 -2
  11. novel_downloader/core/interfaces/parser.py +2 -2
  12. novel_downloader/core/parsers/base.py +1 -0
  13. novel_downloader/core/parsers/eightnovel.py +2 -2
  14. novel_downloader/core/parsers/esjzone.py +3 -3
  15. novel_downloader/core/parsers/qidian/main_parser.py +747 -12
  16. novel_downloader/core/parsers/qidian/utils/__init__.py +2 -21
  17. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  18. novel_downloader/core/parsers/xiguashuwu.py +6 -12
  19. novel_downloader/locales/en.json +3 -3
  20. novel_downloader/locales/zh.json +3 -3
  21. novel_downloader/utils/__init__.py +0 -2
  22. novel_downloader/utils/chapter_storage.py +2 -3
  23. novel_downloader/utils/constants.py +1 -3
  24. novel_downloader/utils/cookies.py +32 -17
  25. novel_downloader/utils/crypto_utils/__init__.py +0 -6
  26. novel_downloader/utils/crypto_utils/rc4.py +40 -50
  27. novel_downloader/utils/epub/__init__.py +2 -3
  28. novel_downloader/utils/epub/builder.py +6 -6
  29. novel_downloader/utils/epub/constants.py +5 -5
  30. novel_downloader/utils/epub/documents.py +7 -7
  31. novel_downloader/utils/epub/models.py +8 -8
  32. novel_downloader/utils/epub/utils.py +10 -10
  33. novel_downloader/utils/file_utils/io.py +48 -73
  34. novel_downloader/utils/file_utils/normalize.py +1 -7
  35. novel_downloader/utils/file_utils/sanitize.py +4 -11
  36. novel_downloader/utils/fontocr/__init__.py +13 -0
  37. novel_downloader/utils/{fontocr.py → fontocr/core.py} +70 -61
  38. novel_downloader/utils/fontocr/loader.py +50 -0
  39. novel_downloader/utils/logger.py +80 -56
  40. novel_downloader/utils/network.py +16 -40
  41. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  42. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  43. novel_downloader/utils/time_utils/sleep_utils.py +53 -43
  44. novel_downloader/web/main.py +1 -1
  45. novel_downloader/web/pages/search.py +3 -3
  46. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/METADATA +2 -1
  47. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/RECORD +51 -55
  48. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
  49. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
  50. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
  51. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  52. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
  53. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
  54. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  55. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +0 -0
  56. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  57. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,143 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.parsers.qidian.utils.fontmap_recover
4
- ----------------------------------------------------------
5
-
6
- Tools for generating and applying font character mappings
7
- to recover obfuscated Qidian text.
8
- """
9
-
10
- __all__ = [
11
- "generate_font_map",
12
- "apply_font_mapping",
13
- ]
14
-
15
- import json
16
- import logging
17
- from pathlib import Path
18
-
19
- import numpy as np
20
- from fontTools.ttLib import TTFont
21
- from PIL import ImageFont
22
-
23
- logger = logging.getLogger(__name__)
24
- CHAR_FONT_SIZE = 52
25
-
26
-
27
- def generate_font_map(
28
- fixed_font_path: Path,
29
- random_font_path: Path,
30
- char_set: set[str],
31
- refl_set: set[str],
32
- cache_dir: Path,
33
- batch_size: int = 32,
34
- ) -> dict[str, str]:
35
- """
36
- Build a mapping from scrambled font chars to real chars.
37
-
38
- Uses OCR to compare rendered glyphs from a known (fixed) font and an
39
- obfuscated (random) font. Results are cached in JSON so repeated runs
40
- are faster.
41
-
42
- :param fixed_font_path: fixed font file.
43
- :param random_font_path: random font file.
44
- :param char_set: Characters to match directly.
45
- :param refl_set: Characters to match in flipped form.
46
- :param cache_dir: Directory to save/load cached results.
47
- :param batch_size: How many chars to OCR per batch.
48
-
49
- :return: { obf_char: real_char, ... }
50
- """
51
- try:
52
- from novel_downloader.utils.fontocr import get_font_ocr
53
-
54
- font_ocr = get_font_ocr(batch_size=batch_size)
55
- except ImportError:
56
- logger.warning("[QidianParser] FontOCR not available, font decoding will skip")
57
- return {}
58
-
59
- mapping_result: dict[str, str] = {}
60
- fixed_map_file = cache_dir / "fixed_font_map" / f"{Path(fixed_font_path).stem}.json"
61
- fixed_map_file.parent.mkdir(parents=True, exist_ok=True)
62
-
63
- # load existing cache
64
- try:
65
- with open(fixed_map_file, encoding="utf-8") as f:
66
- fixed_map = json.load(f)
67
- cached_chars = set(fixed_map.keys())
68
- mapping_result.update({ch: fixed_map[ch] for ch in char_set if ch in fixed_map})
69
- mapping_result.update({ch: fixed_map[ch] for ch in refl_set if ch in fixed_map})
70
- char_set = set(char_set) - cached_chars
71
- refl_set = set(refl_set) - cached_chars
72
- except Exception:
73
- fixed_map = {}
74
- cached_chars = set()
75
-
76
- # prepare font renderers and cmap sets
77
- try:
78
- fixed_ttf = TTFont(fixed_font_path)
79
- fixed_chars = {chr(c) for c in fixed_ttf.getBestCmap()}
80
- fixed_font = ImageFont.truetype(str(fixed_font_path), CHAR_FONT_SIZE)
81
-
82
- random_ttf = TTFont(random_font_path)
83
- random_chars = {chr(c) for c in random_ttf.getBestCmap()}
84
- random_font = ImageFont.truetype(str(random_font_path), CHAR_FONT_SIZE)
85
- except Exception as e:
86
- logger.error("[FontOCR] Failed to load TTF fonts: %s", e)
87
- return mapping_result
88
-
89
- def _render_batch(chars: list[tuple[str, bool]]) -> list[tuple[str, np.ndarray]]:
90
- out = []
91
- for ch, reflect in chars:
92
- if ch in fixed_chars:
93
- font = fixed_font
94
- elif ch in random_chars:
95
- font = random_font
96
- else:
97
- continue
98
- img = font_ocr.render_char_image_array(ch, font, reflect)
99
- if img is not None:
100
- out.append((ch, img))
101
- return out
102
-
103
- # process normal and reflected sets together
104
- for chars, reflect in [(list(char_set), False), (list(refl_set), True)]:
105
- for batch_chars in font_ocr._chunked(chars, font_ocr._batch_size):
106
- # render all images in this batch
107
- to_render = [(ch, reflect) for ch in batch_chars]
108
- rendered = _render_batch(to_render)
109
- if not rendered:
110
- continue
111
-
112
- # query OCR+vec simultaneously
113
- imgs_to_query = [img for (ch, img) in rendered]
114
- fused = font_ocr.predict(imgs_to_query, top_k=1)
115
-
116
- # pick best per char, apply threshold + cache
117
- for (ch, _), preds in zip(rendered, fused, strict=False):
118
- if not preds:
119
- continue
120
- real_char, _ = preds[0]
121
- mapping_result[ch] = real_char
122
- fixed_map[ch] = real_char
123
-
124
- # persist updated fixed_map
125
- try:
126
- with open(fixed_map_file, "w", encoding="utf-8") as f:
127
- json.dump(fixed_map, f, ensure_ascii=False, indent=2)
128
- except Exception as e:
129
- logger.error("[FontOCR] Failed to save fixed map: %s", e)
130
-
131
- return mapping_result
132
-
133
-
134
- def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
135
- """
136
- Replace each character in `text` using `font_map`,
137
- leaving unmapped characters unchanged.
138
-
139
- :param text: The input string, possibly containing obfuscated font chars.
140
- :param font_map: A dict mapping obfuscated chars to real chars.
141
- :return: The de-obfuscated text.
142
- """
143
- return "".join(font_map.get(ch, ch) for ch in text)
@@ -1,110 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.parsers.qidian.utils.helpers
4
- --------------------------------------------------
5
-
6
- Shared utility functions for parsing Qidian pages.
7
- """
8
-
9
- import json
10
- import logging
11
- from typing import Any
12
-
13
- from lxml import html
14
-
15
- logger = logging.getLogger(__name__)
16
-
17
-
18
- def find_ssr_page_context(html_str: str) -> dict[str, Any]:
19
- """
20
- Extract SSR JSON from <script id="vite-plugin-ssr_pageContext">.
21
- """
22
- try:
23
- tree = html.fromstring(html_str)
24
- script = tree.xpath('//script[@id="vite-plugin-ssr_pageContext"]/text()')
25
- if script:
26
- data: dict[str, Any] = json.loads(script[0].strip())
27
- return data
28
- except Exception as e:
29
- logger.warning("[Parser] SSR JSON parse error: %s", e)
30
- return {}
31
-
32
-
33
- def extract_chapter_info(ssr_data: dict[str, Any]) -> dict[str, Any]:
34
- """
35
- Extract the 'chapterInfo' dictionary from the SSR page context.
36
-
37
- This handles nested key access and returns an empty dict if missing.
38
-
39
- :param ssr_data: The full SSR data object from _find_ssr_page_context().
40
- :return: A dict with chapter metadata such as chapterName, authorSay, etc.
41
- """
42
- try:
43
- page_context = ssr_data.get("pageContext", {})
44
- page_props = page_context.get("pageProps", {})
45
- page_data = page_props.get("pageData", {})
46
- chapter_info = page_data.get("chapterInfo", {})
47
-
48
- assert isinstance(chapter_info, dict)
49
- return chapter_info
50
- except Exception:
51
- return {}
52
-
53
-
54
- def is_restricted_page(html_str: str) -> bool:
55
- """
56
- Return True if page content indicates access restriction
57
- (e.g. not subscribed/purchased).
58
-
59
- :param html_str: Raw HTML string.
60
- """
61
- markers = ["这是VIP章节", "需要订阅", "订阅后才能阅读"]
62
- return any(m in html_str for m in markers)
63
-
64
-
65
- def vip_status(ssr_data: dict[str, Any]) -> bool:
66
- """
67
- :return: True if VIP, False otherwise.
68
- """
69
- chapter_info = extract_chapter_info(ssr_data)
70
- vip_flag = chapter_info.get("vipStatus", 0)
71
- fens_flag = chapter_info.get("fEnS", 0)
72
- return bool(vip_flag == 1 and fens_flag != 0)
73
-
74
-
75
- def can_view_chapter(ssr_data: dict[str, Any]) -> bool:
76
- """
77
- A chapter is not viewable if it is marked as VIP
78
- and has not been purchased.
79
-
80
- :return: True if viewable, False otherwise.
81
- """
82
- chapter_info = extract_chapter_info(ssr_data)
83
- is_buy = chapter_info.get("isBuy", 0)
84
- vip_status = chapter_info.get("vipStatus", 0)
85
- return not (vip_status == 1 and is_buy == 0)
86
-
87
-
88
- def is_duplicated(ssr_data: dict[str, Any]) -> bool:
89
- """
90
- Check if chapter is marked as duplicated (eFW = 1).
91
- """
92
- chapter_info = extract_chapter_info(ssr_data)
93
- efw_flag = chapter_info.get("eFW", 0)
94
- return bool(efw_flag == 1)
95
-
96
-
97
- def is_encrypted(content: str | dict[str, Any]) -> bool:
98
- """
99
- Return True if content is encrypted.
100
-
101
- Chapter Encryption Status (cES):
102
- - 0: 内容是'明文'
103
- - 2: 字体加密
104
-
105
- :param content: HTML content, either as a raw string or a BeautifulSoup object.
106
- :return: True if encrypted marker is found, else False.
107
- """
108
- ssr_data = find_ssr_page_context(content) if isinstance(content, str) else content
109
- chapter_info = extract_chapter_info(ssr_data)
110
- return int(chapter_info.get("cES", 0)) == 2