novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +79 -66
  6. novel_downloader/cli/export.py +17 -21
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +206 -209
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +5 -5
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +17 -12
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +20 -14
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +6 -19
  79. novel_downloader/core/interfaces/parser.py +7 -8
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +64 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +64 -69
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/main_parser.py +756 -48
  100. novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
  101. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  102. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  103. novel_downloader/core/parsers/quanben5.py +103 -0
  104. novel_downloader/core/parsers/registry.py +5 -16
  105. novel_downloader/core/parsers/sfacg.py +38 -45
  106. novel_downloader/core/parsers/shencou.py +215 -0
  107. novel_downloader/core/parsers/shuhaige.py +111 -0
  108. novel_downloader/core/parsers/tongrenquan.py +116 -0
  109. novel_downloader/core/parsers/ttkan.py +132 -0
  110. novel_downloader/core/parsers/wanbengo.py +191 -0
  111. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  112. novel_downloader/core/parsers/xiguashuwu.py +429 -0
  113. novel_downloader/core/parsers/xs63b.py +161 -0
  114. novel_downloader/core/parsers/xshbook.py +134 -0
  115. novel_downloader/core/parsers/yamibo.py +87 -131
  116. novel_downloader/core/parsers/yibige.py +166 -0
  117. novel_downloader/core/searchers/__init__.py +34 -3
  118. novel_downloader/core/searchers/aaatxt.py +107 -0
  119. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  120. novel_downloader/core/searchers/base.py +112 -36
  121. novel_downloader/core/searchers/dxmwx.py +105 -0
  122. novel_downloader/core/searchers/eightnovel.py +84 -0
  123. novel_downloader/core/searchers/esjzone.py +43 -25
  124. novel_downloader/core/searchers/hetushu.py +92 -0
  125. novel_downloader/core/searchers/i25zw.py +93 -0
  126. novel_downloader/core/searchers/ixdzs8.py +107 -0
  127. novel_downloader/core/searchers/jpxs123.py +107 -0
  128. novel_downloader/core/searchers/piaotia.py +100 -0
  129. novel_downloader/core/searchers/qbtr.py +106 -0
  130. novel_downloader/core/searchers/qianbi.py +74 -40
  131. novel_downloader/core/searchers/quanben5.py +144 -0
  132. novel_downloader/core/searchers/registry.py +24 -8
  133. novel_downloader/core/searchers/shuhaige.py +124 -0
  134. novel_downloader/core/searchers/tongrenquan.py +110 -0
  135. novel_downloader/core/searchers/ttkan.py +92 -0
  136. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  137. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  138. novel_downloader/core/searchers/xs63b.py +104 -0
  139. novel_downloader/locales/en.json +34 -85
  140. novel_downloader/locales/zh.json +35 -86
  141. novel_downloader/models/__init__.py +21 -22
  142. novel_downloader/models/book.py +44 -0
  143. novel_downloader/models/config.py +4 -37
  144. novel_downloader/models/login.py +1 -1
  145. novel_downloader/models/search.py +5 -0
  146. novel_downloader/resources/config/settings.toml +8 -70
  147. novel_downloader/resources/json/xiguashuwu.json +718 -0
  148. novel_downloader/utils/__init__.py +13 -24
  149. novel_downloader/utils/chapter_storage.py +5 -5
  150. novel_downloader/utils/constants.py +4 -31
  151. novel_downloader/utils/cookies.py +38 -35
  152. novel_downloader/utils/crypto_utils/__init__.py +7 -0
  153. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  154. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  155. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  156. novel_downloader/utils/crypto_utils/rc4.py +54 -0
  157. novel_downloader/utils/epub/__init__.py +3 -4
  158. novel_downloader/utils/epub/builder.py +6 -6
  159. novel_downloader/utils/epub/constants.py +62 -21
  160. novel_downloader/utils/epub/documents.py +95 -201
  161. novel_downloader/utils/epub/models.py +8 -22
  162. novel_downloader/utils/epub/utils.py +73 -106
  163. novel_downloader/utils/file_utils/__init__.py +2 -23
  164. novel_downloader/utils/file_utils/io.py +53 -188
  165. novel_downloader/utils/file_utils/normalize.py +1 -7
  166. novel_downloader/utils/file_utils/sanitize.py +4 -15
  167. novel_downloader/utils/fontocr/__init__.py +5 -14
  168. novel_downloader/utils/fontocr/core.py +216 -0
  169. novel_downloader/utils/fontocr/loader.py +50 -0
  170. novel_downloader/utils/logger.py +81 -65
  171. novel_downloader/utils/network.py +17 -41
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  176. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  177. novel_downloader/utils/time_utils/__init__.py +5 -11
  178. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  179. novel_downloader/utils/time_utils/sleep_utils.py +55 -49
  180. novel_downloader/web/__init__.py +13 -0
  181. novel_downloader/web/components/__init__.py +11 -0
  182. novel_downloader/web/components/navigation.py +35 -0
  183. novel_downloader/web/main.py +66 -0
  184. novel_downloader/web/pages/__init__.py +17 -0
  185. novel_downloader/web/pages/download.py +78 -0
  186. novel_downloader/web/pages/progress.py +147 -0
  187. novel_downloader/web/pages/search.py +329 -0
  188. novel_downloader/web/services/__init__.py +17 -0
  189. novel_downloader/web/services/client_dialog.py +164 -0
  190. novel_downloader/web/services/cred_broker.py +113 -0
  191. novel_downloader/web/services/cred_models.py +35 -0
  192. novel_downloader/web/services/task_manager.py +264 -0
  193. novel_downloader-2.0.1.dist-info/METADATA +172 -0
  194. novel_downloader-2.0.1.dist-info/RECORD +206 -0
  195. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
  196. novel_downloader/core/downloaders/biquge.py +0 -29
  197. novel_downloader/core/downloaders/esjzone.py +0 -29
  198. novel_downloader/core/downloaders/linovelib.py +0 -29
  199. novel_downloader/core/downloaders/sfacg.py +0 -29
  200. novel_downloader/core/downloaders/yamibo.py +0 -29
  201. novel_downloader/core/exporters/biquge.py +0 -22
  202. novel_downloader/core/exporters/esjzone.py +0 -22
  203. novel_downloader/core/exporters/qianbi.py +0 -22
  204. novel_downloader/core/exporters/sfacg.py +0 -22
  205. novel_downloader/core/exporters/yamibo.py +0 -22
  206. novel_downloader/core/fetchers/base/__init__.py +0 -14
  207. novel_downloader/core/fetchers/base/browser.py +0 -422
  208. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  209. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  210. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  211. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  212. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  213. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  214. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  215. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  216. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  217. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  218. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  219. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  220. novel_downloader/core/parsers/biquge.py +0 -139
  221. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
  222. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
  223. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
  224. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  225. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
  226. novel_downloader/models/chapter.py +0 -25
  227. novel_downloader/models/types.py +0 -13
  228. novel_downloader/tui/__init__.py +0 -7
  229. novel_downloader/tui/app.py +0 -32
  230. novel_downloader/tui/main.py +0 -17
  231. novel_downloader/tui/screens/__init__.py +0 -14
  232. novel_downloader/tui/screens/home.py +0 -198
  233. novel_downloader/tui/screens/login.py +0 -74
  234. novel_downloader/tui/styles/home_layout.tcss +0 -79
  235. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  236. novel_downloader/utils/cache.py +0 -24
  237. novel_downloader/utils/crypto_utils.py +0 -71
  238. novel_downloader/utils/fontocr/hash_store.py +0 -280
  239. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  240. novel_downloader/utils/fontocr/model_loader.py +0 -69
  241. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  242. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  243. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  244. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  245. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  246. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  247. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  248. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,315 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.utils.fontocr.ocr_v1
4
- -------------------------------------
5
-
6
- This module provides utility methods for optical character recognition (OCR)
7
- and font mapping, primarily used for decrypting custom font encryption
8
- on web pages (e.g., the Qidian website).
9
- """
10
-
11
- import json
12
- import logging
13
- from pathlib import Path
14
- from typing import Any
15
-
16
- import numpy as np
17
- import paddle
18
- from fontTools.ttLib import TTFont
19
- from paddleocr import PaddleOCR
20
- from PIL import Image, ImageDraw, ImageFont
21
- from PIL.Image import Transpose
22
-
23
- from novel_downloader.utils.constants import (
24
- REC_CHAR_MODEL_FILES,
25
- REC_IMAGE_SHAPE_MAP,
26
- )
27
-
28
- from .hash_store import img_hash_store
29
- from .model_loader import get_rec_chinese_char_model_dir
30
-
31
- logger = logging.getLogger(__name__)
32
-
33
-
34
- class FontOCRV1:
35
- """
36
- Version 1 of the FontOCR utility.
37
-
38
- :param use_freq: if True, weight OCR scores by character frequency
39
- :param cache_dir: base path to store font-map JSON data
40
- :param threshold: minimum confidence threshold [0.0-1.0]
41
- :param font_debug: if True, dump per-char debug images under cache_dir
42
- """
43
-
44
- # Default constants
45
- CHAR_IMAGE_SIZE = 64
46
- CHAR_FONT_SIZE = 52
47
- _freq_weight = 0.05
48
-
49
- # shared resources
50
- _global_char_freq_db: dict[str, int] = {}
51
- _global_ocr: PaddleOCR | None = None
52
-
53
- def __init__(
54
- self,
55
- cache_dir: str | Path,
56
- use_freq: bool = False,
57
- ocr_version: str = "v1.0",
58
- threshold: float = 0.0,
59
- font_debug: bool = False,
60
- **kwargs: Any,
61
- ) -> None:
62
- self.use_freq = use_freq
63
- self.ocr_version = ocr_version
64
- self.threshold = threshold
65
- self.font_debug = font_debug
66
- self._max_freq = 5
67
-
68
- self._cache_dir = Path(cache_dir)
69
- self._cache_dir.mkdir(parents=True, exist_ok=True)
70
- self._fixed_map_dir = self._cache_dir / "fixed_font_map"
71
- self._fixed_map_dir.mkdir(exist_ok=True)
72
-
73
- if font_debug:
74
- self._debug_dir = self._cache_dir / "font_debug" / "badcase"
75
- self._debug_dir.mkdir(parents=True, exist_ok=True)
76
-
77
- # load shared NLP/OCR + frequency DB once
78
- self._load_ocr_model()
79
- if self.use_freq and not FontOCRV1._global_char_freq_db:
80
- self._load_char_freq_db()
81
-
82
- def _load_ocr_model(self) -> None:
83
- """
84
- Initialize the shared PaddleOCR model if not already loaded.
85
- """
86
- if FontOCRV1._global_ocr is not None:
87
- return
88
-
89
- gpu_available = paddle.device.is_compiled_with_cuda()
90
- self._char_model_dir = get_rec_chinese_char_model_dir(self.ocr_version)
91
-
92
- for fname in REC_CHAR_MODEL_FILES:
93
- full_path = self._char_model_dir / fname
94
- if not full_path.exists():
95
- raise FileNotFoundError(f"[FontOCR] Required file missing: {full_path}")
96
-
97
- char_dict_file = self._char_model_dir / "rec_custom_keys.txt"
98
- FontOCRV1._global_ocr = PaddleOCR(
99
- use_angle_cls=False,
100
- lang="ch",
101
- det=False,
102
- use_gpu=gpu_available,
103
- show_log=self.font_debug,
104
- rec_model_dir=str(self._char_model_dir),
105
- rec_char_dict_path=str(char_dict_file),
106
- rec_image_shape=REC_IMAGE_SHAPE_MAP[self.ocr_version],
107
- max_text_length=1,
108
- use_space_char=False,
109
- )
110
-
111
- def _load_char_freq_db(self) -> bool:
112
- """
113
- Loads character frequency data from a JSON file and
114
- assigns it to the instance variable.
115
-
116
- :return: True if successfully loaded, False otherwise.
117
- """
118
- try:
119
- char_freq_map_file = self._char_model_dir / "char_freq.json"
120
- with char_freq_map_file.open("r", encoding="utf-8") as f:
121
- FontOCRV1._global_char_freq_db = json.load(f)
122
- self._max_freq = max(FontOCRV1._global_char_freq_db.values())
123
- return True
124
- except Exception as e:
125
- logger.warning("[FontOCR] Failed to load char freq DB: %s", e)
126
- return False
127
-
128
- @staticmethod
129
- def _generate_char_image(
130
- char: str,
131
- render_font: ImageFont.FreeTypeFont,
132
- is_reflect: bool = False,
133
- ) -> Image.Image | None:
134
- """
135
- Render a single character into a square image.
136
- If is_reflect is True, flip horizontally.
137
- """
138
- size = FontOCRV1.CHAR_IMAGE_SIZE
139
- img = Image.new("L", (size, size), color=255)
140
- draw = ImageDraw.Draw(img)
141
- bbox = draw.textbbox((0, 0), char, font=render_font)
142
- w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
143
- x = (size - w) // 2 - bbox[0]
144
- y = (size - h) // 2 - bbox[1]
145
- draw.text((x, y), char, fill=0, font=render_font)
146
- if is_reflect:
147
- img = img.transpose(Transpose.FLIP_LEFT_RIGHT)
148
-
149
- img_np = np.array(img)
150
- if np.unique(img_np).size == 1:
151
- return None
152
-
153
- return img
154
-
155
- def ocr_text(
156
- self, img: Image.Image, top_k: int = 1
157
- ) -> str | list[tuple[str, float]]:
158
- """
159
- Run PaddleOCR on a single-image, return best match(es).
160
- If use_freq, adjust score by frequency bonus.
161
- """
162
- if not FontOCRV1._global_ocr:
163
- self._load_ocr_model()
164
- try:
165
- img_np = np.asarray(img)
166
- assert FontOCRV1._global_ocr is not None
167
- result = FontOCRV1._global_ocr.ocr(
168
- img_np, cls=False, det=False
169
- ) # returns List[List[ (text, score) ]]
170
- candidates = result[0] if result else []
171
- # attach frequency weight if enabled
172
- if self.use_freq and FontOCRV1._global_char_freq_db:
173
- adjusted = []
174
- for ch, score in candidates:
175
- freq = FontOCRV1._global_char_freq_db.get(ch, self._max_freq)
176
- bonus = (
177
- FontOCRV1._freq_weight
178
- * (self._max_freq - freq)
179
- / self._max_freq
180
- )
181
- adjusted.append((ch, score + bonus))
182
- candidates = adjusted
183
- # filter by threshold
184
- filtered = [c for c in candidates if c[1] >= self.threshold]
185
- return filtered[0][0] if top_k == 1 and filtered else filtered[:top_k]
186
- except Exception as e:
187
- logger.error("[FontOCR] OCR failure: %s", e)
188
- return "" if top_k == 1 else []
189
-
190
- def query(self, img: Image.Image, top_k: int = 1) -> str | list[tuple[str, float]]:
191
- """
192
- First try hash-based lookup via img_hash_store;
193
- if no hit, fall back to ocr_text().
194
- """
195
- # quick hash lookup
196
- matches = img_hash_store.query(img, k=top_k)
197
- if matches:
198
- # matches is List[(label, dist)]
199
- return matches[0][0] if top_k == 1 else matches
200
-
201
- # fallback to OCR
202
- return self.ocr_text(img, top_k=top_k)
203
-
204
- def generate_font_map(
205
- self,
206
- fixed_font_path: str | Path,
207
- random_font_path: str | Path,
208
- char_set: set[str],
209
- refl_set: set[str],
210
- chapter_id: str | None = None,
211
- ) -> dict[str, str]:
212
- """
213
- Generates a mapping from encrypted (randomized) font characters to
214
- their real recognized characters by rendering and OCR-based matching.
215
-
216
- :param fixed_font_path: Path to the reference (fixed) font.
217
- :param random_font_path: Path to the obfuscated (random) font.
218
- :param char_set: Characters to process normally.
219
- :param refl_set: Characters to process as horizontally flipped.
220
- :param chapter_id: Chapter ID
221
-
222
- :returns mapping_result: { obf_char: real_char, ... }
223
- """
224
- mapping_result: dict[str, str] = {}
225
- fixed_map_file = self._fixed_map_dir / f"{Path(fixed_font_path).stem}.json"
226
-
227
- # 1) load or init fixed_font_map
228
- if fixed_map_file.exists():
229
- try:
230
- with open(fixed_map_file, encoding="utf-8") as f:
231
- fixed_map = json.load(f)
232
- except Exception as e:
233
- logger.debug("[FontOCR] Failed to load fixed map file: %s", e)
234
- fixed_map = {}
235
- else:
236
- fixed_map = {}
237
-
238
- # prepare font renderers and cmap sets
239
- try:
240
- fixed_ttf = TTFont(fixed_font_path)
241
- fixed_chars = {chr(c) for c in fixed_ttf.getBestCmap()}
242
- fixed_font = ImageFont.truetype(str(fixed_font_path), self.CHAR_FONT_SIZE)
243
-
244
- random_ttf = TTFont(random_font_path)
245
- random_chars = {chr(c) for c in random_ttf.getBestCmap()}
246
- random_font = ImageFont.truetype(str(random_font_path), self.CHAR_FONT_SIZE)
247
- except Exception as e:
248
- logger.error("[FontOCR] Failed to load TTF fonts: %s", e)
249
- return mapping_result
250
-
251
- def _process(chars: set[str], reflect: bool = False) -> None:
252
- for ch in chars:
253
- try:
254
- if ch in fixed_map:
255
- mapping_result[ch] = fixed_map[ch]
256
- logger.debug(
257
- "[FontOCR] Using cached mapping: '%s' -> '%s'",
258
- ch,
259
- fixed_map[ch],
260
- )
261
- continue
262
-
263
- if ch in fixed_chars:
264
- font_to_use = fixed_font
265
- elif ch in random_chars:
266
- font_to_use = random_font
267
- else:
268
- logger.debug("[FontOCR] Skipping unknown char: '%s'", ch)
269
- continue
270
-
271
- img = self._generate_char_image(ch, font_to_use, is_reflect=reflect)
272
- if img is None:
273
- logger.debug("[FontOCR] Skipping unknown char: '%s'", ch)
274
- continue
275
-
276
- real = self.query(img, top_k=1)
277
- if real:
278
- real_char = (
279
- str(real[0]) if isinstance(real, (list | tuple)) else real
280
- )
281
- mapping_result[ch] = real_char
282
- if ch in fixed_chars:
283
- fixed_map[ch] = real_char
284
- logger.debug("[FontOCR] Mapped '%s' -> '%s'", ch, real_char)
285
- elif self.font_debug and chapter_id:
286
- dbg_path = self._debug_dir / f"{ord(ch):05X}_{chapter_id}.png"
287
- img.save(dbg_path)
288
- logger.debug("[FontOCR] Saved debug image: %s", dbg_path)
289
- except Exception as e:
290
- logger.warning("[FontOCR] Failed to process char '%s': %s", ch, e)
291
-
292
- # process normal + reflected chars
293
- _process(char_set, reflect=False)
294
- _process(refl_set, reflect=True)
295
-
296
- # persist updated fixed_map
297
- try:
298
- with open(fixed_map_file, "w", encoding="utf-8") as f:
299
- json.dump(fixed_map, f, ensure_ascii=False, indent=2)
300
- except Exception as e:
301
- logger.error("[FontOCR] Failed to save fixed map: %s", e)
302
-
303
- return mapping_result
304
-
305
- @staticmethod
306
- def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
307
- """
308
- Replace each character in `text` using `font_map`,
309
- leaving unmapped characters unchanged.
310
-
311
- :param text: The input string, possibly containing obfuscated font chars.
312
- :param font_map: A dict mapping obfuscated chars to real chars.
313
- :return: The de-obfuscated text.
314
- """
315
- return "".join(font_map.get(ch, ch) for ch in text)