novel-downloader 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -2
- novel_downloader/cli/__init__.py +0 -1
- novel_downloader/cli/clean.py +2 -10
- novel_downloader/cli/download.py +18 -22
- novel_downloader/cli/interactive.py +0 -1
- novel_downloader/cli/main.py +1 -3
- novel_downloader/cli/settings.py +8 -8
- novel_downloader/config/__init__.py +0 -1
- novel_downloader/config/adapter.py +48 -18
- novel_downloader/config/loader.py +116 -108
- novel_downloader/config/models.py +41 -32
- novel_downloader/config/site_rules.py +2 -4
- novel_downloader/core/__init__.py +0 -1
- novel_downloader/core/downloaders/__init__.py +4 -4
- novel_downloader/core/downloaders/base/__init__.py +14 -0
- novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
- novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
- novel_downloader/core/downloaders/biquge/__init__.py +12 -0
- novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
- novel_downloader/core/downloaders/common/__init__.py +14 -0
- novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
- novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +34 -23
- novel_downloader/core/downloaders/qidian/__init__.py +10 -0
- novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +80 -64
- novel_downloader/core/factory/__init__.py +4 -5
- novel_downloader/core/factory/{downloader_factory.py → downloader.py} +36 -35
- novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
- novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
- novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
- novel_downloader/core/interfaces/__init__.py +8 -9
- novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
- novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +26 -12
- novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
- novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
- novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
- novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +34 -17
- novel_downloader/core/parsers/__init__.py +5 -4
- novel_downloader/core/parsers/{base_parser.py → base.py} +20 -11
- novel_downloader/core/parsers/biquge/__init__.py +10 -0
- novel_downloader/core/parsers/biquge/main_parser.py +126 -0
- novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
- novel_downloader/core/parsers/{common_parser → common}/helper.py +20 -18
- novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +41 -49
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +16 -12
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +37 -45
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +16 -12
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +150 -0
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +9 -10
- novel_downloader/core/requesters/__init__.py +9 -5
- novel_downloader/core/requesters/base/__init__.py +16 -0
- novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +180 -73
- novel_downloader/core/requesters/base/browser.py +340 -0
- novel_downloader/core/requesters/base/session.py +364 -0
- novel_downloader/core/requesters/biquge/__init__.py +12 -0
- novel_downloader/core/requesters/biquge/session.py +90 -0
- novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
- novel_downloader/core/requesters/common/async_session.py +96 -0
- novel_downloader/core/requesters/common/session.py +113 -0
- novel_downloader/core/requesters/qidian/__init__.py +21 -0
- novel_downloader/core/requesters/qidian/broswer.py +306 -0
- novel_downloader/core/requesters/qidian/session.py +287 -0
- novel_downloader/core/savers/__init__.py +5 -3
- novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
- novel_downloader/core/savers/biquge.py +25 -0
- novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
- novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +24 -52
- novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
- novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
- novel_downloader/core/savers/epub_utils/__init__.py +0 -1
- novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
- novel_downloader/core/savers/epub_utils/initializer.py +4 -5
- novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
- novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
- novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
- novel_downloader/locales/en.json +12 -4
- novel_downloader/locales/zh.json +9 -1
- novel_downloader/resources/config/settings.toml +88 -0
- novel_downloader/utils/cache.py +2 -2
- novel_downloader/utils/chapter_storage.py +340 -0
- novel_downloader/utils/constants.py +8 -5
- novel_downloader/utils/crypto_utils.py +3 -3
- novel_downloader/utils/file_utils/__init__.py +0 -1
- novel_downloader/utils/file_utils/io.py +12 -17
- novel_downloader/utils/file_utils/normalize.py +1 -3
- novel_downloader/utils/file_utils/sanitize.py +2 -9
- novel_downloader/utils/fontocr/__init__.py +0 -1
- novel_downloader/utils/fontocr/ocr_v1.py +19 -22
- novel_downloader/utils/fontocr/ocr_v2.py +147 -60
- novel_downloader/utils/hash_store.py +19 -20
- novel_downloader/utils/hash_utils.py +0 -1
- novel_downloader/utils/i18n.py +3 -4
- novel_downloader/utils/logger.py +5 -6
- novel_downloader/utils/model_loader.py +5 -8
- novel_downloader/utils/network.py +9 -10
- novel_downloader/utils/state.py +6 -7
- novel_downloader/utils/text_utils/__init__.py +0 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
- novel_downloader/utils/text_utils/diff_display.py +0 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -4
- novel_downloader/utils/text_utils/text_cleaning.py +0 -1
- novel_downloader/utils/time_utils/__init__.py +0 -1
- novel_downloader/utils/time_utils/datetime_utils.py +9 -11
- novel_downloader/utils/time_utils/sleep_utils.py +27 -13
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/METADATA +14 -17
- novel_downloader-1.3.0.dist-info/RECORD +127 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/WHEEL +1 -1
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +0 -95
- novel_downloader/core/requesters/base_browser.py +0 -210
- novel_downloader/core/requesters/base_session.py +0 -243
- novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
- novel_downloader/core/requesters/common_requester/common_session.py +0 -126
- novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -377
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
- novel_downloader/resources/config/settings.yaml +0 -76
- novel_downloader-1.2.1.dist-info/RECORD +0 -115
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,4 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
3
|
novel_downloader.utils.fontocr.ocr_v1
|
5
4
|
-------------------------------------
|
@@ -12,7 +11,7 @@ on web pages (e.g., the Qidian website).
|
|
12
11
|
import json
|
13
12
|
import logging
|
14
13
|
from pathlib import Path
|
15
|
-
from typing import Any
|
14
|
+
from typing import Any
|
16
15
|
|
17
16
|
import numpy as np
|
18
17
|
import paddle
|
@@ -46,12 +45,12 @@ class FontOCRV1:
|
|
46
45
|
_freq_weight = 0.05
|
47
46
|
|
48
47
|
# shared resources
|
49
|
-
_global_char_freq_db:
|
50
|
-
_global_ocr:
|
48
|
+
_global_char_freq_db: dict[str, int] = {}
|
49
|
+
_global_ocr: PaddleOCR | None = None
|
51
50
|
|
52
51
|
def __init__(
|
53
52
|
self,
|
54
|
-
cache_dir:
|
53
|
+
cache_dir: str | Path,
|
55
54
|
use_freq: bool = False,
|
56
55
|
ocr_version: str = "v1.0",
|
57
56
|
threshold: float = 0.0,
|
@@ -129,7 +128,7 @@ class FontOCRV1:
|
|
129
128
|
char: str,
|
130
129
|
render_font: ImageFont.FreeTypeFont,
|
131
130
|
is_reflect: bool = False,
|
132
|
-
) ->
|
131
|
+
) -> Image.Image | None:
|
133
132
|
"""
|
134
133
|
Render a single character into a square image.
|
135
134
|
If is_reflect is True, flip horizontally.
|
@@ -153,7 +152,7 @@ class FontOCRV1:
|
|
153
152
|
|
154
153
|
def ocr_text(
|
155
154
|
self, img: Image.Image, top_k: int = 1
|
156
|
-
) ->
|
155
|
+
) -> str | list[tuple[str, float]]:
|
157
156
|
"""
|
158
157
|
Run PaddleOCR on a single-image, return best match(es).
|
159
158
|
If use_freq, adjust score by frequency bonus.
|
@@ -186,9 +185,7 @@ class FontOCRV1:
|
|
186
185
|
logger.error("[FontOCR] OCR failure: %s", e)
|
187
186
|
return "" if top_k == 1 else []
|
188
187
|
|
189
|
-
def query(
|
190
|
-
self, img: Image.Image, top_k: int = 1
|
191
|
-
) -> Union[str, List[Tuple[str, float]]]:
|
188
|
+
def query(self, img: Image.Image, top_k: int = 1) -> str | list[tuple[str, float]]:
|
192
189
|
"""
|
193
190
|
First try hash-based lookup via img_hash_store;
|
194
191
|
if no hit, fall back to ocr_text().
|
@@ -204,12 +201,12 @@ class FontOCRV1:
|
|
204
201
|
|
205
202
|
def generate_font_map(
|
206
203
|
self,
|
207
|
-
fixed_font_path:
|
208
|
-
random_font_path:
|
209
|
-
char_set:
|
210
|
-
refl_set:
|
211
|
-
chapter_id:
|
212
|
-
) ->
|
204
|
+
fixed_font_path: str | Path,
|
205
|
+
random_font_path: str | Path,
|
206
|
+
char_set: set[str],
|
207
|
+
refl_set: set[str],
|
208
|
+
chapter_id: str | None = None,
|
209
|
+
) -> dict[str, str]:
|
213
210
|
"""
|
214
211
|
Generates a mapping from encrypted (randomized) font characters to
|
215
212
|
their real recognized characters by rendering and OCR-based matching.
|
@@ -222,13 +219,13 @@ class FontOCRV1:
|
|
222
219
|
|
223
220
|
:returns mapping_result: { obf_char: real_char, ... }
|
224
221
|
"""
|
225
|
-
mapping_result:
|
222
|
+
mapping_result: dict[str, str] = {}
|
226
223
|
fixed_map_file = self._fixed_map_dir / f"{Path(fixed_font_path).stem}.json"
|
227
224
|
|
228
225
|
# 1) load or init fixed_font_map
|
229
226
|
if fixed_map_file.exists():
|
230
227
|
try:
|
231
|
-
with open(fixed_map_file,
|
228
|
+
with open(fixed_map_file, encoding="utf-8") as f:
|
232
229
|
fixed_map = json.load(f)
|
233
230
|
except Exception as e:
|
234
231
|
logger.debug("[FontOCR] Failed to load fixed map file: %s", e)
|
@@ -239,17 +236,17 @@ class FontOCRV1:
|
|
239
236
|
# prepare font renderers and cmap sets
|
240
237
|
try:
|
241
238
|
fixed_ttf = TTFont(fixed_font_path)
|
242
|
-
fixed_chars =
|
239
|
+
fixed_chars = {chr(c) for c in fixed_ttf.getBestCmap()}
|
243
240
|
fixed_font = ImageFont.truetype(str(fixed_font_path), self.CHAR_FONT_SIZE)
|
244
241
|
|
245
242
|
random_ttf = TTFont(random_font_path)
|
246
|
-
random_chars =
|
243
|
+
random_chars = {chr(c) for c in random_ttf.getBestCmap()}
|
247
244
|
random_font = ImageFont.truetype(str(random_font_path), self.CHAR_FONT_SIZE)
|
248
245
|
except Exception as e:
|
249
246
|
logger.error("[FontOCR] Failed to load TTF fonts: %s", e)
|
250
247
|
return mapping_result
|
251
248
|
|
252
|
-
def _process(chars:
|
249
|
+
def _process(chars: set[str], reflect: bool = False) -> None:
|
253
250
|
for ch in chars:
|
254
251
|
try:
|
255
252
|
if ch in fixed_map:
|
@@ -277,7 +274,7 @@ class FontOCRV1:
|
|
277
274
|
real = self.query(img, top_k=1)
|
278
275
|
if real:
|
279
276
|
real_char = (
|
280
|
-
str(real[0]) if isinstance(real, (list
|
277
|
+
str(real[0]) if isinstance(real, (list | tuple)) else real
|
281
278
|
)
|
282
279
|
mapping_result[ch] = real_char
|
283
280
|
if ch in fixed_chars:
|
@@ -1,5 +1,4 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
3
|
novel_downloader.utils.fontocr.ocr_v2
|
5
4
|
-------------------------------------
|
@@ -13,8 +12,9 @@ import json
|
|
13
12
|
import logging
|
14
13
|
import math
|
15
14
|
import os
|
15
|
+
from collections.abc import Generator
|
16
16
|
from pathlib import Path
|
17
|
-
from typing import Any,
|
17
|
+
from typing import Any, TypeVar
|
18
18
|
|
19
19
|
import cv2
|
20
20
|
import numpy as np
|
@@ -22,8 +22,8 @@ import paddle
|
|
22
22
|
from fontTools.ttLib import TTFont
|
23
23
|
from paddle.inference import Config
|
24
24
|
from paddle.inference import create_predictor as _create_predictor
|
25
|
-
from paddleocr.ppocr.postprocess.rec_postprocess import CTCLabelDecode
|
26
25
|
from PIL import Image, ImageDraw, ImageFont
|
26
|
+
from PIL.Image import Transpose
|
27
27
|
|
28
28
|
try:
|
29
29
|
# pip install cupy-cuda11x
|
@@ -45,23 +45,116 @@ T = TypeVar("T")
|
|
45
45
|
logger = logging.getLogger(__name__)
|
46
46
|
|
47
47
|
|
48
|
-
class
|
48
|
+
class CTCLabelDecode:
|
49
|
+
"""
|
50
|
+
Convert between text-index and text-label for CTC-based models.
|
51
|
+
|
52
|
+
:param character_dict_path: Path to the file containing characters, one per line.
|
53
|
+
:param beg_str: Token representing the start of sequence.
|
54
|
+
:param end_str: Token representing the end of sequence.
|
55
|
+
"""
|
56
|
+
|
57
|
+
__slots__ = ("idx_to_char", "char_to_idx", "blank_id", "beg_str", "end_str")
|
58
|
+
|
59
|
+
def __init__(
|
60
|
+
self,
|
61
|
+
character_dict_path: str | Path,
|
62
|
+
beg_str: str = "sos",
|
63
|
+
end_str: str = "eos",
|
64
|
+
):
|
65
|
+
# Store special tokens
|
66
|
+
self.beg_str = beg_str
|
67
|
+
self.end_str = end_str
|
68
|
+
|
69
|
+
# Read and clean character list (skip empty lines)
|
70
|
+
path = Path(character_dict_path)
|
71
|
+
chars = [
|
72
|
+
line.strip()
|
73
|
+
for line in path.read_text(encoding="utf-8").splitlines()
|
74
|
+
if line.strip()
|
75
|
+
]
|
76
|
+
|
77
|
+
# Reserve index 0 for the CTC blank token, then actual characters
|
78
|
+
self.idx_to_char: list[str] = ["blank"] + chars
|
79
|
+
self.blank_id: int = 0
|
80
|
+
|
81
|
+
# Build reverse mapping from character to index
|
82
|
+
self.char_to_idx = {ch: i for i, ch in enumerate(self.idx_to_char)}
|
83
|
+
|
84
|
+
def decode(
|
85
|
+
self,
|
86
|
+
text_indices: np.ndarray,
|
87
|
+
text_probs: np.ndarray | None = None,
|
88
|
+
) -> list[tuple[str, float]]:
|
89
|
+
"""
|
90
|
+
Decode index sequences to strings and compute average confidence.
|
91
|
+
|
92
|
+
:param text_indices: (batch_size, seq_len) class indices.
|
93
|
+
:param text_probs: Optional per-step probabilities, same shape.
|
94
|
+
:return: List of (string, avg_confidence) per sample.
|
95
|
+
"""
|
96
|
+
results: list[tuple[str, float]] = []
|
97
|
+
batch_size = text_indices.shape[0]
|
98
|
+
|
99
|
+
for i in range(batch_size):
|
100
|
+
seq = text_indices[i]
|
101
|
+
# Collapse repeated tokens: keep first of any run
|
102
|
+
mask = np.concatenate(([True], seq[1:] != seq[:-1]))
|
103
|
+
# Remove blanks
|
104
|
+
mask &= seq != self.blank_id
|
105
|
+
|
106
|
+
# Map indices to characters
|
107
|
+
chars = [self.idx_to_char[idx] for idx in seq[mask]]
|
108
|
+
|
109
|
+
# Compute average confidence, or default to 1.0 if no probs provided
|
110
|
+
if text_probs is not None:
|
111
|
+
probs = text_probs[i][mask]
|
112
|
+
avg_conf = float(probs.mean()) if probs.size else 0.0
|
113
|
+
else:
|
114
|
+
avg_conf = 1.0
|
115
|
+
|
116
|
+
results.append(("".join(chars), avg_conf))
|
117
|
+
|
118
|
+
return results
|
119
|
+
|
120
|
+
def __call__(self, preds: Any) -> list[tuple[str, float]]:
|
121
|
+
"""
|
122
|
+
Decode raw model outputs to final text labels and confidences.
|
123
|
+
|
124
|
+
:param preds: Model output array/tensor of shape (batch, seq_len, num_classes),
|
125
|
+
or a tuple/list whose last element is that array.
|
126
|
+
:returns: A list of (decoded_string, average_confidence).
|
127
|
+
"""
|
128
|
+
# If passed as (logits, ...), take the last element
|
129
|
+
if isinstance(preds, (tuple | list)):
|
130
|
+
preds = preds[-1]
|
131
|
+
|
132
|
+
# Convert framework tensor to numpy if needed
|
133
|
+
if hasattr(preds, "numpy"):
|
134
|
+
preds = preds.numpy()
|
135
|
+
|
136
|
+
# Get the most likely class index and its probability
|
137
|
+
text_idx = preds.argmax(axis=2)
|
138
|
+
text_prob = preds.max(axis=2)
|
139
|
+
|
140
|
+
return self.decode(text_idx, text_prob)
|
141
|
+
|
142
|
+
|
143
|
+
class TextRecognizer:
|
49
144
|
def __init__(
|
50
145
|
self,
|
51
146
|
rec_model_dir: str,
|
52
147
|
rec_image_shape: str,
|
53
148
|
rec_batch_num: int,
|
54
149
|
rec_char_dict_path: str,
|
55
|
-
use_space_char: bool = False,
|
56
150
|
use_gpu: bool = False,
|
57
151
|
gpu_mem: int = 500,
|
58
|
-
gpu_id:
|
152
|
+
gpu_id: int | None = None,
|
59
153
|
):
|
60
154
|
self.rec_batch_num = int(rec_batch_num)
|
61
155
|
self.rec_image_shape = tuple(map(int, rec_image_shape.split(","))) # (C, H, W)
|
62
156
|
self.postprocess_op = CTCLabelDecode(
|
63
157
|
character_dict_path=rec_char_dict_path,
|
64
|
-
use_space_char=use_space_char,
|
65
158
|
)
|
66
159
|
|
67
160
|
self._create_predictor(
|
@@ -92,7 +185,7 @@ class TextRecognizer(object):
|
|
92
185
|
model_dir: str,
|
93
186
|
use_gpu: bool,
|
94
187
|
gpu_mem: int,
|
95
|
-
gpu_id:
|
188
|
+
gpu_id: int | None = None,
|
96
189
|
) -> None:
|
97
190
|
"""
|
98
191
|
Internal helper to build the Paddle predictor + I/O handles
|
@@ -126,12 +219,12 @@ class TextRecognizer(object):
|
|
126
219
|
selected = [preferred] if preferred in out_names else out_names
|
127
220
|
self.output_tensors = [self.predictor.get_output_handle(n) for n in selected]
|
128
221
|
|
129
|
-
def __call__(self, img_list:
|
222
|
+
def __call__(self, img_list: list[np.ndarray]) -> list[tuple[str, float]]:
|
130
223
|
"""
|
131
224
|
Perform batch OCR on a list of images and return (text, confidence) tuples.
|
132
225
|
"""
|
133
226
|
img_num = len(img_list)
|
134
|
-
results:
|
227
|
+
results: list[tuple[str, float]] = []
|
135
228
|
|
136
229
|
C, H, W0 = self.rec_image_shape
|
137
230
|
|
@@ -161,12 +254,7 @@ class TextRecognizer(object):
|
|
161
254
|
outputs = [t.copy_to_cpu() for t in self.output_tensors]
|
162
255
|
preds = outputs[0] if len(outputs) == 1 else outputs
|
163
256
|
|
164
|
-
rec_batch = self.postprocess_op(
|
165
|
-
preds,
|
166
|
-
return_word_box=False,
|
167
|
-
wh_ratio_list=wh_ratios,
|
168
|
-
max_wh_ratio=max_wh,
|
169
|
-
)
|
257
|
+
rec_batch = self.postprocess_op(preds)
|
170
258
|
results.extend(rec_batch)
|
171
259
|
|
172
260
|
return results
|
@@ -208,21 +296,21 @@ class FontOCRV2:
|
|
208
296
|
_freq_weight = 0.05
|
209
297
|
|
210
298
|
# shared resources
|
211
|
-
_global_char_freq_db:
|
212
|
-
_global_ocr:
|
213
|
-
_global_vec_db:
|
214
|
-
_global_vec_label:
|
215
|
-
_global_vec_shape:
|
299
|
+
_global_char_freq_db: dict[str, int] = {}
|
300
|
+
_global_ocr: TextRecognizer | None = None
|
301
|
+
_global_vec_db: np.ndarray | None = None
|
302
|
+
_global_vec_label: tuple[str, ...] = ()
|
303
|
+
_global_vec_shape: tuple[int, int] = (32, 32)
|
216
304
|
|
217
305
|
def __init__(
|
218
306
|
self,
|
219
|
-
cache_dir:
|
307
|
+
cache_dir: str | Path,
|
220
308
|
use_freq: bool = False,
|
221
309
|
use_ocr: bool = True,
|
222
310
|
use_vec: bool = False,
|
223
311
|
batch_size: int = 32,
|
224
312
|
gpu_mem: int = 500,
|
225
|
-
gpu_id:
|
313
|
+
gpu_id: int | None = None,
|
226
314
|
ocr_weight: float = 0.6,
|
227
315
|
vec_weight: float = 0.4,
|
228
316
|
ocr_version: str = "v1.0",
|
@@ -281,7 +369,6 @@ class FontOCRV2:
|
|
281
369
|
rec_char_dict_path=str(char_dict_file),
|
282
370
|
rec_image_shape=REC_IMAGE_SHAPE_MAP[self.ocr_version],
|
283
371
|
rec_batch_num=self.batch_size,
|
284
|
-
use_space_char=False,
|
285
372
|
use_gpu=gpu_available,
|
286
373
|
gpu_mem=self.gpu_mem,
|
287
374
|
gpu_id=self.gpu_id,
|
@@ -328,7 +415,7 @@ class FontOCRV2:
|
|
328
415
|
FontOCRV2._global_vec_db = vec_db / norm
|
329
416
|
|
330
417
|
# Load corresponding labels
|
331
|
-
with open(char_vec_label_file,
|
418
|
+
with open(char_vec_label_file, encoding="utf-8") as f:
|
332
419
|
FontOCRV2._global_vec_label = tuple(line.strip() for line in f)
|
333
420
|
|
334
421
|
@staticmethod
|
@@ -336,7 +423,7 @@ class FontOCRV2:
|
|
336
423
|
char: str,
|
337
424
|
render_font: ImageFont.FreeTypeFont,
|
338
425
|
is_reflect: bool = False,
|
339
|
-
) ->
|
426
|
+
) -> Image.Image | None:
|
340
427
|
"""
|
341
428
|
Render a single character into a square image.
|
342
429
|
If is_reflect is True, flip horizontally.
|
@@ -350,7 +437,7 @@ class FontOCRV2:
|
|
350
437
|
y = (size - h) // 2 - bbox[1]
|
351
438
|
draw.text((x, y), char, fill=0, font=render_font)
|
352
439
|
if is_reflect:
|
353
|
-
img = img.transpose(
|
440
|
+
img = img.transpose(Transpose.FLIP_LEFT_RIGHT)
|
354
441
|
|
355
442
|
img_np = np.array(img)
|
356
443
|
if np.unique(img_np).size == 1:
|
@@ -360,9 +447,9 @@ class FontOCRV2:
|
|
360
447
|
|
361
448
|
def match_text_by_embedding(
|
362
449
|
self,
|
363
|
-
images:
|
450
|
+
images: Image.Image | list[Image.Image],
|
364
451
|
top_k: int = 1,
|
365
|
-
) ->
|
452
|
+
) -> list[tuple[str, float]] | list[list[tuple[str, float]]]:
|
366
453
|
"""
|
367
454
|
Match input image to precomputed character embeddings using cosine similarity.
|
368
455
|
|
@@ -378,7 +465,7 @@ class FontOCRV2:
|
|
378
465
|
if self._global_vec_db is None:
|
379
466
|
return []
|
380
467
|
try:
|
381
|
-
imgs:
|
468
|
+
imgs: list[Image.Image] = (
|
382
469
|
[images] if isinstance(images, Image.Image) else images
|
383
470
|
)
|
384
471
|
|
@@ -395,7 +482,7 @@ class FontOCRV2:
|
|
395
482
|
# Compute all cosine similarities in one batch:
|
396
483
|
sims_batch = batch.dot(self._global_vec_db.T) # (N, num_chars)
|
397
484
|
|
398
|
-
all_results:
|
485
|
+
all_results: list[list[tuple[str, float]]] = []
|
399
486
|
for sims in sims_batch:
|
400
487
|
k = min(top_k, sims.shape[0])
|
401
488
|
top_unsorted = array_backend.argpartition(-sims, k - 1)[:k]
|
@@ -418,8 +505,8 @@ class FontOCRV2:
|
|
418
505
|
|
419
506
|
def run_ocr_on_images(
|
420
507
|
self,
|
421
|
-
images:
|
422
|
-
) ->
|
508
|
+
images: Image.Image | list[Image.Image],
|
509
|
+
) -> tuple[str, float] | list[tuple[str, float]]:
|
423
510
|
"""
|
424
511
|
Run OCR on one or more PIL.Image(s) and return recognized text with confidence
|
425
512
|
|
@@ -434,7 +521,7 @@ class FontOCRV2:
|
|
434
521
|
try:
|
435
522
|
# Normalize input to a list of numpy arrays (RGB)
|
436
523
|
img_list = [images] if isinstance(images, Image.Image) else images
|
437
|
-
np_imgs:
|
524
|
+
np_imgs: list[np.ndarray] = [
|
438
525
|
np.array(img.convert("RGB")) for img in img_list
|
439
526
|
]
|
440
527
|
|
@@ -455,16 +542,16 @@ class FontOCRV2:
|
|
455
542
|
|
456
543
|
def query(
|
457
544
|
self,
|
458
|
-
images:
|
545
|
+
images: Image.Image | list[Image.Image],
|
459
546
|
top_k: int = 3,
|
460
|
-
) ->
|
547
|
+
) -> list[tuple[str, float]] | list[list[tuple[str, float]]]:
|
461
548
|
"""
|
462
549
|
For each input image, run OCR + embedding match, fuse scores,
|
463
550
|
and return a sorted list of (char, score) above self.threshold.
|
464
551
|
"""
|
465
552
|
# normalize to list
|
466
553
|
single = isinstance(images, Image.Image)
|
467
|
-
imgs:
|
554
|
+
imgs: list[Image.Image] = [images] if single else images
|
468
555
|
|
469
556
|
# try the hash store
|
470
557
|
hash_batch = [img_hash_store.query(img, k=top_k) or [] for img in imgs]
|
@@ -473,31 +560,31 @@ class FontOCRV2:
|
|
473
560
|
fallback_imgs = [imgs[i] for i in fallback_indices]
|
474
561
|
|
475
562
|
# OCR scores
|
476
|
-
raw_ocr:
|
563
|
+
raw_ocr: tuple[str, float] | list[tuple[str, float]] = (
|
477
564
|
self.run_ocr_on_images(fallback_imgs)
|
478
565
|
if (self.use_ocr and fallback_imgs)
|
479
566
|
else []
|
480
567
|
)
|
481
568
|
if isinstance(raw_ocr, tuple):
|
482
|
-
ocr_fallback:
|
569
|
+
ocr_fallback: list[tuple[str, float]] = [raw_ocr]
|
483
570
|
else:
|
484
571
|
ocr_fallback = raw_ocr
|
485
572
|
|
486
573
|
# Vec‐embedding scores
|
487
|
-
raw_vec:
|
574
|
+
raw_vec: list[tuple[str, float]] | list[list[tuple[str, float]]] = (
|
488
575
|
self.match_text_by_embedding(fallback_imgs, top_k=top_k)
|
489
576
|
if (self.use_vec and fallback_imgs)
|
490
577
|
else []
|
491
578
|
)
|
492
579
|
if raw_vec and isinstance(raw_vec[0], tuple):
|
493
|
-
vec_fallback:
|
580
|
+
vec_fallback: list[list[tuple[str, float]]] = [raw_vec] # type: ignore
|
494
581
|
else:
|
495
582
|
vec_fallback = raw_vec # type: ignore
|
496
583
|
|
497
584
|
# Fuse OCR+vector for the fallback set
|
498
|
-
fused_fallback:
|
499
|
-
for ocr_preds, vec_preds in zip(ocr_fallback, vec_fallback):
|
500
|
-
scores:
|
585
|
+
fused_fallback: list[list[tuple[str, float]]] = []
|
586
|
+
for ocr_preds, vec_preds in zip(ocr_fallback, vec_fallback, strict=False):
|
587
|
+
scores: dict[str, float] = {}
|
501
588
|
|
502
589
|
# OCR weight
|
503
590
|
if ocr_preds:
|
@@ -529,7 +616,7 @@ class FontOCRV2:
|
|
529
616
|
fused_fallback.append(filtered[:top_k])
|
530
617
|
|
531
618
|
# Recombine hash hits + fallback in original order
|
532
|
-
fused_batch:
|
619
|
+
fused_batch: list[list[tuple[str, float]]] = []
|
533
620
|
fallback_iter = iter(fused_fallback)
|
534
621
|
for h_preds in hash_batch:
|
535
622
|
if h_preds:
|
@@ -540,19 +627,19 @@ class FontOCRV2:
|
|
540
627
|
# Unwrap single‐image case
|
541
628
|
return fused_batch[0] if single else fused_batch
|
542
629
|
|
543
|
-
def _chunked(self, seq:
|
630
|
+
def _chunked(self, seq: list[T], size: int) -> Generator[list[T], None, None]:
|
544
631
|
"""Yield successive chunks of `seq` of length `size`."""
|
545
632
|
for i in range(0, len(seq), size):
|
546
633
|
yield seq[i : i + size]
|
547
634
|
|
548
635
|
def generate_font_map(
|
549
636
|
self,
|
550
|
-
fixed_font_path:
|
551
|
-
random_font_path:
|
552
|
-
char_set:
|
553
|
-
refl_set:
|
554
|
-
chapter_id:
|
555
|
-
) ->
|
637
|
+
fixed_font_path: str | Path,
|
638
|
+
random_font_path: str | Path,
|
639
|
+
char_set: set[str],
|
640
|
+
refl_set: set[str],
|
641
|
+
chapter_id: str | None = None,
|
642
|
+
) -> dict[str, str]:
|
556
643
|
"""
|
557
644
|
Generates a mapping from encrypted (randomized) font characters to
|
558
645
|
their real recognized characters by rendering and OCR-based matching.
|
@@ -565,12 +652,12 @@ class FontOCRV2:
|
|
565
652
|
|
566
653
|
:returns mapping_result: { obf_char: real_char, ... }
|
567
654
|
"""
|
568
|
-
mapping_result:
|
655
|
+
mapping_result: dict[str, str] = {}
|
569
656
|
fixed_map_file = self._fixed_map_dir / f"{Path(fixed_font_path).stem}.json"
|
570
657
|
|
571
658
|
# load existing cache
|
572
659
|
try:
|
573
|
-
with open(fixed_map_file,
|
660
|
+
with open(fixed_map_file, encoding="utf-8") as f:
|
574
661
|
fixed_map = json.load(f)
|
575
662
|
except Exception:
|
576
663
|
fixed_map = {}
|
@@ -578,19 +665,19 @@ class FontOCRV2:
|
|
578
665
|
# prepare font renderers and cmap sets
|
579
666
|
try:
|
580
667
|
fixed_ttf = TTFont(fixed_font_path)
|
581
|
-
fixed_chars =
|
668
|
+
fixed_chars = {chr(c) for c in fixed_ttf.getBestCmap()}
|
582
669
|
fixed_font = ImageFont.truetype(str(fixed_font_path), self.CHAR_FONT_SIZE)
|
583
670
|
|
584
671
|
random_ttf = TTFont(random_font_path)
|
585
|
-
random_chars =
|
672
|
+
random_chars = {chr(c) for c in random_ttf.getBestCmap()}
|
586
673
|
random_font = ImageFont.truetype(str(random_font_path), self.CHAR_FONT_SIZE)
|
587
674
|
except Exception as e:
|
588
675
|
logger.error("[FontOCR] Failed to load TTF fonts: %s", e)
|
589
676
|
return mapping_result
|
590
677
|
|
591
678
|
def _render_batch(
|
592
|
-
chars:
|
593
|
-
) ->
|
679
|
+
chars: list[tuple[str, bool]]
|
680
|
+
) -> list[tuple[str, Image.Image]]:
|
594
681
|
out = []
|
595
682
|
for ch, reflect in chars:
|
596
683
|
if ch in fixed_chars:
|
@@ -618,12 +705,12 @@ class FontOCRV2:
|
|
618
705
|
imgs_to_query = [img for (ch, img) in rendered]
|
619
706
|
fused_raw = self.query(imgs_to_query, top_k=3)
|
620
707
|
if isinstance(fused_raw[0], tuple):
|
621
|
-
fused:
|
708
|
+
fused: list[list[tuple[str, float]]] = [fused_raw] # type: ignore
|
622
709
|
else:
|
623
710
|
fused = fused_raw # type: ignore
|
624
711
|
|
625
712
|
# pick best per char, apply threshold + cache
|
626
|
-
for (ch, img), preds in zip(rendered, fused):
|
713
|
+
for (ch, img), preds in zip(rendered, fused, strict=False):
|
627
714
|
if ch in fixed_map:
|
628
715
|
mapping_result[ch] = fixed_map[ch]
|
629
716
|
logger.debug(
|