novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +1 -3
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +26 -21
- novel_downloader/cli/download.py +79 -66
- novel_downloader/cli/export.py +17 -21
- novel_downloader/cli/main.py +1 -1
- novel_downloader/cli/search.py +62 -65
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +8 -5
- novel_downloader/config/adapter.py +206 -209
- novel_downloader/config/{loader.py → file_io.py} +53 -26
- novel_downloader/core/__init__.py +5 -5
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +3 -24
- novel_downloader/core/downloaders/base.py +49 -23
- novel_downloader/core/downloaders/common.py +191 -137
- novel_downloader/core/downloaders/qianbi.py +187 -146
- novel_downloader/core/downloaders/qidian.py +187 -141
- novel_downloader/core/downloaders/registry.py +4 -2
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +3 -20
- novel_downloader/core/exporters/base.py +33 -37
- novel_downloader/core/exporters/common/__init__.py +1 -2
- novel_downloader/core/exporters/common/epub.py +15 -10
- novel_downloader/core/exporters/common/main_exporter.py +19 -12
- novel_downloader/core/exporters/common/txt.py +17 -12
- novel_downloader/core/exporters/epub_util.py +59 -29
- novel_downloader/core/exporters/linovelib/__init__.py +1 -0
- novel_downloader/core/exporters/linovelib/epub.py +23 -25
- novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
- novel_downloader/core/exporters/linovelib/txt.py +20 -14
- novel_downloader/core/exporters/qidian.py +2 -8
- novel_downloader/core/exporters/registry.py +4 -2
- novel_downloader/core/exporters/txt_util.py +7 -7
- novel_downloader/core/fetchers/__init__.py +54 -48
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
- novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
- novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/lewenn.py +83 -0
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +5 -16
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/shuhaige.py +84 -0
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/wanbengo.py +83 -0
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +1 -9
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +6 -19
- novel_downloader/core/interfaces/parser.py +7 -8
- novel_downloader/core/interfaces/searcher.py +9 -1
- novel_downloader/core/parsers/__init__.py +49 -12
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +64 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/esjzone.py +64 -69
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/linovelib.py +48 -64
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/qianbi.py +48 -50
- novel_downloader/core/parsers/qidian/main_parser.py +756 -48
- novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +5 -16
- novel_downloader/core/parsers/sfacg.py +38 -45
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +429 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +87 -131
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +34 -3
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
- novel_downloader/core/searchers/base.py +112 -36
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +43 -25
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +74 -40
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +24 -8
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +34 -85
- novel_downloader/locales/zh.json +35 -86
- novel_downloader/models/__init__.py +21 -22
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +4 -37
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +5 -0
- novel_downloader/resources/config/settings.toml +8 -70
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +13 -24
- novel_downloader/utils/chapter_storage.py +5 -5
- novel_downloader/utils/constants.py +4 -31
- novel_downloader/utils/cookies.py +38 -35
- novel_downloader/utils/crypto_utils/__init__.py +7 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/crypto_utils/rc4.py +54 -0
- novel_downloader/utils/epub/__init__.py +3 -4
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +62 -21
- novel_downloader/utils/epub/documents.py +95 -201
- novel_downloader/utils/epub/models.py +8 -22
- novel_downloader/utils/epub/utils.py +73 -106
- novel_downloader/utils/file_utils/__init__.py +2 -23
- novel_downloader/utils/file_utils/io.py +53 -188
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -15
- novel_downloader/utils/fontocr/__init__.py +5 -14
- novel_downloader/utils/fontocr/core.py +216 -0
- novel_downloader/utils/fontocr/loader.py +50 -0
- novel_downloader/utils/logger.py +81 -65
- novel_downloader/utils/network.py +17 -41
- novel_downloader/utils/state.py +4 -90
- novel_downloader/utils/text_utils/__init__.py +1 -7
- novel_downloader/utils/text_utils/diff_display.py +5 -7
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/__init__.py +5 -11
- novel_downloader/utils/time_utils/datetime_utils.py +20 -29
- novel_downloader/utils/time_utils/sleep_utils.py +55 -49
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.1.dist-info/METADATA +172 -0
- novel_downloader-2.0.1.dist-info/RECORD +206 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
- novel_downloader/core/downloaders/biquge.py +0 -29
- novel_downloader/core/downloaders/esjzone.py +0 -29
- novel_downloader/core/downloaders/linovelib.py +0 -29
- novel_downloader/core/downloaders/sfacg.py +0 -29
- novel_downloader/core/downloaders/yamibo.py +0 -29
- novel_downloader/core/exporters/biquge.py +0 -22
- novel_downloader/core/exporters/esjzone.py +0 -22
- novel_downloader/core/exporters/qianbi.py +0 -22
- novel_downloader/core/exporters/sfacg.py +0 -22
- novel_downloader/core/exporters/yamibo.py +0 -22
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -422
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -209
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -198
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -326
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -194
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -234
- novel_downloader/core/parsers/biquge.py +0 -139
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/types.py +0 -13
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/crypto_utils.py +0 -71
- novel_downloader/utils/fontocr/hash_store.py +0 -280
- novel_downloader/utils/fontocr/hash_utils.py +0 -103
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -315
- novel_downloader/utils/fontocr/ocr_v2.py +0 -764
- novel_downloader/utils/fontocr/ocr_v3.py +0 -744
- novel_downloader-1.5.0.dist-info/METADATA +0 -196
- novel_downloader-1.5.0.dist-info/RECORD +0 -164
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,744 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.utils.fontocr.ocr_v3
|
4
|
-
-------------------------------------
|
5
|
-
|
6
|
-
This class provides utility methods for optical character recognition (OCR)
|
7
|
-
and font mapping, primarily used for decrypting custom font encryption
|
8
|
-
on web pages (e.g., the Qidian website).
|
9
|
-
"""
|
10
|
-
|
11
|
-
import json
|
12
|
-
import logging
|
13
|
-
import math
|
14
|
-
import os
|
15
|
-
from collections.abc import Generator
|
16
|
-
from pathlib import Path
|
17
|
-
from typing import Any, TypeVar
|
18
|
-
|
19
|
-
import cv2
|
20
|
-
import numpy as np
|
21
|
-
import paddle
|
22
|
-
from fontTools.ttLib import TTFont
|
23
|
-
from paddle.inference import Config
|
24
|
-
from paddle.inference import create_predictor as _create_predictor
|
25
|
-
from PIL import Image, ImageDraw, ImageFont
|
26
|
-
from PIL.Image import Transpose
|
27
|
-
|
28
|
-
from novel_downloader.utils.constants import (
|
29
|
-
REC_CHAR_MODEL_FILES,
|
30
|
-
REC_IMAGE_SHAPE_MAP,
|
31
|
-
)
|
32
|
-
|
33
|
-
from .model_loader import (
|
34
|
-
get_rec_char_vector_dir,
|
35
|
-
get_rec_chinese_char_model_dir,
|
36
|
-
)
|
37
|
-
|
38
|
-
T = TypeVar("T")
|
39
|
-
logger = logging.getLogger(__name__)
|
40
|
-
|
41
|
-
|
42
|
-
class CTCLabelDecode:
|
43
|
-
"""
|
44
|
-
Convert between text-index and text-label for CTC-based models.
|
45
|
-
|
46
|
-
:param character_dict_path: Path to the file containing characters, one per line.
|
47
|
-
:param beg_str: Token representing the start of sequence.
|
48
|
-
:param end_str: Token representing the end of sequence.
|
49
|
-
"""
|
50
|
-
|
51
|
-
__slots__ = ("idx_to_char", "char_to_idx", "blank_id", "beg_str", "end_str")
|
52
|
-
|
53
|
-
def __init__(
|
54
|
-
self,
|
55
|
-
character_dict_path: str | Path,
|
56
|
-
beg_str: str = "sos",
|
57
|
-
end_str: str = "eos",
|
58
|
-
):
|
59
|
-
# Store special tokens
|
60
|
-
self.beg_str = beg_str
|
61
|
-
self.end_str = end_str
|
62
|
-
|
63
|
-
# Read and clean character list (skip empty lines)
|
64
|
-
path = Path(character_dict_path)
|
65
|
-
chars = [
|
66
|
-
line.strip()
|
67
|
-
for line in path.read_text(encoding="utf-8").splitlines()
|
68
|
-
if line.strip()
|
69
|
-
]
|
70
|
-
|
71
|
-
# Reserve index 0 for the CTC blank token, then actual characters
|
72
|
-
self.idx_to_char: list[str] = ["blank"] + chars
|
73
|
-
self.blank_id: int = 0
|
74
|
-
|
75
|
-
# Build reverse mapping from character to index
|
76
|
-
self.char_to_idx = {ch: i for i, ch in enumerate(self.idx_to_char)}
|
77
|
-
|
78
|
-
def decode(
|
79
|
-
self,
|
80
|
-
text_indices: np.ndarray,
|
81
|
-
text_probs: np.ndarray | None = None,
|
82
|
-
) -> list[tuple[str, float]]:
|
83
|
-
"""
|
84
|
-
Decode index sequences to strings and compute average confidence.
|
85
|
-
|
86
|
-
:param text_indices: (batch_size, seq_len) class indices.
|
87
|
-
:param text_probs: Optional per-step probabilities, same shape.
|
88
|
-
:return: List of (string, avg_confidence) per sample.
|
89
|
-
"""
|
90
|
-
results: list[tuple[str, float]] = []
|
91
|
-
batch_size = text_indices.shape[0]
|
92
|
-
|
93
|
-
for i in range(batch_size):
|
94
|
-
seq = text_indices[i]
|
95
|
-
# Collapse repeated tokens: keep first of any run
|
96
|
-
mask = np.concatenate(([True], seq[1:] != seq[:-1]))
|
97
|
-
# Remove blanks
|
98
|
-
mask &= seq != self.blank_id
|
99
|
-
|
100
|
-
# Map indices to characters
|
101
|
-
chars = [self.idx_to_char[idx] for idx in seq[mask]]
|
102
|
-
|
103
|
-
# Compute average confidence, or default to 1.0 if no probs provided
|
104
|
-
if text_probs is not None:
|
105
|
-
probs = text_probs[i][mask]
|
106
|
-
avg_conf = float(probs.mean()) if probs.size else 0.0
|
107
|
-
else:
|
108
|
-
avg_conf = 1.0
|
109
|
-
|
110
|
-
results.append(("".join(chars), avg_conf))
|
111
|
-
|
112
|
-
return results
|
113
|
-
|
114
|
-
def __call__(self, preds: Any) -> list[tuple[str, float]]:
|
115
|
-
"""
|
116
|
-
Decode raw model outputs to final text labels and confidences.
|
117
|
-
|
118
|
-
:param preds: Model output array/tensor of shape (batch, seq_len, num_classes),
|
119
|
-
or a tuple/list whose last element is that array.
|
120
|
-
:returns: A list of (decoded_string, average_confidence).
|
121
|
-
"""
|
122
|
-
# If passed as (logits, ...), take the last element
|
123
|
-
if isinstance(preds, (tuple | list)):
|
124
|
-
preds = preds[-1]
|
125
|
-
|
126
|
-
# Convert framework tensor to numpy if needed
|
127
|
-
if hasattr(preds, "numpy"):
|
128
|
-
preds = preds.numpy()
|
129
|
-
|
130
|
-
# Get the most likely class index and its probability
|
131
|
-
text_idx = preds.argmax(axis=2)
|
132
|
-
text_prob = preds.max(axis=2)
|
133
|
-
|
134
|
-
return self.decode(text_idx, text_prob)
|
135
|
-
|
136
|
-
|
137
|
-
class TextRecognizer:
    """
    Batched text recognizer built on a Paddle inference predictor.

    Images are resized to the configured height, normalized, padded into a
    single batch tensor, run through the predictor and decoded with
    :class:`CTCLabelDecode`.

    :param rec_model_dir: Directory containing ``inference.pdmodel`` and
                          ``inference.pdiparams``.
    :param rec_image_shape: Comma-separated ``"C,H,W"`` string.
    :param rec_batch_num: Maximum number of images per inference batch.
    :param rec_char_dict_path: Character dictionary passed to the CTC decoder.
    :param use_gpu: Run on GPU when True, otherwise on CPU.
    :param gpu_mem: Forwarded to ``Config.enable_use_gpu`` as its first argument.
    :param gpu_id: Explicit device id; when None it is derived from the
                   ``*_VISIBLE_DEVICES`` environment variable.
    """

    def __init__(
        self,
        rec_model_dir: str,
        rec_image_shape: str,
        rec_batch_num: int,
        rec_char_dict_path: str,
        use_gpu: bool = False,
        gpu_mem: int = 500,
        gpu_id: int | None = None,
    ):
        self.rec_batch_num = int(rec_batch_num)
        self.rec_image_shape = tuple(map(int, rec_image_shape.split(",")))  # (C, H, W)
        self.postprocess_op = CTCLabelDecode(
            character_dict_path=rec_char_dict_path,
        )

        self._create_predictor(
            model_dir=rec_model_dir,
            use_gpu=use_gpu,
            gpu_mem=gpu_mem,
            gpu_id=gpu_id,
        )

    def _get_infer_gpu_id(self) -> int:
        """
        Look at CUDA_VISIBLE_DEVICES or HIP_VISIBLE_DEVICES,
        pick the first entry and return as integer. Fallback to 0.
        """
        # `is_compiled_with_rocm` is a function: it has to be *called*.
        # Testing the bare function object is always truthy, which made the
        # CUDA branch unreachable.
        if paddle.device.is_compiled_with_rocm():
            env_name = "HIP_VISIBLE_DEVICES"
        else:
            env_name = "CUDA_VISIBLE_DEVICES"

        first = os.environ.get(env_name, "0").split(",")[0]
        try:
            return int(first)
        except ValueError:
            # Empty or non-numeric entry: fall back to device 0.
            return 0

    def _create_predictor(
        self,
        model_dir: str,
        use_gpu: bool,
        gpu_mem: int,
        gpu_id: int | None = None,
    ) -> None:
        """
        Internal helper to build the Paddle predictor + I/O handles.

        Sets ``self.config``, ``self.predictor``, ``self.input_tensor`` and
        ``self.output_tensors``.
        """
        model_file = f"{model_dir}/inference.pdmodel"
        params_file = f"{model_dir}/inference.pdiparams"

        cfg = Config(model_file, params_file)
        if use_gpu:
            chosen = gpu_id if gpu_id is not None else self._get_infer_gpu_id()
            cfg.enable_use_gpu(gpu_mem, chosen)
        else:
            cfg.disable_gpu()

        # enable memory optim
        cfg.enable_memory_optim()
        cfg.disable_glog_info()
        # Use zero-copy feed/fetch for speed
        cfg.switch_use_feed_fetch_ops(False)
        # Enable IR optimizations
        cfg.switch_ir_optim(True)

        self.config = cfg
        self.predictor = _create_predictor(cfg)

        # Only the first input is fed.
        in_name = self.predictor.get_input_names()[0]
        self.input_tensor = self.predictor.get_input_handle(in_name)

        # Prefer the softmax output when the model exposes it by that name;
        # otherwise fetch every output and let the decoder take the last one.
        out_names = self.predictor.get_output_names()
        preferred = "softmax_0.tmp_0"
        selected = [preferred] if preferred in out_names else out_names
        self.output_tensors = [self.predictor.get_output_handle(n) for n in selected]

    def __call__(self, img_list: list[np.ndarray]) -> list[tuple[str, float]]:
        """
        Perform batch OCR on a list of images and return (text, confidence) tuples.

        :param img_list: Images as arrays indexed (height, width[, channel]).
        :return: One (text, confidence) tuple per input image, in input order.
        """
        img_num = len(img_list)
        results: list[tuple[str, float]] = []

        C, H, W0 = self.rec_image_shape

        # Process images in batches
        for start in range(0, img_num, self.rec_batch_num):
            batch = img_list[start : start + self.rec_batch_num]
            # The batch width is driven by the widest image, but never
            # narrower than the configured W0/H ratio.
            wh_ratios = [img.shape[1] / float(img.shape[0]) for img in batch]
            max_wh = max(W0 / H, *wh_ratios)

            B = len(batch)
            # Zero-filled so that narrower images are right-padded with zeros.
            batch_tensor = np.zeros(
                (B, C, H, int(math.ceil(H * max_wh))), dtype=np.float32
            )

            # Normalize and pad each image into the batch tensor
            for i, img in enumerate(batch):
                norm = self.resize_norm_img(img, max_wh)
                batch_tensor[i, :, :, : norm.shape[2]] = norm

            # Run inference
            self.input_tensor.copy_from_cpu(batch_tensor)
            self.predictor.run()

            # Retrieve and post-process outputs
            outputs = [t.copy_to_cpu() for t in self.output_tensors]
            preds = outputs[0] if len(outputs) == 1 else outputs

            rec_batch = self.postprocess_op(preds)
            results.extend(rec_batch)

        return results

    def resize_norm_img(self, img: np.ndarray, max_wh_ratio: float) -> np.ndarray:
        """
        Resize one image to the model height and normalize it.

        :param img: 2-D grayscale or 3-D (H, W, C) image.
        :param max_wh_ratio: Width/height ratio of the batch; caps the new width.
        :return: float32 array in (C, H, new_w) layout.
        :raises AssertionError: if the channel count does not match the model.
        """
        C, H, W0 = self.rec_image_shape
        if img.ndim == 2:
            # Convert grayscale images to RGB
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        assert (
            img.ndim == 3 and img.shape[2] == C
        ), f"Expect {C}-channel image, got {img.shape}"

        h, w = img.shape[:2]
        # Keep the aspect ratio, but never exceed the batch-wide maximum width
        new_w = min(int(math.ceil(H * (w / h))), int(H * max_wh_ratio))
        resized = cv2.resize(img, (new_w, H)).astype("float32")
        # Change to CHW format and divide by 255
        # (assumes 8-bit pixel values, giving [0, 1] - TODO confirm)
        resized = resized.transpose(2, 0, 1) / 255.0
        # Shift/scale so that [0, 1] maps to [-1, 1]
        resized = (resized - 0.5) / 0.5

        return resized
|
275
|
-
|
276
|
-
|
277
|
-
class FontOCRV3:
|
278
|
-
"""
|
279
|
-
Version 3 of the FontOCR utility.
|
280
|
-
|
281
|
-
This class provides character recognition using a hybrid approach combining:
|
282
|
-
- OCR model inference
|
283
|
-
- Feature vector similarity matching
|
284
|
-
- Optional frequency-based scoring adjustments
|
285
|
-
"""
|
286
|
-
|
287
|
-
# Default constants
|
288
|
-
CHAR_IMAGE_SIZE = 64
|
289
|
-
CHAR_FONT_SIZE = 52
|
290
|
-
_freq_weight = 0.05
|
291
|
-
|
292
|
-
# shared resources
|
293
|
-
_global_char_freq_db: dict[str, int] = {}
|
294
|
-
_global_ocr: TextRecognizer | None = None
|
295
|
-
_global_vec_db: np.ndarray | None = None
|
296
|
-
_global_vec_label: tuple[str, ...] = ()
|
297
|
-
_global_vec_shape: tuple[int, int] = (32, 32)
|
298
|
-
|
299
|
-
def __init__(
|
300
|
-
self,
|
301
|
-
cache_dir: str | Path,
|
302
|
-
use_freq: bool = False,
|
303
|
-
use_ocr: bool = True,
|
304
|
-
use_vec: bool = False,
|
305
|
-
batch_size: int = 32,
|
306
|
-
gpu_mem: int = 500,
|
307
|
-
gpu_id: int | None = None,
|
308
|
-
ocr_weight: float = 0.6,
|
309
|
-
vec_weight: float = 0.4,
|
310
|
-
ocr_version: str = "v1.0",
|
311
|
-
threshold: float = 0.0,
|
312
|
-
font_debug: bool = False,
|
313
|
-
**kwargs: Any,
|
314
|
-
) -> None:
|
315
|
-
"""
|
316
|
-
Initialize a FontOCRV3 instance.
|
317
|
-
|
318
|
-
:param cache_dir: base path to store font-map JSON data
|
319
|
-
:param use_freq: if True, weight scores using character frequency database
|
320
|
-
:param use_ocr: if True, use OCR model for character prediction
|
321
|
-
:param use_vec: if True, use feature vector matching for prediction
|
322
|
-
:param batch_size: batch size for OCR inference (minimum 1)
|
323
|
-
:param gpu_mem: GPU memory allocation in MB for OCR model
|
324
|
-
:param gpu_id: target GPU ID for running the OCR model (optional)
|
325
|
-
:param ocr_weight: weight factor for OCR-based prediction scores
|
326
|
-
:param vec_weight: weight factor for vector-based similarity scores
|
327
|
-
:param ocr_version: OCR model version identifier
|
328
|
-
:param threshold: minimum confidence threshold for predictions [0.0-1.0]
|
329
|
-
:param font_debug: if True, dump per-character debug images under debug_dir
|
330
|
-
:param kwargs: reserved for future extensions
|
331
|
-
"""
|
332
|
-
self.use_freq = use_freq
|
333
|
-
self.use_ocr = use_ocr
|
334
|
-
self.use_vec = use_vec
|
335
|
-
self.batch_size = max(batch_size, 1)
|
336
|
-
self.gpu_mem = gpu_mem
|
337
|
-
self.gpu_id = gpu_id
|
338
|
-
self.ocr_weight = ocr_weight
|
339
|
-
self.vec_weight = vec_weight
|
340
|
-
self.ocr_version = ocr_version
|
341
|
-
self.threshold = min(threshold, 1.0)
|
342
|
-
self.font_debug = font_debug
|
343
|
-
self._max_freq = 5
|
344
|
-
|
345
|
-
self._cache_dir = Path(cache_dir)
|
346
|
-
self._cache_dir.mkdir(parents=True, exist_ok=True)
|
347
|
-
self._fixed_map_dir = self._cache_dir / "fixed_font_map"
|
348
|
-
self._fixed_map_dir.mkdir(parents=True, exist_ok=True)
|
349
|
-
|
350
|
-
if font_debug:
|
351
|
-
self._debug_dir = Path.cwd() / "debug" / "font_debug" / "badcase"
|
352
|
-
self._debug_dir.mkdir(parents=True, exist_ok=True)
|
353
|
-
|
354
|
-
# load shared OCR + frequency DB
|
355
|
-
if self.use_ocr:
|
356
|
-
self._load_ocr_model()
|
357
|
-
if self.use_freq:
|
358
|
-
self._load_char_freq_db()
|
359
|
-
if self.use_vec:
|
360
|
-
self._load_char_vec_db()
|
361
|
-
|
362
|
-
def generate_font_map(
    self,
    fixed_font_path: str | Path,
    random_font_path: str | Path,
    char_set: set[str],
    refl_set: set[str],
    chapter_id: str | None = None,
) -> dict[str, str]:
    """
    Generates a mapping from encrypted (randomized) font characters to
    their real recognized characters by rendering and OCR-based matching.

    Results are cached in a JSON file named after the fixed font's stem
    inside the fixed-map directory; characters already present in that file
    are returned from the cache and not rendered again.

    :param fixed_font_path: Path to the reference (fixed) font.
    :param random_font_path: Path to the obfuscated (random) font.
    :param char_set: Characters to process normally.
    :param refl_set: Characters to process as horizontally flipped.
    :param chapter_id: Chapter ID, used only to name debug images.

    :returns mapping_result: { obf_char: real_char, ... }. If the fonts
             cannot be loaded, only the cached entries are returned.
    """
    mapping_result: dict[str, str] = {}
    fixed_map_file = self._fixed_map_dir / f"{Path(fixed_font_path).stem}.json"

    # Load the existing cache. A missing file is the normal first-run case;
    # an unreadable or malformed one is reported instead of being silently
    # discarded, and processing continues with an empty cache.
    fixed_map: dict[str, str] = {}
    try:
        with open(fixed_map_file, encoding="utf-8") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        pass
    except (OSError, ValueError) as e:
        logger.warning(
            "[FontOCR] Ignoring unreadable fixed map %s: %s", fixed_map_file, e
        )
    else:
        if isinstance(loaded, dict):
            fixed_map = loaded
        else:
            logger.warning(
                "[FontOCR] Ignoring fixed map %s: expected a JSON object",
                fixed_map_file,
            )

    # Serve cached characters and drop them from the work sets.
    cached_chars = set(fixed_map)
    mapping_result.update({ch: fixed_map[ch] for ch in char_set if ch in fixed_map})
    mapping_result.update({ch: fixed_map[ch] for ch in refl_set if ch in fixed_map})
    char_set = set(char_set) - cached_chars
    refl_set = set(refl_set) - cached_chars

    # Prepare font renderers and cmap sets. The TTFont objects are only
    # needed to read the cmap, so they are closed right away instead of
    # being left open for the rest of the call.
    try:
        with TTFont(fixed_font_path) as fixed_ttf:
            fixed_chars = {chr(c) for c in fixed_ttf.getBestCmap()}
        fixed_font = ImageFont.truetype(str(fixed_font_path), self.CHAR_FONT_SIZE)

        with TTFont(random_font_path) as random_ttf:
            random_chars = {chr(c) for c in random_ttf.getBestCmap()}
        random_font = ImageFont.truetype(str(random_font_path), self.CHAR_FONT_SIZE)
    except Exception as e:
        # Deliberately broad: any font parsing failure ends the call with
        # whatever the cache could provide.
        logger.error("[FontOCR] Failed to load TTF fonts: %s", e)
        return mapping_result

    def _render_batch(
        chars: list[tuple[str, bool]]
    ) -> list[tuple[str, Image.Image]]:
        """Render each character with the font that contains it."""
        out = []
        for ch, reflect in chars:
            # The fixed font takes precedence when both fonts map the char.
            if ch in fixed_chars:
                font = fixed_font
            elif ch in random_chars:
                font = random_font
            else:
                continue
            img = self._generate_char_image(ch, font, reflect)
            if img is not None:
                out.append((ch, img))
        return out

    # process normal and reflected sets together
    debug_idx = 1
    for chars, reflect in [(list(char_set), False), (list(refl_set), True)]:
        for batch_chars in self._chunked(chars, self.batch_size):
            # render all images in this batch
            to_render = [(ch, reflect) for ch in batch_chars]
            rendered = _render_batch(to_render)
            if not rendered:
                continue

            # query OCR+vec simultaneously
            imgs_to_query = [img for (_ch, img) in rendered]
            fused_raw = self.query(imgs_to_query, top_k=3)
            if not fused_raw:
                # Nothing came back; avoid indexing into an empty result.
                continue
            # `query` may return a flat list of tuples for a single image;
            # normalize to one prediction list per image.
            if isinstance(fused_raw[0], tuple):
                fused: list[list[tuple[str, float]]] = [fused_raw]  # type: ignore
            else:
                fused = fused_raw  # type: ignore

            # pick best per char + cache
            for (ch, img), preds in zip(rendered, fused, strict=False):
                if not preds:
                    # No prediction for this glyph: optionally dump it.
                    if self.font_debug and chapter_id:
                        dbg_path = (
                            self._debug_dir / f"{chapter_id}_{debug_idx:04d}.png"
                        )
                        img.save(dbg_path)
                        debug_idx += 1
                    continue
                real_char, _ = preds[0]
                mapping_result[ch] = real_char
                # NOTE(review): characters rendered with the random font are
                # stored in the fixed-font cache as well - confirm intended.
                fixed_map[ch] = real_char

    # persist updated fixed_map (best effort: a failure is only logged)
    try:
        with open(fixed_map_file, "w", encoding="utf-8") as f:
            json.dump(fixed_map, f, ensure_ascii=False, indent=2)
    except Exception as e:
        logger.error("[FontOCR] Failed to save fixed map: %s", e)

    return mapping_result
|
471
|
-
|
472
|
-
@staticmethod
|
473
|
-
def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
|
474
|
-
"""
|
475
|
-
Replace each character in `text` using `font_map`,
|
476
|
-
leaving unmapped characters unchanged.
|
477
|
-
|
478
|
-
:param text: The input string, possibly containing obfuscated font chars.
|
479
|
-
:param font_map: A dict mapping obfuscated chars to real chars.
|
480
|
-
:return: The de-obfuscated text.
|
481
|
-
"""
|
482
|
-
return "".join(font_map.get(ch, ch) for ch in text)
|
483
|
-
|
484
|
-
def query(
    self,
    images: Image.Image | list[Image.Image],
    top_k: int = 3,
) -> list[tuple[str, float]] | list[list[tuple[str, float]]]:
    """
    Fuse OCR, embedding-match and (optionally) frequency scores per image.

    For every image the OCR prediction contributes `ocr_weight * score`
    and each embedding candidate contributes `vec_weight * score`.  When
    `use_freq` is set, every candidate additionally receives
    `_freq_weight * (max_freq - level) / max(1, max_freq)`, where `level`
    is looked up in the shared frequency DB (unknown chars get `max_freq`).
    Candidates scoring below `self.threshold` are dropped; the rest are
    sorted by descending score and cut to `top_k`.

    Empty labels (the `("", 0.0)` placeholders produced by the OCR and
    embedding fallbacks) never become candidates.

    :param images: a PIL.Image or a list of PIL.Image to recognize
    :param top_k: maximum number of candidates kept per image; also
        forwarded to the embedding matcher

    :return:
        - If a single Image was passed in,
          returns a list of (char, score) tuples sorted descending.

        - If a list of Images was passed in, returns a list of such lists.
    """
    # normalize to list
    single = isinstance(images, Image.Image)
    imgs: list[Image.Image] = [images] if single else images

    # OCR scores: one (char, score) pair per image
    if self.use_ocr and imgs:
        raw_ocr = self.run_ocr_on_images(imgs)
        ocr_results = [raw_ocr] if isinstance(raw_ocr, tuple) else raw_ocr
    else:
        ocr_results = [("", 0.0) for _ in imgs]

    # Vec-embedding scores: one candidate list per image
    if self.use_vec and imgs:
        raw_vec = self.match_text_by_embedding(imgs, top_k=top_k)
        if raw_vec and isinstance(raw_vec[0], tuple):
            vec_results: list[list[tuple[str, float]]] = [raw_vec]  # type: ignore
        else:
            vec_results = raw_vec  # type: ignore
    else:
        vec_results = [[] for _ in imgs]

    total_results: list[list[tuple[str, float]]] = []
    for ocr_preds, vec_preds in zip(ocr_results, vec_results, strict=False):
        scores: dict[str, float] = {}

        if ocr_preds and ocr_preds[0]:
            ch, s = ocr_preds
            scores[ch] = scores.get(ch, 0.0) + self.ocr_weight * s
        for ch, s in vec_preds:
            if not ch:
                # Placeholder from the matcher's fallback path: not a
                # real candidate, so it must not be able to win.
                continue
            scores[ch] = scores.get(ch, 0.0) + self.vec_weight * s
        if self.use_freq and scores:
            max_freq = self._max_freq
            denom = max(1, max_freq)  # guards against division by zero
            freq_db = self._global_char_freq_db
            for ch in scores:
                level = freq_db.get(ch, max_freq)
                scores[ch] += self._freq_weight * (max_freq - level) / denom

        # Threshold + sort + top_k
        filtered = [(ch, sc) for ch, sc in scores.items() if sc >= self.threshold]
        filtered.sort(key=lambda x: -x[1])

        total_results.append(filtered[:top_k])

    if single:
        # No result row at all (e.g. the OCR backend returned nothing)
        # means "no candidates", not an IndexError.
        return total_results[0] if total_results else []
    return total_results
|
536
|
-
|
537
|
-
def match_text_by_embedding(
    self,
    images: Image.Image | list[Image.Image],
    top_k: int = 1,
) -> list[tuple[str, float]] | list[list[tuple[str, float]]]:
    """
    Match input image to precomputed character embeddings using cosine similarity.

    Each image is converted to grayscale, resized to the shared vector
    shape, flattened and L2-normalized, then compared against the shared
    (already normalized) vector DB with a single matrix product.

    If the vector DB is not loaded, or any error occurs while matching,
    a warning is logged (error case only) and a `[("", 0.0)]` placeholder
    is returned for every image instead of raising.

    :param images: a PIL.Image or a list of PIL.Image to match
    :param top_k: int, how many top matches to return

    :return:
        - If a single Image was passed in,
          returns a list of (label, score) tuples sorted descending.

        - If a list of Images was passed in, returns a list of such lists.
    """
    single = isinstance(images, Image.Image)

    def _fallback() -> list[tuple[str, float]] | list[list[tuple[str, float]]]:
        # Build a fresh placeholder list per image so callers never end up
        # sharing (and accidentally mutating) one list object.
        if single:
            return [("", 0.0)]
        return [[("", 0.0)] for _ in images]

    if self._global_vec_db is None:
        return _fallback()
    try:
        imgs: list[Image.Image] = [images] if single else images

        # Convert images to normalized 1D vectors
        vecs = []
        for img in imgs:
            pil_gray = img.convert("L").resize(self._global_vec_shape)
            v = (np.asarray(pil_gray, dtype=np.float32) / 255.0).ravel()
            v /= np.linalg.norm(v) + 1e-6  # epsilon avoids division by zero
            vecs.append(v)

        batch = np.stack(vecs, axis=0)  # (N, D)
        # Compute all cosine similarities in one batch:
        sims_batch = batch.dot(self._global_vec_db.T)  # (N, num_chars)

        all_results: list[list[tuple[str, float]]] = []
        for sims in sims_batch:
            k = min(top_k, sims.shape[0])
            # argpartition finds the k best in linear time; only those k
            # entries are then fully sorted.
            top_unsorted = np.argpartition(-sims, k - 1)[:k]
            top_idx = top_unsorted[np.argsort(-sims[top_unsorted])]
            all_results.append(
                [
                    (self._global_vec_label[int(i)], float(sims[int(i)]))
                    for i in top_idx
                ]
            )

        # Unwrap single-image case
        return all_results[0] if single else all_results
    except Exception as e:
        # Deliberately best-effort: matching problems degrade to placeholders.
        logger.warning("[FontOCR] Error: %s", e)
        return _fallback()
|
598
|
-
|
599
|
-
def run_ocr_on_images(
    self,
    images: Image.Image | list[Image.Image],
) -> tuple[str, float] | list[tuple[str, float]]:
    """
    Run OCR on one or more PIL.Image(s) and return recognized text with confidence

    The images are converted to RGB numpy arrays and passed to the shared
    recognizer in one call.  If the recognizer is not loaded, or the call
    fails (a warning is logged), `("", 0.0)` is returned per image.

    The single-vs-batch decision is made once, from whether `images` is a
    PIL.Image, so the success and fallback paths always return the same
    shape for the same input.

    :param images: A single PIL.Image or list of PIL.Images to recognize.
    :return:
        - If a single image is passed, returns Tuple[str, float].

        - If a list is passed, returns List[Tuple[str, float]].
    """
    single = isinstance(images, Image.Image)
    fallback = ("", 0.0)

    if self._global_ocr is None:
        return fallback if single else [fallback for _ in images]
    try:
        # Normalize input to a list of numpy arrays (RGB)
        img_list = [images] if single else images
        np_imgs: list[np.ndarray] = [
            np.array(img.convert("RGB")) for img in img_list
        ]

        # Run OCR
        ocr_results = self._global_ocr(np_imgs)

        # Return result depending on input type
        return ocr_results[0] if single else ocr_results

    except Exception as e:
        # Deliberately best-effort: OCR problems degrade to placeholders.
        logger.warning("[FontOCR] OCR failed: %s", e)
        return fallback if single else [fallback for _ in images]
|
640
|
-
|
641
|
-
def _load_ocr_model(self) -> None:
    """
    Create the shared text recognizer once and store it on the class.

    Does nothing when `FontOCRV3._global_ocr` is already set.  Otherwise
    the model directory for `self.ocr_version` is resolved, every file
    listed in `REC_CHAR_MODEL_FILES` is checked for existence, and a
    `TextRecognizer` is built; GPU use is requested when paddle reports
    it was compiled with CUDA.

    :raises FileNotFoundError: if a required model file is missing.
    """
    if FontOCRV3._global_ocr is not None:
        return

    gpu_available = paddle.device.is_compiled_with_cuda()
    self._char_model_dir = get_rec_chinese_char_model_dir(self.ocr_version)
    model_dir = self._char_model_dir

    # Report the first required file that is absent, in declaration order.
    missing = next(
        (
            candidate
            for candidate in (model_dir / name for name in REC_CHAR_MODEL_FILES)
            if not candidate.exists()
        ),
        None,
    )
    if missing is not None:
        raise FileNotFoundError(f"[FontOCR] Required file missing: {missing}")

    recognizer_options = {
        "rec_model_dir": str(model_dir),
        "rec_char_dict_path": str(model_dir / "rec_custom_keys.txt"),
        "rec_image_shape": REC_IMAGE_SHAPE_MAP[self.ocr_version],
        "rec_batch_num": self.batch_size,
        "use_gpu": gpu_available,
        "gpu_mem": self.gpu_mem,
        "gpu_id": self.gpu_id,
    }
    FontOCRV3._global_ocr = TextRecognizer(**recognizer_options)
|
666
|
-
|
667
|
-
def _load_char_freq_db(self) -> bool:
    """
    Load the shared character frequency DB and set this instance's max level.

    The JSON file `char_freq.json` in `self._char_model_dir` is read only
    when the class-level `FontOCRV3._global_char_freq_db` is still unset.
    `self._max_freq` is (re)computed from the shared DB on every call, so
    an instance created after another one already loaded the DB still
    gets a valid `_max_freq`.

    :return: True if successfully loaded, False otherwise.
    """
    try:
        if FontOCRV3._global_char_freq_db is None:
            char_freq_map_file = self._char_model_dir / "char_freq.json"
            with char_freq_map_file.open("r", encoding="utf-8") as f:
                FontOCRV3._global_char_freq_db = json.load(f)
        # Per-instance value: must be set even when the DB was cached.
        self._max_freq = max(FontOCRV3._global_char_freq_db.values())
        return True
    except Exception as e:
        # Best-effort: the caller decides what to do without frequency data.
        logger.warning("[FontOCR] Failed to load char freq DB: %s", e)
        return False
|
686
|
-
|
687
|
-
def _load_char_vec_db(self) -> None:
    """
    Initialize the shared Char Vector if not already loaded.

    Reads `char_vectors.npy` and `char_vectors.txt` from the vector
    directory for `self.ocr_version`, L2-normalizes each row of the
    vector matrix, and derives the square image shape from the vector
    dimension.

    All files are read into locals first and the class attributes are
    assigned only afterwards, with `_global_vec_db` (the "already loaded"
    guard) last.  A failure while reading therefore leaves the class
    untouched, and a later call can retry instead of seeing a
    half-initialized state.
    """
    if FontOCRV3._global_vec_db is not None:
        return

    char_vec_dir = get_rec_char_vector_dir(self.ocr_version)
    char_vec_npy_file = char_vec_dir / "char_vectors.npy"
    char_vec_label_file = char_vec_dir / "char_vectors.txt"

    # Load vectors and their labels before touching any shared state
    vec_db = np.load(char_vec_npy_file)
    with open(char_vec_label_file, encoding="utf-8") as f:
        labels = tuple(line.strip() for line in f)

    _, dim = vec_db.shape
    side = int(np.sqrt(dim))  # vectors are treated as flattened side x side images

    # Row-wise L2 normalization; epsilon avoids division by zero
    norm = np.linalg.norm(vec_db, axis=1, keepdims=True) + 1e-6

    FontOCRV3._global_vec_shape = (side, side)
    FontOCRV3._global_vec_label = labels
    FontOCRV3._global_vec_db = vec_db / norm
|
710
|
-
|
711
|
-
@staticmethod
def _generate_char_image(
    char: str,
    render_font: ImageFont.FreeTypeFont,
    is_reflect: bool = False,
) -> Image.Image | None:
    """
    Draw one character, centered, in black on a white square canvas.

    The canvas is a grayscale image of `FontOCRV3.CHAR_IMAGE_SIZE` pixels
    per side.  With `is_reflect` set, the result is mirrored left-to-right.

    :param char: the character to draw
    :param render_font: the font used for measuring and drawing
    :param is_reflect: whether to flip the image horizontally
    :return: the rendered image, or None when every pixel has the same
        value (nothing visible was drawn)
    """
    side = FontOCRV3.CHAR_IMAGE_SIZE
    canvas = Image.new("L", (side, side), color=255)
    pen = ImageDraw.Draw(canvas)

    # Center the glyph's bounding box; subtracting left/top compensates
    # for the box not starting at the drawing origin.
    left, top, right, bottom = pen.textbbox((0, 0), char, font=render_font)
    origin = (
        (side - (right - left)) // 2 - left,
        (side - (bottom - top)) // 2 - top,
    )
    pen.text(origin, char, fill=0, font=render_font)

    if is_reflect:
        canvas = canvas.transpose(Transpose.FLIP_LEFT_RIGHT)

    # A single distinct pixel value means the canvas is blank.
    if np.unique(np.array(canvas)).size == 1:
        return None
    return canvas
|
737
|
-
|
738
|
-
@staticmethod
|
739
|
-
def _chunked(seq: list[T], size: int) -> Generator[list[T], None, None]:
|
740
|
-
"""
|
741
|
-
Yield successive chunks of `seq` of length `size`.
|
742
|
-
"""
|
743
|
-
for i in range(0, len(seq), size):
|
744
|
-
yield seq[i : i + size]
|