py2ls 0.1.10.0__py3-none-any.whl → 0.1.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.git/COMMIT_EDITMSG +1 -1
- py2ls/.git/FETCH_HEAD +1 -1
- py2ls/.git/index +0 -0
- py2ls/.git/logs/HEAD +1 -0
- py2ls/.git/logs/refs/heads/main +1 -0
- py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
- py2ls/.git/logs/refs/remotes/origin/main +1 -0
- py2ls/.git/objects/27/aa6074f652bc6f7078f8647489d9ee8e24f0e2 +0 -0
- py2ls/.git/objects/28/c2969d785c1b892c2a96b3f00eba63a59811b3 +0 -0
- py2ls/.git/objects/2a/fdf45791a26d42ccead35ace76a8f0b2a56561 +0 -0
- py2ls/.git/objects/34/b6f3a2ee84f39bed4eee57f2c0e0afb994feb1 +0 -0
- py2ls/.git/objects/35/1a5f491ab97eee9d1ee699478d75a8bb5d3dc2 +0 -0
- py2ls/.git/objects/39/b13be65125556784e44c7a1d9821703c7ab67e +0 -0
- py2ls/.git/objects/3b/507acc7f23391644cc0b824b1e79fd2677a362 +0 -0
- py2ls/.git/objects/3d/9d10d27724657a436c65a6254bfd213d4b3562 +0 -0
- py2ls/.git/objects/47/6cbd5a7c5e35cddef2f8a38bdc4896d403b095 +0 -0
- py2ls/.git/objects/78/063f4c863fc371ec0313303c0a81283b35d9b6 +0 -0
- py2ls/.git/objects/82/70b319ce4046854fbe7dc41054b6c2d112dab2 +0 -0
- py2ls/.git/objects/85/aee46f478e9afdb84d50a05242c53b04ed2e21 +0 -0
- py2ls/.git/objects/86/e288b46f8fe179907e4413f665aeb5053fddb1 +0 -0
- py2ls/.git/objects/94/f7dbe88e80c4205a901b71eb8f181974376bba +0 -0
- py2ls/.git/objects/9b/ec5ee2236ee2d5532c36bfd132e23c58fdb69c +0 -0
- py2ls/.git/objects/b3/4f7f271c6d6105e35a6556ffda71d03afe8c96 +0 -0
- py2ls/.git/objects/b3/69579064bde9de9a19d114fc33e4e48cc8c0e4 +0 -0
- py2ls/.git/objects/bf/b54d65922ce1dfda1aaa014913a54e7172d0bc +0 -0
- py2ls/.git/objects/c1/397c6ed72c4e20ef6b9ab83163e9a6baba5b45 +0 -0
- py2ls/.git/objects/cc/45df1d317a2eb63ff1ff3a5f3b4a9f98fd92b5 +0 -0
- py2ls/.git/objects/d6/39e8af592cd75a318d8affddd1bcc70c2095f2 +0 -0
- py2ls/.git/objects/db/3f2cd643292057936230b95cf7ec3046affe11 +0 -0
- py2ls/.git/objects/de/214c626ac2dd2685bfaa0bc0fc20f528d014d7 +0 -0
- py2ls/.git/objects/e4/6c715352db9fe3c887a635f1916df4ca1f4ff9 +0 -0
- py2ls/.git/objects/e5/0580a0bd1e1b3d29f834382b80fceb61d5cf0c +0 -0
- py2ls/.git/objects/ec/d980279432b13f0374b90ca439a6329cdece0f +0 -0
- py2ls/.git/objects/ee/cee64eacaff022dcdc509c0c2b1da492f21060 +0 -0
- py2ls/.git/objects/f5/61c3c1bf1c9ea9c9d1f556a7be2869f71f3bdf +0 -0
- py2ls/.git/refs/heads/main +1 -1
- py2ls/.git/refs/remotes/origin/main +1 -1
- py2ls/batman.py +62 -47
- py2ls/ips.py +771 -3
- py2ls/netfinder.py +125 -1
- py2ls/ocr.py +721 -0
- py2ls/plot.py +24 -0
- py2ls/translator.py +470 -119
- {py2ls-0.1.10.0.dist-info → py2ls-0.1.10.2.dist-info}/METADATA +1 -1
- {py2ls-0.1.10.0.dist-info → py2ls-0.1.10.2.dist-info}/RECORD +46 -17
- {py2ls-0.1.10.0.dist-info → py2ls-0.1.10.2.dist-info}/WHEEL +1 -1
py2ls/ocr.py
ADDED
@@ -0,0 +1,721 @@
|
|
1
|
+
import easyocr
|
2
|
+
import cv2
|
3
|
+
import numpy as np
|
4
|
+
import matplotlib.pyplot as plt
|
5
|
+
from py2ls.ips import (
|
6
|
+
strcmp,
|
7
|
+
detect_angle,
|
8
|
+
) # Ensure this function is defined in your 'ips' module
|
9
|
+
from spellchecker import SpellChecker
|
10
|
+
import re
|
11
|
+
|
12
|
+
from PIL import Image, ImageDraw, ImageFont
|
13
|
+
import PIL.PngImagePlugin
|
14
|
+
import pytesseract
|
15
|
+
|
16
|
+
"""
|
17
|
+
Optical Character Recognition (OCR)
|
18
|
+
"""
|
19
|
+
|
20
|
+
# Valid language codes.
# Outer keys are the OCR backend names accepted as ``model`` by
# lang_auto_detect(); inner keys are human-readable language names and the
# values are the codes handed to that backend.
lang_valid = {
    # Codes used when model == "easyocr".
    "easyocr": {
        "english": "en",
        "thai": "th",
        "chinese_traditional": "ch_tra",
        "chinese": "ch_sim",
        "japanese": "ja",
        "korean": "ko",
        "tamil": "ta",
        "telugu": "te",
        "kannada": "kn",
        "german": "de",
    },
    # Codes used when model == "pytesseract".
    # NOTE(review): the list stops at "portuguese"; presumably later
    # alphabetical entries were never added -- confirm before relying on it.
    "pytesseract": {
        "afrikaans": "afr",
        "amharic": "amh",
        "arabic": "ara",
        "assamese": "asm",
        "azerbaijani": "aze",
        "azerbaijani_cyrillic": "aze_cyrl",
        "belarusian": "bel",
        "bengali": "ben",
        "tibetan": "bod",
        "bosnian": "bos",
        "breton": "bre",
        "bulgarian": "bul",
        "catalan": "cat",
        "cebuano": "ceb",
        "czech": "ces",
        "chinese": "chi_sim",
        "chinese_vertical": "chi_sim_vert",
        "chinese_traditional": "chi_tra",
        "chinese_traditional_vertical": "chi_tra_vert",
        "cherokee": "chr",
        "corsican": "cos",
        "welsh": "cym",
        "danish": "dan",
        "danish_fraktur": "dan_frak",
        "german": "deu",
        "german_fraktur": "deu_frak",
        "german_latf": "deu_latf",
        "dhivehi": "div",
        "dzongkha": "dzo",
        "greek": "ell",
        "english": "eng",
        "middle_english": "enm",
        "esperanto": "epo",
        "math_equations": "equ",
        "estonian": "est",
        "basque": "eus",
        "faroese": "fao",
        "persian": "fas",
        "filipino": "fil",
        "finnish": "fin",
        "french": "fra",
        "middle_french": "frm",
        "frisian": "fry",
        "scottish_gaelic": "gla",
        "irish": "gle",
        "galician": "glg",
        "ancient_greek": "grc",
        "gujarati": "guj",
        "haitian_creole": "hat",
        "hebrew": "heb",
        "hindi": "hin",
        "croatian": "hrv",
        "hungarian": "hun",
        "armenian": "hye",
        "inuktitut": "iku",
        "indonesian": "ind",
        "icelandic": "isl",
        "italian": "ita",
        "old_italian": "ita_old",
        "javanese": "jav",
        "japanese": "jpn",
        "japanese_vertical": "jpn_vert",
        "kannada": "kan",
        "georgian": "kat",
        "old_georgian": "kat_old",
        "kazakh": "kaz",
        "khmer": "khm",
        "kyrgyz": "kir",
        "kurdish_kurmanji": "kmr",
        "korean": "kor",
        "korean_vertical": "kor_vert",
        "lao": "lao",
        "latin": "lat",
        "latvian": "lav",
        "lithuanian": "lit",
        "luxembourgish": "ltz",
        "malayalam": "mal",
        "marathi": "mar",
        "macedonian": "mkd",
        "maltese": "mlt",
        "mongolian": "mon",
        "maori": "mri",
        "malay": "msa",
        "burmese": "mya",
        "nepali": "nep",
        "dutch": "nld",
        "norwegian": "nor",
        "occitan": "oci",
        "oriya": "ori",
        "script_detection": "osd",
        "punjabi": "pan",
        "polish": "pol",
        "portuguese": "por",
    },
}
|
130
|
+
|
131
|
+
|
132
|
+
def lang_auto_detect(
    lang,
    model="easyocr",  # "easyocr" or "pytesseract"
):
    """Translate language name(s) into the codes of the chosen OCR backend.

    Args:
        lang: A single language name or an iterable of names.
        model: Key of ``lang_valid`` selecting the backend table.

    Returns:
        A list with one backend language code per requested name.
    """

    def _to_code(name):
        # strcmp comes from py2ls.ips; presumably it fuzzy-matches ``name``
        # against the candidates and returns the best match first -- confirm.
        known_names = list(lang_valid[model].keys())
        return lang_valid[model][strcmp(name, known_names)[0]]

    requested = [lang] if isinstance(lang, str) else lang
    return [_to_code(name) for name in requested]
|
142
|
+
|
143
|
+
|
144
|
+
def determine_src_points(image):
    """Find four corner points of the dominant quadrilateral in ``image``.

    The image is Otsu-thresholded (inverted), external contours are extracted
    and the five largest ones are scanned for the first that simplifies to a
    four-vertex polygon.

    Args:
        image: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.

    Returns:
        A ``(4, 2)`` float32 array ordered top-left, top-right, bottom-right,
        bottom-left. When no quadrilateral is found, the image corners are
        returned instead.
    """
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(grey, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Only the five largest contours (by area) are considered as candidates.
    quad = None
    for candidate in sorted(contours, key=cv2.contourArea, reverse=True)[:5]:
        tolerance = 0.02 * cv2.arcLength(candidate, True)
        polygon = cv2.approxPolyDP(candidate, tolerance, True)
        if len(polygon) == 4:  # a quadrilateral
            quad = np.array(polygon, dtype="float32").reshape(4, 2)
            break

    if quad is None:
        # Fallback: use the full image frame.
        height, width = image.shape[:2]
        return np.array(
            [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
            dtype="float32",
        )

    # x + y is smallest at the top-left and largest at the bottom-right;
    # y - x is smallest at the top-right and largest at the bottom-left.
    coord_sum = quad.sum(axis=1)
    coord_diff = np.diff(quad, axis=1)
    return np.array(
        [
            quad[np.argmin(coord_sum)],
            quad[np.argmin(coord_diff)],
            quad[np.argmax(coord_sum)],
            quad[np.argmax(coord_diff)],
        ],
        dtype="float32",
    )
|
179
|
+
|
180
|
+
|
181
|
+
def get_default_camera_matrix(image_shape):
    """Build a default pinhole camera matrix for an image of the given shape.

    The focal length is set to the image width and the principal point to
    the image centre.

    Args:
        image_shape: Sequence whose first two items are (height, width).

    Returns:
        Tuple ``(camera_matrix, dist_coeffs)``: a 3x3 float32 matrix and a
        ``(4, 1)`` array of zeros (no distortion assumed).
    """
    rows, cols = image_shape[:2]
    intrinsics = np.eye(3, dtype="float32")
    intrinsics[0, 0] = cols  # fx
    intrinsics[1, 1] = cols  # fy
    intrinsics[0, 2] = cols / 2  # cx
    intrinsics[1, 2] = rows / 2  # cy
    no_distortion = np.zeros((4, 1))
    return intrinsics, no_distortion
|
191
|
+
|
192
|
+
|
193
|
+
def correct_perspective(image, src_points, size=(1000, 1000)):
    """Warp ``image`` so that ``src_points`` map onto an upright rectangle.

    Args:
        image: Source image array.
        src_points: Four (x, y) points ordered top-left, top-right,
            bottom-right, bottom-left (the order of the destination corners
            built below).
        size: ``(width, height)`` of the output image in pixels. Defaults to
            the previously hard-coded 1000 x 1000.

    Returns:
        The perspective-corrected image of the requested size.
    """
    width, height = size
    # cv2.getPerspectiveTransform needs float32 point arrays.
    src = np.asarray(src_points, dtype="float32")
    dst = np.array(
        [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
        dtype="float32",
    )

    transform = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(image, transform, (width, height))
|
206
|
+
|
207
|
+
|
208
|
+
def detect_text_orientation(image):
    """Estimate the dominant line angle of ``image`` in degrees.

    Canny edges are fed to a standard Hough line transform; each line's theta
    is converted to degrees, folded into (-90, 90], and the median is taken.

    Args:
        image: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.

    Returns:
        The median angle, or ``0`` when no Hough lines are detected.
    """

    def _folded_degrees(theta):
        degrees = theta * 180 / np.pi
        return degrees - 180 if degrees > 90 else degrees

    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edge_map = cv2.Canny(grey, 50, 150, apertureSize=3)
    hough_lines = cv2.HoughLines(edge_map, 1, np.pi / 180, 200)
    if hough_lines is None:
        return 0

    folded = [_folded_degrees(theta) for _rho, theta in hough_lines[:, 0]]
    return np.median(folded)
|
225
|
+
|
226
|
+
|
227
|
+
def rotate_image(image, angle):
    """Rotate ``image`` by ``angle`` degrees about its centre.

    The output keeps the input's width and height and uses linear
    interpolation.
    """
    rows, cols = image.shape[0], image.shape[1]
    pivot = (cols // 2, rows // 2)
    affine = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(image, affine, (cols, rows), flags=cv2.INTER_LINEAR)
|
234
|
+
|
235
|
+
|
236
|
+
def correct_skew(image):
    """Deskew ``image`` using the minimum-area rectangle of its non-zero pixels.

    Args:
        image: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.

    Returns:
        The rotated image, same size as the input, with replicated borders.
    """
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Coordinates of every pixel brighter than zero.
    nonzero_coords = np.column_stack(np.where(grey > 0))
    box_angle = cv2.minAreaRect(nonzero_coords)[-1]
    # NOTE(review): this mapping looks written for minAreaRect angles in
    # [-90, 0); newer OpenCV releases may report a different range -- confirm.
    angle = -(90 + box_angle) if box_angle < -45 else -box_angle

    height, width = image.shape[:2]
    pivot = (width // 2, height // 2)
    affine = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        image,
        affine,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
|
251
|
+
|
252
|
+
|
253
|
+
def undistort_image(image, camera_matrix, dist_coeffs):
    """Return ``image`` passed through ``cv2.undistort`` with the given camera
    matrix and distortion coefficients."""
    undistorted = cv2.undistort(image, camera_matrix, dist_coeffs)
    return undistorted
|
255
|
+
|
256
|
+
|
257
|
+
def add_text_pil(
    image,
    text,
    position,
    font_size=10,
    color=(255, 0, 0),
    font_path="/System/Library/Fonts/Supplemental/Songti.ttc",
):
    """Draw ``text`` on an OpenCV image using Pillow.

    Args:
        image: BGR image array (it is converted to RGB for drawing and back).
        text: The string to draw.
        position: ``(x, y)`` anchor; the text is placed above this point.
        font_size: Font size forwarded to ``ImageFont.truetype``.
        color: Fill colour. It is applied on the RGB Pillow image, so the
            tuple is interpreted as (R, G, B).
        font_path: TrueType font file to use. The default is a macOS system
            font; if it cannot be opened, Pillow's built-in default font is
            used instead.

    Returns:
        A new BGR image array with the text drawn.
    """
    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(pil_image)

    try:
        font = ImageFont.truetype(font_path, font_size)
    except OSError:
        # Font file missing or unreadable (e.g. not on macOS).
        font = ImageFont.load_default()

    # Measure the rendered text so it can be lifted above the anchor point.
    text_bbox = draw.textbbox((0, 0), text, font=font)
    text_height = text_bbox[3] - text_bbox[1]
    # Shift up by an extra 50% of the text height.
    offset = int(0.5 * text_height)
    # Pillow anchors text at its top-left corner, whereas cv2.putText anchors
    # at the bottom-left; subtracting the height mimics the OpenCV placement.
    adjusted_position = (position[0], position[1] - text_height - offset)

    draw.text(adjusted_position, text, font=font, fill=color)
    return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
|
285
|
+
|
286
|
+
|
287
|
+
def preprocess_img(
    image,
    grayscale=True,
    threshold=True,
    threshold_method="adaptive",
    rotate="auto",
    skew=False,
    blur=True,
    blur_ksize=(5, 5),
    morph=True,
    morph_op="open",
    morph_kernel_size=(3, 3),
    enhance_contrast=True,
    clahe_clip=2.0,
    clahe_grid_size=(8, 8),
    edge_detection=False,
):
    """Preprocess an image to improve OCR accuracy.

    Steps, each controlled by its flag and applied in this order: rotation,
    skew correction, grayscale conversion, thresholding, Gaussian blur,
    morphological operation, CLAHE contrast enhancement, Canny edge detection.

    Args:
        image: File path, NumPy array (assumed BGR, as produced by
            ``cv2.imread``) or any other object ``np.array`` can convert,
            such as a PIL image (assumed RGB/RGBA).
        grayscale: Convert a colour image to grayscale.
        threshold: Binarise the image.
        threshold_method: ``'adaptive'`` or ``'global'``; other values skip
            thresholding. OpenCV's adaptive threshold needs an 8-bit
            single-channel image, so keep ``grayscale=True`` with it.
        rotate: ``'auto'`` rotates by the angle from ``detect_angle``; any
            other value leaves the orientation unchanged.
        skew: Apply ``correct_skew`` after the rotation step.
        blur: Denoise with a Gaussian blur.
        blur_ksize: Kernel size of the Gaussian blur.
        morph: Apply a morphological operation.
        morph_op: ``'open'``, ``'close'``, ``'dilate'`` or ``'erode'``; other
            values skip the step.
        morph_kernel_size: Size of the rectangular structuring element.
        enhance_contrast: Apply CLAHE (needs a single-channel image).
        clahe_clip: CLAHE clip limit.
        clahe_grid_size: CLAHE tile grid size.
        edge_detection: Finish with Canny edge detection.

    Returns:
        The processed image array.

    Raises:
        FileNotFoundError: If ``image`` is a path that OpenCV cannot read.
    """
    if isinstance(image, str):
        path = image
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(f"cv2.imread could not read image: {path!r}")
    elif not isinstance(image, np.ndarray):
        # PIL images and similar: treat channels as RGB(A) and move to BGR so
        # the OpenCV steps below see the channel order they expect.
        image = np.array(image)
        channels = image.shape[2] if image.ndim == 3 else 1
        if channels == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
        elif channels == 3:
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Rotate image
    if rotate == "auto":
        # detect_angle is imported from py2ls.ips; presumably it returns the
        # rotation in degrees -- confirm against that module.
        angle = detect_angle(image, by="fft")
        img_preprocessed = rotate_image(image, angle)
    else:
        img_preprocessed = image

    # Correct skew on the (possibly rotated) image, not the raw input.
    if skew:
        img_preprocessed = correct_skew(img_preprocessed)

    # Convert to grayscale; an already single-channel image is left as is.
    if grayscale and img_preprocessed.ndim == 3:
        img_preprocessed = cv2.cvtColor(img_preprocessed, cv2.COLOR_BGR2GRAY)

    # Thresholding
    if threshold:
        if threshold_method == "adaptive":
            img_preprocessed = cv2.adaptiveThreshold(
                img_preprocessed,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                11,
                2,
            )
        elif threshold_method == "global":
            _, img_preprocessed = cv2.threshold(
                img_preprocessed, 127, 255, cv2.THRESH_BINARY
            )

    # Denoise by Gaussian blur
    if blur:
        img_preprocessed = cv2.GaussianBlur(img_preprocessed, blur_ksize, 0)

    # Morphological processing
    if morph:
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, morph_kernel_size)
        if morph_op == "close":
            # Closing = dilation then erosion: fills small holes and gaps in
            # foreground objects while keeping their overall shape.
            img_preprocessed = cv2.morphologyEx(
                img_preprocessed, cv2.MORPH_CLOSE, kernel
            )
        elif morph_op == "open":
            # Opening = erosion then dilation: removes small specks of noise
            # while keeping larger objects intact.
            img_preprocessed = cv2.morphologyEx(
                img_preprocessed, cv2.MORPH_OPEN, kernel
            )
        elif morph_op == "dilate":
            # Dilation grows bright regions; joins broken strokes.
            img_preprocessed = cv2.dilate(img_preprocessed, kernel)
        elif morph_op == "erode":
            # Erosion shrinks bright regions; separates touching objects.
            img_preprocessed = cv2.erode(img_preprocessed, kernel)

    # Contrast enhancement
    if enhance_contrast:
        clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=clahe_grid_size)
        img_preprocessed = clahe.apply(img_preprocessed)

    # Edge detection
    if edge_detection:
        img_preprocessed = cv2.Canny(img_preprocessed, 100, 200)

    return img_preprocessed
|
428
|
+
|
429
|
+
|
430
|
+
def text_postprocess(
    text,
    spell_check=True,
    clean=True,
    filter=dict(min_length=2),
    pattern=None,
    merge=True,
):
    """Clean up OCR text fragments.

    Args:
        text: A single string or an iterable of text fragments. A string is
            treated as one fragment.
        spell_check: Replace each whitespace-separated word with the
            ``SpellChecker`` correction (the word is kept when no correction
            is available).
        clean: Strip every character that is neither a word character nor
            whitespace.
        filter: Mapping with an optional ``min_length`` key (default 2);
            fragments shorter than that are dropped. A falsy value disables
            filtering. The default dict is only read, never modified.
        pattern: Optional regular expression; only fragments containing a
            match are kept.
        merge: Join the remaining fragments with single spaces.

    Returns:
        A string when ``merge`` is true, otherwise a list of fragments.
    """

    def correct_spelling(fragments):
        spell = SpellChecker()
        corrected = []
        for fragment in fragments:
            # correction() may return None for unknown words; keep the word.
            words = [spell.correction(word) or word for word in fragment.split()]
            corrected.append(" ".join(words))
        return corrected

    results = [text] if isinstance(text, str) else list(text)

    if spell_check:
        results = correct_spelling(results)
    if clean:
        results = [re.sub(r"[^\w\s]", "", fragment) for fragment in results]
    if filter:
        min_length = filter.get("min_length", 2) if isinstance(filter, dict) else 2
        results = [fragment for fragment in results if len(fragment) >= min_length]
    if pattern:
        compiled = re.compile(pattern)
        results = [fragment for fragment in results if compiled.search(fragment)]
    if merge:
        return " ".join(results)
    return results
|
475
|
+
|
476
|
+
|
477
|
+
# https://www.jaided.ai/easyocr/documentation/
|
478
|
+
# extract text from an image with EasyOCR
|
479
|
+
def get_text(
    image,
    lang=["ch_sim", "en"],
    model="easyocr",  # "pytesseract"
    thr=0.25,
    gpu=True,
    decoder="wordbeamsearch",  # 'greedy', 'beamsearch' or 'wordbeamsearch'
    output="all",
    preprocess=None,
    postprocess="not ready",
    show=True,
    ax=None,
    cmap=cv2.COLOR_BGR2RGB,  # draw_box
    font=cv2.FONT_HERSHEY_SIMPLEX,
    font_scale=0.8,
    thickness_text=2,  # Line thickness of 2 px
    color_box=(0, 255, 0),  # draw_box
    color_text=(0, 0, 255),  # draw_box
    **kwargs,
):
    """Recognise text in an image with EasyOCR or pytesseract.

    Args:
        image: Image file path or image array.
        lang: Language name(s); resolved to backend codes through
            ``lang_auto_detect``. The default list is only read.
        model: Backend selector. Any value containing ``"easy"`` uses EasyOCR,
            everything else uses pytesseract.
        thr: Confidence threshold for EasyOCR detections.
        gpu: Passed to ``easyocr.Reader``.
        decoder: Passed to EasyOCR's ``readtext``.
        output: ``'all'``, ``'text'``, ``'score'``/``'prob'`` (EasyOCR only)
            or ``'box'``. Any other value returns all detections for EasyOCR
            and the orientation/script report for pytesseract.
        preprocess: Keyword arguments for ``preprocess_img`` (``None`` uses
            its defaults).
        postprocess: ``None`` (use ``text_postprocess`` defaults) or a dict of
            ``text_postprocess`` keyword arguments to post-process the
            ``'text'`` output of EasyOCR. Any other value, including the
            default ``"not ready"``, leaves the text untouched.
        show: Draw boxes and labels and display them on ``ax``.
        ax: Matplotlib axes; the current axes are used when ``show`` is true
            and this is ``None``.
        cmap: OpenCV colour-conversion code applied before display.
        font, thickness_text: Unused; kept for call compatibility.
        font_scale: Label font size is ``font_scale * 32``.
        color_box: Bounding-box colour.
        color_text: Label colour, forwarded to ``add_text_pil``.
        **kwargs: Extra arguments for EasyOCR ``readtext`` or the pytesseract
            calls.

    Returns:
        ``(ax, result)`` when ``show`` is true, otherwise ``result``. The
        pytesseract orientation/script report is always returned bare.

    Usage:
        results = get_text(
            'car_plate.jpg',
            lang=["en"],
            gpu=False,
            output="text",
            preprocess={
                "grayscale": True,
                "threshold": True,
                "threshold_method": 'adaptive',
                "blur": True,
                "blur_ksize": (5, 5),
                "morph": True,
                "morph_op": 'close',
                "morph_kernel_size": (3, 3),
                "enhance_contrast": True,
                "clahe_clip": 2.0,
                "clahe_grid_size": (8, 8),
                "edge_detection": False
            },
            adjust_contrast=0.7
        )
    """
    # Only touch Matplotlib when something will actually be displayed.
    if show and ax is None:
        ax = plt.gca()
    lang = lang_auto_detect(lang, model)
    print(f"detecting language(s):{lang}")
    if isinstance(image, str):
        image = cv2.imread(image)

    image_process = preprocess_img(image, **(preprocess or {}))

    kind = output.lower()
    wants_text = "t" in kind and "x" in kind
    wants_box = "box" in kind

    if "easy" in model.lower():
        reader = easyocr.Reader(lang, gpu=gpu)
        detections = reader.readtext(image_process, decoder=decoder, **kwargs)

        if show:
            # Draw on a copy so the caller's array is not modified.
            canvas = image.copy()
            for bbox, text, score in detections:
                if score > thr:
                    canvas = _draw_labelled_box(
                        canvas,
                        tuple(map(int, bbox[0])),
                        tuple(map(int, bbox[2])),
                        text,
                        font_scale,
                        color_box,
                        color_text,
                    )
            ax.imshow(cv2.cvtColor(canvas, cmap))
            ax.axis("off")

        if output == "all":
            result = detections
        elif wants_text:
            # Keep only text at or above the confidence threshold.
            result = [text_ for _, text_, score_ in detections if score_ >= thr]
            if postprocess is None or isinstance(postprocess, dict):
                options = postprocess
                if options is None:
                    options = dict(
                        spell_check=True,
                        clean=True,
                        filter=dict(min_length=2),
                        pattern=None,
                        merge=True,
                    )
                processed = text_postprocess(result, **options)
                # Fall back to the raw text if nothing is returned.
                if processed is not None:
                    result = processed
        elif "score" in kind or "prob" in kind:
            result = [score_ for _, _, score_ in detections]
        elif wants_box:
            result = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
        else:
            result = detections
        return (ax, result) if show else result

    # pytesseract
    lang_str = "+".join(lang)
    text = pytesseract.image_to_string(image_process, lang=lang_str, **kwargs)
    bboxes = pytesseract.image_to_boxes(image_process, lang=lang_str, **kwargs)

    if show:
        height = image.shape[0]
        canvas = image.copy()
        for line in bboxes.splitlines():
            parts = line.split()
            if len(parts) != 6:
                continue
            char = parts[0]
            left, bottom, right, top = map(int, parts[1:5])
            # Tesseract measures y from the bottom edge; flip to the top-left
            # origin used by OpenCV.
            canvas = _draw_labelled_box(
                canvas,
                (left, height - top),
                (right, height - bottom),
                char,
                font_scale,
                color_box,
                color_text,
            )
        ax.imshow(cv2.cvtColor(canvas, cmap))
        ax.axis("off")

    if output == "all":
        # Verbose data including boxes, confidences, line and page numbers.
        result = pytesseract.image_to_data(image_process, lang=lang_str, **kwargs)
    elif wants_text:
        result = text
    elif wants_box:
        result = bboxes
    else:
        # Orientation and script detection report.
        return pytesseract.image_to_osd(image_process, **kwargs)
    return (ax, result) if show else result


def _draw_labelled_box(
    image, top_left, bottom_right, label, font_scale, color_box, color_text
):
    """Draw one rectangle plus its text label and return the updated image."""
    image = cv2.rectangle(image, top_left, bottom_right, color_box, 2)
    return add_text_pil(
        image,
        label,
        top_left,
        font_size=font_scale * 32,
        color=color_text,
    )
|
682
|
+
|
683
|
+
|
684
|
+
def draw_box(
    image,
    detections=None,
    thr=0.25,
    cmap=cv2.COLOR_BGR2RGB,
    color_box=(0, 255, 0),  # draw_box
    color_text=(0, 0, 255),  # draw_box
    font_scale=0.8,
    show=True,
    ax=None,
    **kwargs,
):
    """Draw OCR bounding boxes and labels on an image.

    Args:
        image: Image file path or image array.
        detections: Iterable of ``(bbox, text, score)`` triples. When
            ``None``, they are obtained from ``get_text`` with
            ``output="all"``.
        thr: Only detections with a score above this value are drawn.
        cmap: OpenCV colour-conversion code applied to the annotated image.
        color_box: Bounding-box colour.
        color_text: Label colour, forwarded to ``add_text_pil``.
        font_scale: Label font size is ``font_scale * 32``.
        show: Display the result on ``ax``.
        ax: Matplotlib axes; the current axes are used when ``show`` is true
            and this is ``None``.
        **kwargs: Forwarded to ``get_text`` when detections are computed.

    Returns:
        The annotated image after the ``cmap`` colour conversion.
    """
    if isinstance(image, str):
        image = cv2.imread(image)
    if detections is None:
        detections = get_text(image=image, show=False, output="all", **kwargs)

    # Annotate a copy so the caller's array stays untouched.
    canvas = image.copy()
    for bbox, text, score in detections:
        if score > thr:
            top_left = tuple(map(int, bbox[0]))
            bottom_right = tuple(map(int, bbox[2]))
            canvas = cv2.rectangle(canvas, top_left, bottom_right, color_box, 2)
            canvas = add_text_pil(
                canvas, text, top_left, font_size=font_scale * 32, color=color_text
            )

    img_cmp = cv2.cvtColor(canvas, cmap)
    if show:
        # Only create or fetch axes when something is displayed.
        if ax is None:
            ax = plt.gca()
        ax.imshow(img_cmp)
        ax.axis("off")
    return img_cmp
|