py2ls 0.1.10.0__py3-none-any.whl → 0.1.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. py2ls/.git/COMMIT_EDITMSG +1 -1
  2. py2ls/.git/FETCH_HEAD +1 -1
  3. py2ls/.git/index +0 -0
  4. py2ls/.git/logs/HEAD +1 -0
  5. py2ls/.git/logs/refs/heads/main +1 -0
  6. py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
  7. py2ls/.git/logs/refs/remotes/origin/main +1 -0
  8. py2ls/.git/objects/27/aa6074f652bc6f7078f8647489d9ee8e24f0e2 +0 -0
  9. py2ls/.git/objects/28/c2969d785c1b892c2a96b3f00eba63a59811b3 +0 -0
  10. py2ls/.git/objects/2a/fdf45791a26d42ccead35ace76a8f0b2a56561 +0 -0
  11. py2ls/.git/objects/34/b6f3a2ee84f39bed4eee57f2c0e0afb994feb1 +0 -0
  12. py2ls/.git/objects/35/1a5f491ab97eee9d1ee699478d75a8bb5d3dc2 +0 -0
  13. py2ls/.git/objects/39/b13be65125556784e44c7a1d9821703c7ab67e +0 -0
  14. py2ls/.git/objects/3b/507acc7f23391644cc0b824b1e79fd2677a362 +0 -0
  15. py2ls/.git/objects/3d/9d10d27724657a436c65a6254bfd213d4b3562 +0 -0
  16. py2ls/.git/objects/47/6cbd5a7c5e35cddef2f8a38bdc4896d403b095 +0 -0
  17. py2ls/.git/objects/78/063f4c863fc371ec0313303c0a81283b35d9b6 +0 -0
  18. py2ls/.git/objects/82/70b319ce4046854fbe7dc41054b6c2d112dab2 +0 -0
  19. py2ls/.git/objects/85/aee46f478e9afdb84d50a05242c53b04ed2e21 +0 -0
  20. py2ls/.git/objects/86/e288b46f8fe179907e4413f665aeb5053fddb1 +0 -0
  21. py2ls/.git/objects/94/f7dbe88e80c4205a901b71eb8f181974376bba +0 -0
  22. py2ls/.git/objects/9b/ec5ee2236ee2d5532c36bfd132e23c58fdb69c +0 -0
  23. py2ls/.git/objects/b3/4f7f271c6d6105e35a6556ffda71d03afe8c96 +0 -0
  24. py2ls/.git/objects/b3/69579064bde9de9a19d114fc33e4e48cc8c0e4 +0 -0
  25. py2ls/.git/objects/bf/b54d65922ce1dfda1aaa014913a54e7172d0bc +0 -0
  26. py2ls/.git/objects/c1/397c6ed72c4e20ef6b9ab83163e9a6baba5b45 +0 -0
  27. py2ls/.git/objects/cc/45df1d317a2eb63ff1ff3a5f3b4a9f98fd92b5 +0 -0
  28. py2ls/.git/objects/d6/39e8af592cd75a318d8affddd1bcc70c2095f2 +0 -0
  29. py2ls/.git/objects/db/3f2cd643292057936230b95cf7ec3046affe11 +0 -0
  30. py2ls/.git/objects/de/214c626ac2dd2685bfaa0bc0fc20f528d014d7 +0 -0
  31. py2ls/.git/objects/e4/6c715352db9fe3c887a635f1916df4ca1f4ff9 +0 -0
  32. py2ls/.git/objects/e5/0580a0bd1e1b3d29f834382b80fceb61d5cf0c +0 -0
  33. py2ls/.git/objects/ec/d980279432b13f0374b90ca439a6329cdece0f +0 -0
  34. py2ls/.git/objects/ee/cee64eacaff022dcdc509c0c2b1da492f21060 +0 -0
  35. py2ls/.git/objects/f5/61c3c1bf1c9ea9c9d1f556a7be2869f71f3bdf +0 -0
  36. py2ls/.git/refs/heads/main +1 -1
  37. py2ls/.git/refs/remotes/origin/main +1 -1
  38. py2ls/batman.py +62 -47
  39. py2ls/ips.py +771 -3
  40. py2ls/netfinder.py +125 -1
  41. py2ls/ocr.py +721 -0
  42. py2ls/plot.py +24 -0
  43. py2ls/translator.py +470 -119
  44. {py2ls-0.1.10.0.dist-info → py2ls-0.1.10.2.dist-info}/METADATA +1 -1
  45. {py2ls-0.1.10.0.dist-info → py2ls-0.1.10.2.dist-info}/RECORD +46 -17
  46. {py2ls-0.1.10.0.dist-info → py2ls-0.1.10.2.dist-info}/WHEEL +1 -1
py2ls/ocr.py ADDED
@@ -0,0 +1,721 @@
+ import easyocr
+ import cv2
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from py2ls.ips import (
+     strcmp,
+     detect_angle,
+ )  # Ensure these functions are defined in your 'ips' module
+ from spellchecker import SpellChecker
+ import re
+
+ from PIL import Image, ImageDraw, ImageFont
+ import PIL.PngImagePlugin
+ import pytesseract
+
+ """
+ Optical Character Recognition (OCR)
+ """
+
+ # Valid language codes
+ lang_valid = {
+     "easyocr": {
+         "english": "en",
+         "thai": "th",
+         "chinese_traditional": "ch_tra",
+         "chinese": "ch_sim",
+         "japanese": "ja",
+         "korean": "ko",
+         "tamil": "ta",
+         "telugu": "te",
+         "kannada": "kn",
+         "german": "de",
+     },
+     "pytesseract": {
+         "afrikaans": "afr",
+         "amharic": "amh",
+         "arabic": "ara",
+         "assamese": "asm",
+         "azerbaijani": "aze",
+         "azerbaijani_cyrillic": "aze_cyrl",
+         "belarusian": "bel",
+         "bengali": "ben",
+         "tibetan": "bod",
+         "bosnian": "bos",
+         "breton": "bre",
+         "bulgarian": "bul",
+         "catalan": "cat",
+         "cebuano": "ceb",
+         "czech": "ces",
+         "chinese": "chi_sim",
+         "chinese_vertical": "chi_sim_vert",
+         "chinese_traditional": "chi_tra",
+         "chinese_traditional_vertical": "chi_tra_vert",
+         "cherokee": "chr",
+         "corsican": "cos",
+         "welsh": "cym",
+         "danish": "dan",
+         "danish_fraktur": "dan_frak",
+         "german": "deu",
+         "german_fraktur": "deu_frak",
+         "german_latf": "deu_latf",
+         "dhivehi": "div",
+         "dzongkha": "dzo",
+         "greek": "ell",
+         "english": "eng",
+         "middle_english": "enm",
+         "esperanto": "epo",
+         "math_equations": "equ",
+         "estonian": "est",
+         "basque": "eus",
+         "faroese": "fao",
+         "persian": "fas",
+         "filipino": "fil",
+         "finnish": "fin",
+         "french": "fra",
+         "middle_french": "frm",
+         "frisian": "fry",
+         "scottish_gaelic": "gla",
+         "irish": "gle",
+         "galician": "glg",
+         "ancient_greek": "grc",
+         "gujarati": "guj",
+         "haitian_creole": "hat",
+         "hebrew": "heb",
+         "hindi": "hin",
+         "croatian": "hrv",
+         "hungarian": "hun",
+         "armenian": "hye",
+         "inuktitut": "iku",
+         "indonesian": "ind",
+         "icelandic": "isl",
+         "italian": "ita",
+         "old_italian": "ita_old",
+         "javanese": "jav",
+         "japanese": "jpn",
+         "japanese_vertical": "jpn_vert",
+         "kannada": "kan",
+         "georgian": "kat",
+         "old_georgian": "kat_old",
+         "kazakh": "kaz",
+         "khmer": "khm",
+         "kyrgyz": "kir",
+         "kurdish_kurmanji": "kmr",
+         "korean": "kor",
+         "korean_vertical": "kor_vert",
+         "lao": "lao",
+         "latin": "lat",
+         "latvian": "lav",
+         "lithuanian": "lit",
+         "luxembourgish": "ltz",
+         "malayalam": "mal",
+         "marathi": "mar",
+         "macedonian": "mkd",
+         "maltese": "mlt",
+         "mongolian": "mon",
+         "maori": "mri",
+         "malay": "msa",
+         "burmese": "mya",
+         "nepali": "nep",
+         "dutch": "nld",
+         "norwegian": "nor",
+         "occitan": "oci",
+         "oriya": "ori",
+         "script_detection": "osd",
+         "punjabi": "pan",
+         "polish": "pol",
+         "portuguese": "por",
+     },
+ }
+
+
+ def lang_auto_detect(
+     lang,
+     model="easyocr",  # "easyocr" or "pytesseract"
+ ):
+     res_lang = []
+     if isinstance(lang, str):
+         lang = [lang]
+     for i in lang:
+         res_lang.append(lang_valid[model][strcmp(i, list(lang_valid[model].keys()))[0]])
+     return res_lang
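A quick sketch of the fuzzy lookup above (`strcmp` returns the closest matching key as the first element of its result, so near-miss spellings still resolve; the inputs here are illustrative):

lang_auto_detect(["English", "chinese"], model="easyocr")    # -> ["en", "ch_sim"]
lang_auto_detect("german", model="pytesseract")              # -> ["deu"]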
+
+
+ def determine_src_points(image):
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+     contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+     # Sort contours by area and keep the five largest
+     contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
+     src_points = None
+
+     for contour in contours:
+         epsilon = 0.02 * cv2.arcLength(contour, True)
+         approx = cv2.approxPolyDP(contour, epsilon, True)
+         if len(approx) == 4:  # We need a quadrilateral
+             src_points = np.array(approx, dtype="float32")
+             break
+
+     if src_points is not None:
+         # Order points in a specific order (top-left, top-right, bottom-right, bottom-left)
+         src_points = src_points.reshape(4, 2)
+         rect = np.zeros((4, 2), dtype="float32")
+         s = src_points.sum(axis=1)
+         diff = np.diff(src_points, axis=1)
+         rect[0] = src_points[np.argmin(s)]
+         rect[2] = src_points[np.argmax(s)]
+         rect[1] = src_points[np.argmin(diff)]
+         rect[3] = src_points[np.argmax(diff)]
+         src_points = rect
+     else:
+         # If no rectangle is detected, fall back to default or user-defined points
+         height, width = image.shape[:2]
+         src_points = np.array(
+             [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
+             dtype="float32",
+         )
+     return src_points
+
+
+ def get_default_camera_matrix(image_shape):
+     height, width = image_shape[:2]
+     focal_length = width
+     center = (width / 2, height / 2)
+     camera_matrix = np.array(
+         [[focal_length, 0, center[0]], [0, focal_length, center[1]], [0, 0, 1]],
+         dtype="float32",
+     )
+     dist_coeffs = np.zeros((4, 1))  # Assuming no distortion
+     return camera_matrix, dist_coeffs
+
+
+ def correct_perspective(image, src_points):
+     # Define the destination points for the perspective transform
+     width, height = 1000, 1000  # Adjust size as needed
+     dst_points = np.array(
+         [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
+         dtype="float32",
+     )
+
+     # Calculate the perspective transform matrix
+     M = cv2.getPerspectiveTransform(src_points, dst_points)
+     # Apply the perspective transform
+     corrected_image = cv2.warpPerspective(image, M, (width, height))
+     return corrected_image
+
+
+ def detect_text_orientation(image):
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     edges = cv2.Canny(gray, 50, 150, apertureSize=3)
+     lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)
+
+     if lines is None:
+         return 0
+
+     angles = []
+     for rho, theta in lines[:, 0]:
+         angle = theta * 180 / np.pi
+         if angle > 90:
+             angle -= 180
+         angles.append(angle)
+
+     median_angle = np.median(angles)
+     return median_angle
+
+
+ def rotate_image(image, angle):
+     center = (image.shape[1] // 2, image.shape[0] // 2)
+     rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
+     rotated_image = cv2.warpAffine(
+         image, rot_mat, (image.shape[1], image.shape[0]), flags=cv2.INTER_LINEAR
+     )
+     return rotated_image
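`detect_text_orientation` and `rotate_image` compose into a simple deskew step; a sketch (the 0.5-degree tolerance is an assumption, not part of the module):

angle = detect_text_orientation(image)   # median Hough-line angle, in degrees
if abs(angle) > 0.5:                     # hypothetical tolerance to skip needless resampling
    image = rotate_image(image, angle)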
+
+
+ def correct_skew(image):
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     coords = np.column_stack(np.where(gray > 0))
+     angle = cv2.minAreaRect(coords)[-1]
+     if angle < -45:
+         angle = -(90 + angle)
+     else:
+         angle = -angle
+     (h, w) = image.shape[:2]
+     center = (w // 2, h // 2)
+     M = cv2.getRotationMatrix2D(center, angle, 1.0)
+     rotated = cv2.warpAffine(
+         image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
+     )
+     return rotated
+
+
+ def undistort_image(image, camera_matrix, dist_coeffs):
+     return cv2.undistort(image, camera_matrix, dist_coeffs)
+
+
+ def add_text_pil(image, text, position, font_size=10, color=(255, 0, 0)):
+     # Convert the image to PIL format
+     pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+     # Create a drawing context
+     draw = ImageDraw.Draw(pil_image)
+     # Define the font (make sure to use a font that supports Chinese characters)
+     try:
+         font = ImageFont.truetype(
+             "/System/Library/Fonts/Supplemental/Songti.ttc", int(font_size)
+         )
+     except IOError:
+         font = ImageFont.load_default()
+
+     # Calculate the top-left position
+     # Measure text size using textbbox
+     text_bbox = draw.textbbox((0, 0), text, font=font)
+     text_width = text_bbox[2] - text_bbox[0]
+     text_height = text_bbox[3] - text_bbox[1]
+     # Calculate 50% of the text height for the upward adjustment
+     offset = int(0.5 * text_height)  # shift upward by 50%
+     # Adjust position to match OpenCV's bottom-left alignment
+     adjusted_position = (position[0], position[1] - text_height - offset)
+
+     # Add text to the image
+     draw.text(adjusted_position, text, font=font, fill=color)
+     # Convert the image back to OpenCV format
+     image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
+     return image
+
+
+ def preprocess_img(
+     image,
+     grayscale=True,
+     threshold=True,
+     threshold_method="adaptive",
+     rotate="auto",
+     skew=False,
+     blur=True,
+     blur_ksize=(5, 5),
+     morph=True,
+     morph_op="open",
+     morph_kernel_size=(3, 3),
+     enhance_contrast=True,
+     clahe_clip=2.0,
+     clahe_grid_size=(8, 8),
+     edge_detection=False,
+ ):
+     """
+     Preprocess an image to improve OCR accuracy.
+
+     Preprocessing steps:
+         Grayscale conversion: if `grayscale` is True, convert the image to grayscale.
+         Binarization: threshold the image according to `threshold` and `threshold_method`.
+         Denoising: reduce noise with a Gaussian blur.
+         Morphology: apply the operation selected by `morph_op` (opening, closing,
+             dilation, or erosion) to remove noise or fill holes.
+         Contrast enhancement: boost image contrast with CLAHE.
+         Edge detection: if `edge_detection` is True, run the Canny edge detector.
+
+     Parameters:
+         image: input image path or image data.
+         grayscale: whether to convert the image to grayscale.
+         threshold: whether to binarize the image.
+         threshold_method: binarization method, either 'global' or 'adaptive'.
+         rotate: "auto" to detect the rotation angle and correct it automatically.
+         skew: whether to correct skew.
+         blur: whether to denoise the image with a Gaussian blur.
+         blur_ksize: kernel size of the Gaussian blur.
+         morph: whether to apply morphological processing.
+         morph_op: type of morphological operation: 'open' (opening), 'close'
+             (closing), 'dilate' (dilation), or 'erode' (erosion).
+         morph_kernel_size: kernel size of the morphological operation.
+         enhance_contrast: whether to enhance image contrast.
+         clahe_clip: clip limit for CLAHE (Contrast Limited Adaptive Histogram Equalization).
+         clahe_grid_size: tile grid size for CLAHE.
+         edge_detection: whether to run edge detection.
+     """
+     if isinstance(image, PIL.PngImagePlugin.PngImageFile):
+         image = np.array(image)
+     if isinstance(image, str):
+         image = cv2.imread(image)  # cv2.imread already returns BGR
+     else:
+         if not isinstance(image, np.ndarray):
+             image = np.array(image)
+         if image.ndim == 3 and image.shape[2] == 4:  # Check if it has an alpha channel
+             # Drop the alpha channel (if needed), or handle it as required
+             image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
+         if image.ndim == 3:
+             # Convert RGB to BGR for OpenCV compatibility
+             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+     # Rotate the image
+     if rotate == "auto":
+         angle = detect_angle(image, by="fft")
+         img_preprocessed = rotate_image(image, angle)
+     else:
+         img_preprocessed = image
+
+     # Correct skew
+     if skew:
+         img_preprocessed = correct_skew(img_preprocessed)
+
+     # Convert to grayscale
+     if grayscale:
+         img_preprocessed = cv2.cvtColor(img_preprocessed, cv2.COLOR_BGR2GRAY)
+
+     # Thresholding
+     if threshold:
+         if threshold_method == "adaptive":
+             img_preprocessed = cv2.adaptiveThreshold(
+                 img_preprocessed,
+                 255,
+                 cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                 cv2.THRESH_BINARY,
+                 11,
+                 2,
+             )
+         elif threshold_method == "global":
+             _, img_preprocessed = cv2.threshold(
+                 img_preprocessed, 127, 255, cv2.THRESH_BINARY
+             )
+
+     # Denoise with a Gaussian blur
+     if blur:
+         img_preprocessed = cv2.GaussianBlur(img_preprocessed, blur_ksize, 0)
+
+     # Morphological processing
+     if morph:
+         kernel = cv2.getStructuringElement(cv2.MORPH_RECT, morph_kernel_size)
+         if morph_op == "close":  # closing
+             # Purpose: closing fills small holes or gaps inside foreground objects
+             # while preserving their shape and size.
+             # How it works: dilate first, then erode. Dilation fills the small holes
+             # or gaps; erosion restores the shape of the larger objects.
+             # Effects:
+             #     fills small holes and gaps in foreground objects;
+             #     smooths the edges of larger objects.
+             # Typical use: filling small holes or gaps inside objects.
+             img_preprocessed = cv2.morphologyEx(
+                 img_preprocessed, cv2.MORPH_CLOSE, kernel
+             )
+         elif morph_op == "open":  # opening
+             # Purpose: opening removes small objects or noise from the background
+             # while preserving the shape and size of larger objects.
+             # How it works: erode first, then dilate. Erosion removes small-scale
+             # noise; dilation restores the size of the remaining objects.
+             # Effects:
+             #     removes small foreground objects;
+             #     smooths the contours of larger objects.
+             # Typical use: removing small noise or artifacts while keeping larger
+             # objects intact.
+             img_preprocessed = cv2.morphologyEx(
+                 img_preprocessed, cv2.MORPH_OPEN, kernel
+             )
+         elif morph_op == "dilate":  # dilation
+             # Purpose: dilation adds pixels to object boundaries. It can fill small
+             # holes inside objects or connect neighboring objects.
+             # How it works: as the kernel moves over the image, each pixel is set to
+             # the maximum value within the area covered by the kernel.
+             # Effects:
+             #     objects grow larger;
+             #     small holes or gaps inside objects are filled.
+             # Typical use: filling small holes or reconnecting broken object parts.
+             img_preprocessed = cv2.dilate(img_preprocessed, kernel)
+         elif morph_op == "erode":  # erosion
+             # Purpose: erosion removes pixels from object boundaries. It can remove
+             # small-scale noise and separate objects that touch each other.
+             # How it works: as the kernel (structuring element) moves over the image,
+             # each pixel is set to the minimum value within the area covered by the kernel.
+             # Effects:
+             #     objects shrink;
+             #     small white specks are removed (in white-foreground/black-background images).
+             # Typical use: removing small noise in binary images or separating touching objects.
+             img_preprocessed = cv2.erode(img_preprocessed, kernel)
+
+     # Contrast enhancement
+     if enhance_contrast:
+         clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=clahe_grid_size)
+         img_preprocessed = clahe.apply(img_preprocessed)
+
+     # Edge detection
+     if edge_detection:
+         img_preprocessed = cv2.Canny(img_preprocessed, 100, 200)
+
+     return img_preprocessed
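A minimal sketch of calling `preprocess_img` on its own, mirroring the defaults documented above (the file name is hypothetical):

img_ready = preprocess_img(
    "page.png",                   # hypothetical path; ndarrays and PIL images also work
    threshold_method="adaptive",  # per-neighborhood binarization for uneven lighting
    morph_op="open",              # remove small specks before OCR
)
plt.imshow(img_ready, cmap="gray")
plt.axis("off")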
+
+
+ def text_postprocess(
+     text,
+     spell_check=True,
+     clean=True,
+     filter=dict(min_length=2),
+     pattern=None,
+     merge=True,
+ ):
+
+     def correct_spelling(text_list):
+         spell = SpellChecker()
+         # spell.correction() may return None for unknown tokens; keep the original word then
+         corrected_text = [spell.correction(word) or word for word in text_list]
+         return corrected_text
+
+     def clean_text(text_list):
+         cleaned_text = [re.sub(r"[^\w\s]", "", text) for text in text_list]
+         return cleaned_text
+
+     def filter_text(text_list, min_length=2):
+         filtered_text = [text for text in text_list if len(text) >= min_length]
+         return filtered_text
+
+     def extract_patterns(text_list, pattern):
+         pattern = re.compile(pattern)
+         matched_text = [text for text in text_list if pattern.search(text)]
+         return matched_text
+
+     def merge_fragments(text_list):
+         merged_text = " ".join(text_list)
+         return merged_text
+
+     # The helpers operate on lists of tokens, so split plain strings first
+     results = text.split() if isinstance(text, str) else text
+     if spell_check:
+         results = correct_spelling(results)
+     if clean:
+         results = clean_text(results)
+     if filter:
+         results = filter_text(results, min_length=filter.get("min_length", 2))
+     if pattern:
+         results = extract_patterns(results, pattern)
+     if merge:
+         results = merge_fragments(results)
+     return results
+
+
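A short sketch of the post-processing chain on a noisy OCR fragment (the exact correction depends on SpellChecker's dictionary, so the output shown is indicative only):

cleaned = text_postprocess("Helo wrld !!")
# tokens are spell-checked, stripped of punctuation, length-filtered,
# and merged back into one string, e.g. "Hello world"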
+ # https://www.jaided.ai/easyocr/documentation/
+ # extract text from an image with EasyOCR
+ def get_text(
+     image,
+     lang=["ch_sim", "en"],
+     model="easyocr",  # "easyocr" or "pytesseract"
+     thr=0.25,
+     gpu=True,
+     decoder="wordbeamsearch",  # 'greedy', 'beamsearch' or 'wordbeamsearch' (highly accurate)
+     output="all",
+     preprocess=None,
+     postprocess="not ready",  # postprocessing is not ready yet; pass a dict (or None for defaults) to opt in
+     show=True,
+     ax=None,
+     cmap=cv2.COLOR_BGR2RGB,  # draw_box
+     font=cv2.FONT_HERSHEY_SIMPLEX,
+     font_scale=0.8,
+     thickness_text=2,  # line thickness, 2 px
+     color_box=(0, 255, 0),  # draw_box
+     color_text=(0, 0, 255),  # draw_box
+     **kwargs,
+ ):
+     """
+     Purpose: recognize text with EasyOCR (or pytesseract), with configurable image
+     preprocessing and result display.
+     Parameters:
+         image: input image path or image data.
+         lang: list of OCR languages.
+         thr: confidence threshold; detections below it are filtered out.
+         gpu: whether to use the GPU.
+         output: output type: 'all' (return all detections), 'text' (return the
+             text), 'score' (return confidence scores), or 'box' (return bounding boxes).
+         preprocess: dict of preprocessing parameters passed to preprocess_img.
+         show: whether to display the annotated image.
+         ax: Matplotlib axes used to display the image.
+         cmap: color conversion used when displaying the image.
+         color_box: bounding-box color.
+         color_text: text color.
+         kwargs: extra arguments passed to EasyOCR's readtext function.
+
+     # Usage
+     image_path = 'car_plate.jpg'  # replace with your image path
+     results = get_text(
+         image_path,
+         lang=["en"],
+         gpu=False,
+         output="text",
+         preprocess={
+             "grayscale": True,
+             "threshold": True,
+             "threshold_method": 'adaptive',
+             "blur": True,
+             "blur_ksize": (5, 5),
+             "morph": True,
+             "morph_op": 'close',
+             "morph_kernel_size": (3, 3),
+             "enhance_contrast": True,
+             "clahe_clip": 2.0,
+             "clahe_grid_size": (8, 8),
+             "edge_detection": False
+         },
+         adjust_contrast=0.7
+     )
+     """
+
+     if ax is None:
+         ax = plt.gca()
+     lang = lang_auto_detect(lang, model)
+     print(f"detecting language(s): {lang}")
+     if isinstance(image, str):
+         image = cv2.imread(image)
+
+     # Ensure lang is always a list
+     if isinstance(lang, str):
+         lang = [lang]
+
+     # ! preprocessing img
+     if preprocess is None:
+         preprocess = {}
+     image_process = preprocess_img(image, **preprocess)
+     if "easy" in model.lower():
+         # Perform OCR on the image
+         reader = easyocr.Reader(lang, gpu=gpu)
+         detections = reader.readtext(image_process, decoder=decoder, **kwargs)
+         if postprocess is None:
+             postprocess = dict(
+                 spell_check=True,
+                 clean=True,
+                 filter=dict(min_length=2),
+                 pattern=None,
+                 merge=True,
+             )
+         text_corr = []
+         if isinstance(postprocess, dict):  # the "not ready" default skips this step
+             for _, text, _ in detections:
+                 text_corr.append(text_postprocess(text, **postprocess))
+         if show:
+             for bbox, text, score in detections:
+                 if score > thr:
+                     top_left = tuple(map(int, bbox[0]))
+                     bottom_right = tuple(map(int, bbox[2]))
+                     image = cv2.rectangle(image, top_left, bottom_right, color_box, 2)
+                     # image = cv2.putText(
+                     #     image, text, top_left, font, font_scale, color_text, thickness_text
+                     # )
+                     image = add_text_pil(
+                         image,
+                         text,
+                         top_left,
+                         font_size=font_scale * 32,
+                         color=color_text,
+                     )
+             img_cmp = cv2.cvtColor(image, cmap)
+             ax.imshow(img_cmp)
+             ax.axis("off")
+             # plt.show()
+             # Return results according to the requested output type
+             if output == "all":
+                 return ax, detections
+             elif "t" in output.lower() and "x" in output.lower():
+                 # extract text, filtering out low-confidence results
+                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
+                 if text_corr:
+                     return ax, text_corr
+                 else:
+                     return ax, text
+             elif "score" in output.lower() or "prob" in output.lower():
+                 # extract confidence scores
+                 scores = [score_ for _, _, score_ in detections]
+                 return ax, scores
+             elif "box" in output.lower():
+                 # extract bounding boxes, filtering out low-confidence results
+                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+                 return ax, bboxes
+             else:
+                 # by default, return all detection info
+                 return ax, detections
+         else:
+             # Return results according to the requested output type
+             if output == "all":
+                 return detections
+             elif "t" in output.lower() and "x" in output.lower():
+                 # extract text, filtering out low-confidence results
+                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
+                 return text
+             elif "score" in output.lower() or "prob" in output.lower():
+                 # extract confidence scores
+                 scores = [score_ for _, _, score_ in detections]
+                 return scores
+             elif "box" in output.lower():
+                 # extract bounding boxes, filtering out low-confidence results
+                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+                 return bboxes
+             else:
+                 # by default, return all detection info
+                 return detections
+     else:  # "pytesseract"
+         text = pytesseract.image_to_string(image_process, lang="+".join(lang), **kwargs)
+         bboxes = pytesseract.image_to_boxes(image_process, **kwargs)
+         if show:
+             # Image dimensions
+             h, w = image.shape[:2]
+
+             for line in bboxes.splitlines():
+                 parts = line.split()
+                 if len(parts) == 6:
+                     char, left, bottom, right, top, _ = parts
+                     left, bottom, right, top = map(int, [left, bottom, right, top])
+
+                     # Convert Tesseract coordinates (bottom-left and top-right) to (top-left and bottom-right)
+                     top_left = (left, h - top)
+                     bottom_right = (right, h - bottom)
+
+                     # Draw the bounding box
+                     image = cv2.rectangle(image, top_left, bottom_right, color_box, 2)
+                     image = add_text_pil(
+                         image,
+                         char,
+                         top_left,
+                         font_size=font_scale * 32,
+                         color=color_text,
+                     )
+             img_cmp = cv2.cvtColor(image, cmap)
+             ax.imshow(img_cmp)
+             ax.axis("off")
+             if output == "all":
+                 # Get verbose data including boxes, confidences, line and page numbers
+                 detections = pytesseract.image_to_data(image_process)
+                 return ax, detections
+             elif "t" in output.lower() and "x" in output.lower():
+                 return ax, text
+             elif "box" in output.lower():
+                 return ax, bboxes
+             else:
+                 # Get information about orientation and script detection
+                 return pytesseract.image_to_osd(image_process, **kwargs)
+         else:
+             if output == "all":
+                 # Get verbose data including boxes, confidences, line and page numbers
+                 detections = pytesseract.image_to_data(image_process, **kwargs)
+                 return detections
+             elif "t" in output.lower() and "x" in output.lower():
+                 return text
+             elif "box" in output.lower():
+                 return bboxes
+             else:
+                 # Get information about orientation and script detection
+                 return pytesseract.image_to_osd(image_process, **kwargs)
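A minimal sketch of the pytesseract branch to complement the EasyOCR usage in the docstring (the image path is hypothetical; Tesseract plus the listed language packs must be installed):

ax, text = get_text(
    "invoice.png",               # hypothetical path
    lang=["english", "german"],  # resolved to "eng+deu" by lang_auto_detect
    model="pytesseract",
    output="text",
)
print(text)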
+
+
+ def draw_box(
+     image,
+     detections=None,
+     thr=0.25,
+     cmap=cv2.COLOR_BGR2RGB,
+     color_box=(0, 255, 0),  # draw_box
+     color_text=(0, 0, 255),  # draw_box
+     font_scale=0.8,
+     show=True,
+     ax=None,
+     **kwargs,
+ ):
+
+     if ax is None:
+         ax = plt.gca()
+     if isinstance(image, str):
+         image = cv2.imread(image)
+     if detections is None:
+         detections = get_text(image=image, show=0, output="all", **kwargs)
+
+     for bbox, text, score in detections:
+         if score > thr:
+             top_left = tuple(map(int, bbox[0]))
+             bottom_right = tuple(map(int, bbox[2]))
+             image = cv2.rectangle(image, top_left, bottom_right, color_box, 2)
+             # image = cv2.putText(
+             #     image, text, top_left, font, font_scale, color_text, thickness_text
+             # )
+             image = add_text_pil(
+                 image, text, top_left, font_size=font_scale * 32, color=color_text
+             )
+
+     img_cmp = cv2.cvtColor(image, cmap)
+     if show:
+         ax.imshow(img_cmp)
+         ax.axis("off")
+         # plt.show()
+     return img_cmp
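Finally, a usage sketch for `draw_box`, which re-runs `get_text` (forwarding extra keyword arguments) when no detections are supplied (the image path is hypothetical):

annotated = draw_box("car_plate.jpg", lang=["en"], gpu=False)  # hypothetical image
plt.show()  # draw_box has already rendered onto the current axes when show=True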