py2ls 0.1.10.12__py3-none-any.whl → 0.2.7.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of py2ls might be problematic.
- py2ls/.DS_Store +0 -0
- py2ls/.git/.DS_Store +0 -0
- py2ls/.git/index +0 -0
- py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
- py2ls/.git/objects/.DS_Store +0 -0
- py2ls/.git/refs/.DS_Store +0 -0
- py2ls/ImageLoader.py +621 -0
- py2ls/__init__.py +7 -5
- py2ls/apptainer2ls.py +3940 -0
- py2ls/batman.py +164 -42
- py2ls/bio.py +2595 -0
- py2ls/cell_image_clf.py +1632 -0
- py2ls/container2ls.py +4635 -0
- py2ls/corr.py +475 -0
- py2ls/data/.DS_Store +0 -0
- py2ls/data/email/email_html_template.html +88 -0
- py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
- py2ls/data/hyper_param_tabrepo_2024.py +1753 -0
- py2ls/data/mygenes_fields_241022.txt +355 -0
- py2ls/data/re_common_pattern.json +173 -0
- py2ls/data/sns_info.json +74 -0
- py2ls/data/styles/.DS_Store +0 -0
- py2ls/data/styles/example/.DS_Store +0 -0
- py2ls/data/styles/stylelib/.DS_Store +0 -0
- py2ls/data/styles/stylelib/grid.mplstyle +15 -0
- py2ls/data/styles/stylelib/high-contrast.mplstyle +6 -0
- py2ls/data/styles/stylelib/high-vis.mplstyle +4 -0
- py2ls/data/styles/stylelib/ieee.mplstyle +15 -0
- py2ls/data/styles/stylelib/light.mplstyl +6 -0
- py2ls/data/styles/stylelib/muted.mplstyle +6 -0
- py2ls/data/styles/stylelib/nature-reviews-latex.mplstyle +616 -0
- py2ls/data/styles/stylelib/nature-reviews.mplstyle +616 -0
- py2ls/data/styles/stylelib/nature.mplstyle +31 -0
- py2ls/data/styles/stylelib/no-latex.mplstyle +10 -0
- py2ls/data/styles/stylelib/notebook.mplstyle +36 -0
- py2ls/data/styles/stylelib/paper.mplstyle +290 -0
- py2ls/data/styles/stylelib/paper2.mplstyle +305 -0
- py2ls/data/styles/stylelib/retro.mplstyle +4 -0
- py2ls/data/styles/stylelib/sans.mplstyle +10 -0
- py2ls/data/styles/stylelib/scatter.mplstyle +7 -0
- py2ls/data/styles/stylelib/science.mplstyle +48 -0
- py2ls/data/styles/stylelib/std-colors.mplstyle +4 -0
- py2ls/data/styles/stylelib/vibrant.mplstyle +6 -0
- py2ls/data/tiles.csv +146 -0
- py2ls/data/usages_pd.json +1417 -0
- py2ls/data/usages_sns.json +31 -0
- py2ls/docker2ls.py +5446 -0
- py2ls/ec2ls.py +61 -0
- py2ls/fetch_update.py +145 -0
- py2ls/ich2ls.py +1955 -296
- py2ls/im2.py +8242 -0
- py2ls/image_ml2ls.py +2100 -0
- py2ls/ips.py +33909 -3418
- py2ls/ml2ls.py +7700 -0
- py2ls/mol.py +289 -0
- py2ls/mount2ls.py +1307 -0
- py2ls/netfinder.py +873 -351
- py2ls/nl2ls.py +283 -0
- py2ls/ocr.py +1581 -458
- py2ls/plot.py +10394 -314
- py2ls/rna2ls.py +311 -0
- py2ls/ssh2ls.md +456 -0
- py2ls/ssh2ls.py +5933 -0
- py2ls/ssh2ls_v01.py +2204 -0
- py2ls/stats.py +66 -172
- py2ls/temp20251124.py +509 -0
- py2ls/translator.py +2 -0
- py2ls/utils/decorators.py +3564 -0
- py2ls/utils_bio.py +3453 -0
- {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/METADATA +113 -224
- {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/RECORD +72 -16
- {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/WHEEL +0 -0
py2ls/ocr.py
CHANGED
@@ -1,28 +1,76 @@
-import easyocr
 import cv2
+import os
 import numpy as np
 import matplotlib.pyplot as plt
 from py2ls.ips import (
     strcmp,
     detect_angle,
-
-
-
-
-from PIL import Image, ImageDraw, ImageFont
-import PIL.PngImagePlugin
-import pytesseract
-from paddleocr import PaddleOCR
+    str2words,
+    isa
+)
+from PIL import Image
 import logging
-
-
-
-
+from typing import Union, List, Dict, Tuple, Optional
+from dataclasses import dataclass
+from enum import Enum, auto
+import warnings
+# Suppress unnecessary warnings
+warnings.filterwarnings('ignore')

 """
-Optical Character Recognition (OCR)
+Enhanced Optical Character Recognition (OCR) Package
 """

+class OCREngine(Enum):
+    EASYOCR = auto()
+    PADDLEOCR = auto()
+    PYTHON_TESSERACT = auto()
+    DDDDOCR = auto()
+    ZEROX = auto()
+
+@dataclass
+class OCRResult:
+    text: str
+    confidence: float
+    bbox: Optional[List[Tuple[int, int]]] = None
+    language: Optional[str] = None
+    engine: Optional[str] = None
+
+    def __str__(self):
+        return f"Text: {self.text} (Confidence: {self.confidence:.2f})"
+
+@dataclass
+class OCRConfig:
+    languages: List[str] = None
+    engine: OCREngine = OCREngine.PADDLEOCR
+    threshold: float = 0.1
+    decoder: str = "wordbeamsearch"
+    preprocess: Dict = None
+    postprocess: Dict = None
+    visualization: Dict = None
+
+    def __post_init__(self):
+        if self.languages is None:
+            self.languages = ["en"]
+        if self.preprocess is None:
+            self.preprocess = {
+                "grayscale": True,
+                "threshold": True,
+                "rotate": "auto"
+            }
+        if self.postprocess is None:
+            self.postprocess = {
+                "spell_check": True,
+                "clean": True
+            }
+        if self.visualization is None:
+            self.visualization = {
+                "show": True,
+                "box_color": (0, 255, 0),
+                "text_color": (116, 173, 233),
+                "font_size": 8
+            }
+
 # Valid language codes
 lang_valid = {
     "easyocr": {
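The three new types above replace the 0.1.x module's loose keyword conventions: OCREngine enumerates the supported backends, OCRResult is a uniform per-detection record, and OCRConfig.__post_init__ fills in preprocessing, post-processing, and visualization defaults. A minimal sketch of constructing a config and inspecting those defaults (assuming py2ls.ocr exposes the names exactly as defined in this hunk):

# Sketch only; OCRConfig/OCREngine as defined in the diff above.
from py2ls.ocr import OCRConfig, OCREngine

config = OCRConfig(languages=["en", "de"], engine=OCREngine.EASYOCR)
print(config.preprocess)     # {'grayscale': True, 'threshold': True, 'rotate': 'auto'}
print(config.postprocess)    # {'spell_check': True, 'clean': True}
print(config.visualization)  # {'show': True, 'box_color': (0, 255, 0), ...}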
@@ -153,22 +201,257 @@ lang_valid = {
     },
 }

+class OCRProcessor:
+    def __init__(self, config: OCRConfig = None):
+        self.config = config if config else OCRConfig()
+        self._initialize_engine()
+
+    def _initialize_engine(self):
+        """Initialize the selected OCR engine"""
+        engine_map = {
+            OCREngine.EASYOCR: "easyocr",
+            OCREngine.PADDLEOCR: "paddleocr",
+            OCREngine.PYTHON_TESSERACT: "pytesseract",
+            OCREngine.DDDDOCR: "ddddocr",
+            OCREngine.ZEROX: "zerox"
+        }
+        self.engine_name = engine_map.get(self.config.engine, "paddleocr")
+
+    def process_image(self, image_path: Union[str, np.ndarray]) -> List[OCRResult]:
+        """Main method to process an image and return OCR results"""
+        try:
+            # Load and preprocess image
+            image = self._load_image(image_path)
+            processed_image = self._preprocess_image(image)
+
+            # Perform OCR
+            results = self._perform_ocr(processed_image)
+
+            # Post-process results
+            final_results = self._postprocess_results(results)
+
+            # Visualize if needed
+            if self.config.visualization.get('show', True):
+                self._visualize_results(image, final_results)
+
+            return final_results
+
+        except Exception as e:
+            logging.error(f"Error processing image: {str(e)}")
+            raise
+
+    def _load_image(self, image_path: Union[str, np.ndarray]) -> np.ndarray:
+        """Load image from path or numpy array"""
+        if isinstance(image_path, str):
+            image = cv2.imread(image_path)
+            if image is None:
+                raise ValueError(f"Could not load image from path: {image_path}")
+        elif isinstance(image_path, np.ndarray):
+            image = image_path
+        else:
+            raise ValueError("Input must be either image path or numpy array")
+
+        return image
+
+    def _preprocess_image(self, image: np.ndarray) -> np.ndarray:
+        """Apply preprocessing steps to the image"""
+        return preprocess_img(image, **self.config.preprocess)
+
+    def _perform_ocr(self, image: np.ndarray) -> List[OCRResult]:
+        """Perform OCR using the selected engine"""
+        engine_methods = {
+            OCREngine.EASYOCR: self._easyocr_recognize,
+            OCREngine.PADDLEOCR: self._paddleocr_recognize,
+            OCREngine.PYTHON_TESSERACT: self._pytesseract_recognize,
+            OCREngine.DDDDOCR: self._ddddocr_recognize,
+            OCREngine.ZEROX: self._zerox_recognize
+        }
+
+        method = engine_methods.get(self.config.engine)
+        if not method:
+            raise ValueError(f"Unsupported OCR engine: {self.config.engine}")
+
+        return method(image)
+
+    def _postprocess_results(self, results: List[OCRResult]) -> List[OCRResult]:
+        """Apply post-processing to OCR results"""
+        if not self.config.postprocess:
+            return results
+
+        for result in results:
+            if self.config.postprocess.get('spell_check', False):
+                result.text = str2words(result.text)
+            if self.config.postprocess.get('clean', False):
+                result.text = self._clean_text(result.text)
+
+        return results
+
+    def _visualize_results(self, image: np.ndarray, results: List[OCRResult]):
+        """Visualize OCR results on the original image"""
+        vis_config = self.config.visualization
+        fig, ax = plt.subplots(figsize=(10, 10))
+
+        for result in results:
+            if result.confidence >= self.config.threshold and result.bbox:
+                top_left = tuple(map(int, result.bbox[0]))
+                bottom_right = tuple(map(int, result.bbox[2]))
+
+                # Draw bounding box
+                image = cv2.rectangle(
+                    image,
+                    top_left,
+                    bottom_right,
+                    vis_config['box_color'],
+                    2
+                )
+
+                # Add text
+                image = add_text_pil(
+                    image,
+                    result.text,
+                    top_left,
+                    font_size=vis_config['font_size'] * 6,
+                    color=vis_config['text_color'],
+                    bg_color=(133, 203, 245, 100)
+                )
+
+        # Display the image
+        ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+        ax.axis("off")
+        plt.show()
+
+    # Engine-specific recognition methods
+    def _easyocr_recognize(self, image: np.ndarray) -> List[OCRResult]:
+        """Recognize text using EasyOCR"""
+        import easyocr
+
+        lang = lang_auto_detect(self.config.languages, "easyocr")
+        reader = easyocr.Reader(lang, gpu=self.config.use_gpu)
+        detections = reader.readtext(image, decoder=self.config.decoder)
+
+        return [
+            OCRResult(
+                text=text,
+                confidence=score,
+                bbox=bbox,
+                engine="easyocr"
+            ) for bbox, text, score in detections
+        ]
+
+    def _paddleocr_recognize(self, image: np.ndarray) -> List[OCRResult]:
+        """Recognize text using PaddleOCR"""
+        from paddleocr import PaddleOCR
+
+        lang = lang_auto_detect(self.config.languages, "paddleocr")
+        ocr = PaddleOCR(
+            use_angle_cls=True,
+            lang=lang[0],  # PaddleOCR supports one language at a time
+        )
+        result = ocr.ocr(image, cls=True)
+
+        ocr_results = []
+        if result and result[0]:
+            for line in result[0]:
+                if line:
+                    bbox, (text, score) = line
+                    ocr_results.append(
+                        OCRResult(
+                            text=text,
+                            confidence=score,
+                            bbox=bbox,
+                            engine="paddleocr"
+                        )
+                    )
+
+        return ocr_results
+
+    def _pytesseract_recognize(self, image: np.ndarray) -> List[OCRResult]:
+        """Recognize text using pytesseract"""
+        import pytesseract
+
+        lang = lang_auto_detect(self.config.languages, "pytesseract")
+        data = pytesseract.image_to_data(
+            image,
+            lang="+".join(lang),
+            output_type=pytesseract.Output.DICT
+        )
+
+        ocr_results = []
+        for i in range(len(data['text'])):
+            if int(data['conf'][i]) > 0:  # Filter out empty results
+                ocr_results.append(
+                    OCRResult(
+                        text=data['text'][i],
+                        confidence=float(data['conf'][i])/100,
+                        bbox=(
+                            (data['left'][i], data['top'][i]),
+                            (data['left'][i] + data['width'][i], data['top'][i]),
+                            (data['left'][i] + data['width'][i], data['top'][i] + data['height'][i]),
+                            (data['left'][i], data['top'][i] + data['height'][i])
+                        ),
+                        engine="pytesseract"
+                    )
+                )
+
+        return ocr_results
+
+    def _ddddocr_recognize(self, image: np.ndarray) -> List[OCRResult]:
+        """Recognize text using ddddocr"""
+        import ddddocr
+
+        ocr = ddddocr.DdddOcr(det=False, ocr=True)
+        image_bytes = convert_image_to_bytes(image)
+        text = ocr.classification(image_bytes)
+
+        return [
+            OCRResult(
+                text=text,
+                confidence=1.0,  # ddddocr doesn't provide confidence scores
+                engine="ddddocr"
+            )
+        ]
+
+    def _zerox_recognize(self, image: np.ndarray) -> List[OCRResult]:
+        """Recognize text using pyzerox"""
+        from pyzerox import zerox
+
+        results = zerox(image)
+        return [
+            OCRResult(
+                text=text,
+                confidence=score,
+                bbox=bbox,
+                engine="zerox"
+            ) for bbox, text, score in results
+        ]
+
+    @staticmethod
+    def _clean_text(text: str) -> str:
+        """Clean text by removing special characters and extra spaces"""
+        import re
+        text = re.sub(r'[^\w\s]', '', text)
+        text = ' '.join(text.split())
+        return text

 def lang_auto_detect(
-    lang,
-    model="easyocr",  # "easyocr" or "pytesseract"
-):
+    lang: Union[str, List[str]],
+    model: str = "easyocr",  # "easyocr" or "pytesseract"
+) -> List[str]:
+    """Automatically detect and validate language codes for the specified OCR model."""
     models = ["easyocr", "paddleocr", "pytesseract"]
     model = strcmp(model, models)[0]
     res_lang = []
+
     if isinstance(lang, str):
         lang = [lang]
+
     for i in lang:
         res_lang.append(lang_valid[model][strcmp(i, list(lang_valid[model].keys()))[0]])
+
     return res_lang

-
-
+def determine_src_points(image: np.ndarray) -> np.ndarray:
+    """Determine source points for perspective correction."""
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
     _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
     contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
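OCRProcessor wires the pieces into a fixed load, preprocess, recognize, post-process, visualize pipeline, with the per-engine _*_recognize methods importing their backends lazily. A usage sketch (hypothetical image path; PaddleOCR assumed installed). One sharp edge visible in the diff itself: _easyocr_recognize reads self.config.use_gpu, which OCRConfig never defines, so the EasyOCR path would raise AttributeError as written.

from py2ls.ocr import OCRConfig, OCREngine, OCRProcessor

config = OCRConfig(
    engine=OCREngine.PADDLEOCR,
    languages=["en"],
    visualization={"show": False, "box_color": (0, 255, 0),
                   "text_color": (116, 173, 233), "font_size": 8},
)
processor = OCRProcessor(config)
results = processor.process_image("receipt.png")  # hypothetical input file
for r in results:
    print(r)  # OCRResult.__str__: "Text: ... (Confidence: 0.93)"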
@@ -204,8 +487,8 @@ def determine_src_points(image):
     )
     return src_points

-
-
+def get_default_camera_matrix(image_shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
+    """Generate a default camera matrix for undistortion."""
     height, width = image_shape[:2]
     focal_length = width
     center = (width / 2, height / 2)
@@ -216,8 +499,8 @@ def get_default_camera_matrix(image_shape):
     dist_coeffs = np.zeros((4, 1))  # Assuming no distortion
     return camera_matrix, dist_coeffs

-
-
+def correct_perspective(image: np.ndarray, src_points: np.ndarray) -> np.ndarray:
+    """Correct perspective distortion in an image."""
     # Define the destination points for the perspective transform
     width, height = 1000, 1000  # Adjust size as needed
     dst_points = np.array(
@@ -231,8 +514,8 @@ def correct_perspective(image, src_points):
     corrected_image = cv2.warpPerspective(image, M, (width, height))
     return corrected_image

-
-
+def detect_text_orientation(image: np.ndarray) -> float:
+    """Detect the orientation angle of text in an image."""
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
     edges = cv2.Canny(gray, 50, 150, apertureSize=3)
     lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)
@@ -250,8 +533,8 @@ def detect_text_orientation(image):
     median_angle = np.median(angles)
     return median_angle

-
-
+def rotate_image(image: np.ndarray, angle: float) -> np.ndarray:
+    """Rotate an image by a given angle."""
     center = (image.shape[1] // 2, image.shape[0] // 2)
     rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
     rotated_image = cv2.warpAffine(
@@ -259,8 +542,8 @@ def rotate_image(image, angle):
     )
     return rotated_image

-
-
+def correct_skew(image: np.ndarray) -> np.ndarray:
+    """Correct skew in an image using contour detection."""
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
     coords = np.column_stack(np.where(gray > 0))
     angle = cv2.minAreaRect(coords)[-1]
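The geometry helpers gain type hints and one-line docstrings but keep their behavior. A sketch chaining them to level a scanned page (hypothetical file path):

import cv2
from py2ls.ocr import detect_text_orientation, rotate_image, correct_skew

img = cv2.imread("scan.jpg")          # hypothetical input
angle = detect_text_orientation(img)  # median Hough-line angle in degrees
leveled = rotate_image(img, angle)    # rotate about the image center
deskewed = correct_skew(leveled)      # minAreaRect-based fine correction
cv2.imwrite("scan_deskewed.jpg", deskewed)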
@@ -276,24 +559,27 @@ def correct_skew(image):
     )
     return rotated

-
-
+def undistort_image(image: np.ndarray, camera_matrix: np.ndarray, dist_coeffs: np.ndarray) -> np.ndarray:
+    """Undistort an image using camera calibration parameters."""
     return cv2.undistort(image, camera_matrix, dist_coeffs)

-
 def add_text_pil(
-    image,
-    text,
-    position,
-
-
-
-)
+    image: np.ndarray,
+    text: str,
+    position: Tuple[int, int],
+    cvt_cmp: bool = True,
+    font_size: int = 12,
+    color: Tuple[int, int, int] = (0, 0, 0),
+    bg_color: Tuple[int, int, int, int] = (133, 203, 245, 100),
+) -> np.ndarray:
+    """Add text to an image using PIL for better Unicode support."""
+    from PIL import Image, ImageDraw, ImageFont
+
     # Convert the image to PIL format
-    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
-
-    draw = ImageDraw.Draw(
-
+    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).convert("RGBA")
+    overlay = Image.new("RGBA", pil_image.size, (255, 255, 255, 0))
+    draw = ImageDraw.Draw(overlay)
+
     try:
         font = ImageFont.truetype(
             "/System/Library/Fonts/Supplemental/Songti.ttc", font_size
@@ -301,22 +587,14 @@ def add_text_pil(
     except IOError:
         font = ImageFont.load_default()

-    #
-    # Measure text size using textbbox
+    # Calculate text size using textbbox
     text_bbox = draw.textbbox((0, 0), text, font=font)
-    # # or draw only the text  # Calculate text size
-    # text_width, text_height = draw.textsize(text, font=font)
     text_width = text_bbox[2] - text_bbox[0]
     text_height = text_bbox[3] - text_bbox[1]

     # Draw background rectangle
     x, y = position
-
-    offset = int(
-        0.1 * text_height
-    )  # no longer shifts upward; # int(0.5 * text_height)  # shift up by 50%
-
-    # Adjust position to match OpenCV's bottom-left alignment
+    offset = int(0.1 * text_height)
     adjusted_position = (position[0], position[1] - text_height - offset)

     background_rect = [
@@ -326,79 +604,90 @@ def add_text_pil(
         y + text_height,
     ]
     draw.rectangle(background_rect, fill=bg_color)
+
     # Add text to the image
     draw.text(adjusted_position, text, font=font, fill=color)
-
-
-
-
+
+    # Combine images
+    if pil_image.mode != "RGBA":
+        pil_image = pil_image.convert("RGBA")
+    if overlay.mode != "RGBA":
+        overlay = overlay.convert("RGBA")
+    combined = Image.alpha_composite(pil_image, overlay)
+
+    # Convert back to OpenCV format
+    return cv2.cvtColor(np.array(combined), cv2.COLOR_RGBA2BGR)

 def preprocess_img(
-    image,
-    grayscale=True,
-    threshold=True,
-    threshold_method="adaptive",
-    rotate="auto",
-    skew=False,
-    blur=
-    blur_ksize=(5, 5),
-    morph=True,
-    morph_op="open",
-    morph_kernel_size=(3, 3),
-    enhance_contrast=True,
-    clahe_clip=2.0,
-    clahe_grid_size=(8, 8),
-    edge_detection=False,
-):
+    image: Union[str, np.ndarray],
+    grayscale: bool = True,
+    threshold: bool = True,
+    threshold_method: str = "adaptive",
+    rotate: Union[str, float] = "auto",
+    skew: bool = False,
+    blur: bool = False,
+    blur_ksize: Tuple[int, int] = (5, 5),
+    morph: bool = True,
+    morph_op: str = "open",
+    morph_kernel_size: Tuple[int, int] = (3, 3),
+    enhance_contrast: bool = True,
+    clahe_clip: float = 2.0,
+    clahe_grid_size: Tuple[int, int] = (8, 8),
+    edge_detection: bool = False,
+) -> np.ndarray:
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    clahe_clip: clip limit for CLAHE (contrast-limited adaptive histogram equalization).
-    clahe_grid_size: grid size for CLAHE.
-    edge_detection: whether to apply edge detection.
+    Preprocess an image for OCR to improve recognition accuracy.
+
+    Parameters:
+        image: Input image (path, numpy array, or PIL image)
+        grayscale: Convert to grayscale
+        threshold: Apply thresholding
+        threshold_method: 'global' or 'adaptive' thresholding
+        rotate: 'auto' to auto-detect angle, or float for manual rotation
+        skew: Correct skew
+        blur: Apply Gaussian blur
+        blur_ksize: Kernel size for blur
+        morph: Apply morphological operations
+        morph_op: Type of operation ('open', 'close', 'dilate', 'erode')
+        morph_kernel_size: Kernel size for morphological operations
+        enhance_contrast: Apply CLAHE contrast enhancement
+        clahe_clip: Clip limit for CLAHE
+        clahe_grid_size: Grid size for CLAHE
+        edge_detection: Apply Canny edge detection
+
+    Returns:
+        Preprocessed image as numpy array
     """
-
+    import PIL.PngImagePlugin
+
+    # Convert different input types to numpy array
+    if isinstance(image, (PIL.PngImagePlugin.PngImageFile, Image.Image)):
         image = np.array(image)
     if isinstance(image, str):
         image = cv2.imread(image)
     if not isinstance(image, np.ndarray):
         image = np.array(image)
-
-
-    image
-
-
-
+
+    try:
+        if image.shape[1] == 4:  # Check if it has an alpha channel
+            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
+        else:
+            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    except:
+        pass

     # Rotate image
     if rotate == "auto":
         angle = detect_angle(image, by="fft")
         img_preprocessed = rotate_image(image, angle)
+    elif isinstance(rotate, (int, float)):
+        img_preprocessed = rotate_image(image, rotate)
     else:
         img_preprocessed = image

     # Correct skew
     if skew:
-        img_preprocessed = correct_skew(
+        img_preprocessed = correct_skew(img_preprocessed)

     # Convert to grayscale
     if grayscale:
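preprocess_img is the single entry point that OCRProcessor._preprocess_image forwards to; each keyword toggles one OpenCV stage, applied in the order rotate, deskew, grayscale, threshold, blur, morphology, CLAHE, edges. A sketch using only the documented parameters (hypothetical path):

from py2ls.ocr import preprocess_img

binary = preprocess_img(
    "receipt.png",              # hypothetical path; arrays and PIL images also accepted
    grayscale=True,
    threshold=True,
    threshold_method="adaptive",
    rotate="auto",              # FFT-based angle detection via detect_angle
    morph=True,
    morph_op="open",            # remove small noise, keep larger shapes
    enhance_contrast=True,      # CLAHE
)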
@@ -407,7 +696,7 @@ def preprocess_img(
     # Thresholding
     if threshold:
         if threshold_method == "adaptive":
-
+            img_preprocessed = cv2.adaptiveThreshold(
                 img_preprocessed,
                 255,
                 cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
@@ -424,151 +713,172 @@ def preprocess_img(
     if blur:
         img_preprocessed = cv2.GaussianBlur(img_preprocessed, blur_ksize, 0)

-    #
+    # Morphological operations
     if morph:
         kernel = cv2.getStructuringElement(cv2.MORPH_RECT, morph_kernel_size)
-        if morph_op == "close":
-            # Purpose: closing fills small holes or gaps in foreground objects while preserving their shape and size.
-            # How it works: closing dilates first, then erodes; dilation fills small holes or gaps, erosion restores the shape of larger objects.
-            # Effects:
-            #   fills small holes and gaps in foreground objects.
-            #   smooths the edges of larger objects.
-            # Example use: filling small holes or gaps in objects.
+        if morph_op == "close":
             img_preprocessed = cv2.morphologyEx(
                 img_preprocessed, cv2.MORPH_CLOSE, kernel
             )
-        elif morph_op == "open":
-            # Purpose: opening removes small objects or noise from the background while preserving the shape and size of larger objects.
-            # How it works: opening erodes first, then dilates; erosion removes small-scale noise, dilation restores the size of the remaining objects.
-            # Effects:
-            #   removes small objects from the foreground.
-            #   smooths the contours of larger objects.
-            # Example use: removing small noise or artifacts while keeping larger objects intact.
+        elif morph_op == "open":
             img_preprocessed = cv2.morphologyEx(
                 img_preprocessed, cv2.MORPH_OPEN, kernel
             )
-        elif morph_op == "dilate":
-            # Purpose: dilation adds pixels at object boundaries; it can fill small holes in objects or connect neighboring objects.
-            # How it works: the kernel slides over the image and each pixel is set to the maximum value under the kernel.
-            # Effects:
-            #   objects grow.
-            #   small holes or gaps in objects are filled.
-            # Example use: filling small holes in objects or joining broken object parts.
+        elif morph_op == "dilate":
             img_preprocessed = cv2.dilate(img_preprocessed, kernel)
-        elif morph_op == "erode":
-            # Purpose: erosion removes pixels at object boundaries; it can remove small-scale noise and separate objects that are close together.
-            # How it works: the kernel (structuring element) slides over the image and each pixel is set to the minimum value under the kernel.
-            # Effects:
-            #   objects shrink.
-            #   small white specks are removed (in white-foreground/black-background images).
-            # Example use: removing small noise in binary images or separating touching objects
+        elif morph_op == "erode":
             img_preprocessed = cv2.erode(img_preprocessed, kernel)

-    #
+    # Contrast enhancement
     if enhance_contrast:
         clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=clahe_grid_size)
         img_preprocessed = clahe.apply(img_preprocessed)

-    #
+    # Edge detection
     if edge_detection:
         img_preprocessed = cv2.Canny(img_preprocessed, 100, 200)

     return img_preprocessed

+def convert_image_to_bytes(image: Union[np.ndarray, Image.Image]) -> bytes:
+    """Convert a CV2 or numpy image to bytes for OCR engines that require it."""
+    import io
+    from PIL import Image
+
+    # Convert OpenCV image (numpy array) to PIL image
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+    # Save PIL image to a byte stream
+    img_byte_arr = io.BytesIO()
+    image.save(img_byte_arr, format='PNG')
+    return img_byte_arr.getvalue()

 def text_postprocess(
-    text,
-    spell_check=True,
-    clean=True,
-    filter=
-    pattern=None,
-    merge=True,
-):
-
-
+    text: Union[str, List[str]],
+    spell_check: bool = True,
+    clean: bool = True,
+    filter: Dict = None,
+    pattern: str = None,
+    merge: bool = True,
+) -> Union[str, List[str]]:
+    """
+    Post-process OCR results to improve text quality.
+
+    Parameters:
+        text: Input text or list of texts
+        spell_check: Apply spell checking
+        clean: Remove special characters
+        filter: Dictionary with filtering options (e.g., min_length)
+        pattern: Regex pattern to match
+        merge: Merge fragments into single string
+
+    Returns:
+        Processed text or list of texts
+    """
+    import re
+    from spellchecker import SpellChecker
+
+    if filter is None:
+        filter = {"min_length": 2}
+
+    if isinstance(text, str):
+        text = [text]
+
+    def correct_spelling(text_list: List[str]) -> List[str]:
         spell = SpellChecker()
-
-        return corrected_text
+        return [spell.correction(word) if spell.correction(word) else word for word in text_list]

-    def clean_text(text_list):
-
-        return cleaned_text
+    def clean_text(text_list: List[str]) -> List[str]:
+        return [re.sub(r"[^\w\s]", "", t) for t in text_list]

-    def filter_text(text_list, min_length=2):
-
-        return filtered_text
+    def filter_text(text_list: List[str], min_length: int = 2) -> List[str]:
+        return [t for t in text_list if len(t) >= min_length]

-    def extract_patterns(text_list, pattern):
-
-
-        return matched_text
+    def extract_patterns(text_list: List[str], pattern: str) -> List[str]:
+        compiled_pattern = re.compile(pattern)
+        return [t for t in text_list if compiled_pattern.search(t)]

-    def merge_fragments(text_list):
-
-        return merged_text
+    def merge_fragments(text_list: List[str]) -> str:
+        return " ".join(text_list)

     results = text
-    print(results)
     if spell_check:
         results = correct_spelling(results)
     if clean:
         results = clean_text(results)
     if filter:
-        results = filter_text(
-            results, min_length=postprocess["filter"].get("min_length", 2)
-        )
+        results = filter_text(results, min_length=filter.get("min_length", 2))
     if pattern:
-        results = extract_patterns(results,
-    if merge:
+        results = extract_patterns(results, pattern)
+    if merge and isinstance(results, list):
         results = merge_fragments(results)

+    return results
+
+def save_ocr_results(results: List[OCRResult], dir_save: str):
+    fname, output = os.path.splitext(dir_save)
+    if output == "txt":
+        with open(dir_save, "w", encoding="utf-8") as f:
+            for r in results:
+                f.write(r.text + "\n")
+
+    elif output == "csv":
+        import pandas as pd
+        df = pd.DataFrame([r.__dict__ for r in results])
+        df.to_csv(dir_save, index=False)
+
+    elif output == "xlsx":
+        import pandas as pd
+        df = pd.DataFrame([r.__dict__ for r in results])
+        df.to_excel(dir_save, index=False)
+
+    elif output == "json":
+        import json
+        with open(dir_save, "w", encoding="utf-8") as f:
+            json.dump([r.__dict__ for r in results], f, indent=4)
+
+    elif output == "docx":
+        from docx import Document
+        doc = Document()
+        for r in results:
+            doc.add_paragraph(r.text)
+        doc.save(dir_save)

-# https://www.jaided.ai/easyocr/documentation/
-# extract text from an image with EasyOCR
 def get_text(
-    image,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    image: Union[str, np.ndarray],
+    dir_save: str = None,
+    lang: Union[str, List[str]] = ["ch_sim", "en"],
+    model: str = "paddleocr",
+    thr: float = 0.1,
+    gpu: bool = True,
+    decoder: str = "wordbeamsearch",
+    output: str = "txt",
+    preprocess: Dict = None,
+    postprocess: Union[bool, Dict] = False,
+    show: bool = True,
+    ax = None,
+    cmap = cv2.COLOR_BGR2RGB,
+    font = cv2.FONT_HERSHEY_SIMPLEX,
+    fontsize: int = 8,
+    figsize: List[int] = [10, 10],
+    box_color: Tuple[int, int, int] = (0, 255, 0),
+    fontcolor: Tuple[int, int, int] = (116, 173, 233),
+    bg_color: Tuple[int, int, int, int] = (133, 203, 245, 100),
+    usage: bool = False,
     **kwargs,
-):
+) -> Union[List[OCRResult], np.ndarray, Tuple[np.ndarray, List[OCRResult]]]:
     """
-
-
-
-
-    thr: confidence threshold; detections below it are filtered out.
-    gpu: whether to use the GPU.
-    output: output type; 'all' (all detections), 'text' (text), 'score' (confidence scores), or 'box' (bounding boxes).
-    preprocess: dict of preprocessing parameters passed to preprocess_img.
-    show: whether to display the result image.
-    ax: Matplotlib axes used to display the image.
-    cmap: color map used to display the image.
-    color_box: bounding-box color.
-    color_text: text color.
-    kwargs: extra arguments passed to EasyOCR's readtext function.
-
-    # Usage
+    Extract text from an image using specified OCR engine.
+
+    This is a convenience wrapper around the OCRProcessor class for backward compatibility.
+    For new code, consider using the OCRProcessor class directly.
     """
+    # Backward compatibility wrapper
     if usage:
-        print(
-
-        image_path = 'car_plate.jpg'
+        print("""
+        Example usage:
+        image_path = 'car_plate.jpg'
         results = get_text(
             image_path,
             lang=["en"],
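The rewritten text_postprocess gives each nested helper a working body (the 0.1.x versions returned undefined names such as corrected_text), and save_ocr_results adds txt/csv/xlsx/json/docx export. A sketch of the spell-check, clean, filter, merge chain; pyspellchecker is assumed installed, since the function imports it internally. One caveat from the diff as written: save_ocr_results compares os.path.splitext's extension against "txt", "csv", etc., but splitext returns the extension with its leading dot (".txt"), so none of the branches would match.

from py2ls.ocr import text_postprocess

fragments = ["Invoce", "No", "2024-001", "!!"]
merged = text_postprocess(
    fragments,
    spell_check=True,          # word-by-word pyspellchecker correction
    clean=True,                # strip non-word characters
    filter={"min_length": 2},  # drop one-character fragments
    merge=True,                # join surviving fragments into one string
)
print(merged)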
@@ -587,260 +897,120 @@ def get_text(
             "clahe_clip": 2.0,
             "clahe_grid_size": (8, 8),
             "edge_detection": False
-        }
-
-
+        }
+        )""")
+        return
+
+    # Create config from parameters
+    engine_map = {
+        "easyocr": OCREngine.EASYOCR,
+        "paddleocr": OCREngine.PADDLEOCR,
+        "pytesseract": OCREngine.PYTHON_TESSERACT,
+        "ddddocr": OCREngine.DDDDOCR,
+        "zerox": OCREngine.ZEROX
+    }
+
+    config = OCRConfig(
+        languages=lang if isinstance(lang, list) else [lang],
+        engine=engine_map.get(model.lower(), OCREngine.PADDLEOCR),
+        threshold=thr,
+        decoder=decoder,
+        preprocess=preprocess if preprocess else {},
+        postprocess=postprocess if isinstance(postprocess, dict) else {"spell_check": postprocess},
+        visualization={
+            "show": show,
+            "box_color": box_color,
+            "text_color": fontcolor,
+            "font_size": fontsize
+        }
+    )
+
+    # Process image
+    processor = OCRProcessor(config)
+    results = processor.process_image(image)
+
+    # Format output based on requested type
+    if dir_save is None:
+        if output == "all":
+            return results
+        elif "text" in output.lower():
+            return [r.text for r in results]
+        elif "score" in output.lower() or "prob" in output.lower():
+            return [r.confidence for r in results]
+        elif "box" in output.lower():
+            return [r.bbox for r in results if r.bbox]
+    else:
+        save_ocr_results(results, dir_save)
+        if show:
+            print(f"OCR results saved to: {dir_save}")
+        return dir_save
+
+def get_table(
+    image: Union[str, np.ndarray],
+    dir_save: str = "table_result.xlsx",
+    output: str = None,  # 'excel' or 'df'
+    layout: bool = True,
+    show_log: bool = True,
+    use_gpu: bool = False,
+):
     """
-
+    Recognize and extract tables using PaddleOCR's PPStructure.
+
+    Parameters:
+        image (str | np.ndarray): Path to image or numpy array
+        dir_save (str): Path to save Excel output (if output='excel')
+        output (str): 'excel' to save as .xlsx, 'df' or 'dataframe' to return pandas DataFrames
+        layout (bool): Whether to detect layout blocks
+        show_log (bool): Show PaddleOCR logs
+        use_gpu (bool): Whether to use GPU for inference
+
+    Returns:
+        List of dictionaries (if output='excel') or List of pandas DataFrames (if output='df')
+    """
+    from paddleocr import PPStructure, save_structure_res
+    import cv2
+

-    models = ["easyocr", "paddleocr", "pytesseract"]
-    model = strcmp(model, models)[0]
-    lang = lang_auto_detect(lang, model)
     if isinstance(image, str):
-
-
-
-
-
-
-
-
-
-
-    if
-
-
-
-
-
-
-
-
-
-
-
-
-        text_corr = []
-        [
-            text_corr.extend(text_postprocess(text, **postprocess))
-            for _, text, _ in detections
-        ]
-        if show:
-            if ax is None:
-                ax = plt.gca()
-            for bbox, text, score in detections:
-                if score > thr:
-                    top_left = tuple(map(int, bbox[0]))
-                    bottom_right = tuple(map(int, bbox[2]))
-                    image = cv2.rectangle(image, top_left, bottom_right, color_box, 2)
-                    # image = cv2.putText(
-                    #     image, text, top_left, font, font_scale, color_text, thickness_text
-                    # )
-                    image = add_text_pil(
-                        image,
-                        text,
-                        top_left,
-                        font_size=font_scale * 32,
-                        color=color_text,
-                    )
-            img_cmp = cv2.cvtColor(image, cmap)
-            ax.imshow(img_cmp)
-            ax.axis("off")
-            # plt.show()
-            # Return results according to the output type
-            if output == "all":
-                return ax, detections
-            elif "t" in output.lower() and "x" in output.lower():
-                # Extract text, filtering low-confidence results
-                text = [text_ for _, text_, score_ in detections if score_ >= thr]
-                if postprocess:
-                    return ax, text
-                else:
-                    return text_corr
-            elif "score" in output.lower() or "prob" in output.lower():
-                # Extract the scores
-                scores = [score_ for _, _, score_ in detections]
-                return ax, scores
-            elif "box" in output.lower():
-                # Extract bounding boxes, filtering low-confidence results
-                bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
-                return ax, bboxes
-            else:
-                # By default, return all detection info
-                return ax, detections
-        else:
-            # Return results according to the output type
-            if output == "all":
-                return detections
-            elif "t" in output.lower() and "x" in output.lower():
-                # Extract text, filtering low-confidence results
-                text = [text_ for _, text_, score_ in detections if score_ >= thr]
-                return text
-            elif "score" in output.lower() or "prob" in output.lower():
-                # Extract the scores
-                scores = [score_ for _, _, score_ in detections]
-                return scores
-            elif "box" in output.lower():
-                # Extract bounding boxes, filtering low-confidence results
-                bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
-                return bboxes
-            else:
-                # By default, return all detection info
-                return detections
-    elif "pad" in model.lower():
-        ocr = PaddleOCR(
-            use_angle_cls=True,
-            cls=True,
-        )  # PaddleOCR supports only one language at a time
-        result = ocr.ocr(image_process, **kwargs)
-        detections = []
-        for line in result[0]:
-            bbox, (text, score) = line
-            detections.append((bbox, text, score))
-        if postprocess is None:
-            postprocess = dict(
-                spell_check=True,
-                clean=True,
-                filter=dict(min_length=2),
-                pattern=None,
-                merge=True,
-            )
-        text_corr = []
-        [
-            text_corr.extend(text_postprocess(text, **postprocess))
-            for _, text, _ in detections
-        ]
-        if show:
-            if ax is None:
-                ax = plt.gca()
-            for bbox, text, score in detections:
-                if score > thr:
-                    top_left = tuple(map(int, bbox[0]))
-                    bottom_left = tuple(
-                        map(int, bbox[1])
-                    )  # Bottom-left for more accurate placement
-                    bottom_right = tuple(map(int, bbox[2]))
-                    image = cv2.rectangle(image, top_left, bottom_right, color_box, 2)
-                    # image = cv2.putText(
-                    #     image, text, top_left, font, font_scale, color_text, thickness_text
-                    # )
-                    image = add_text_pil(
-                        image,
-                        text,
-                        top_left,
-                        font_size=font_scale * 32,
-                        color=color_text,
-                        bg_color=bg_color,
-                    )
-            img_cmp = cv2.cvtColor(image, cmap)
-            ax.imshow(img_cmp)
-            ax.axis("off")
-            # plt.show()
-            # Return results according to the output type
-            if output == "all":
-                return ax, detections
-            elif "t" in output.lower() and "x" in output.lower():
-                # Extract text, filtering low-confidence results
-                text = [text_ for _, text_, score_ in detections if score_ >= thr]
-                if postprocess:
-                    return ax, text
-                else:
-                    return text_corr
-            elif "score" in output.lower() or "prob" in output.lower():
-                # Extract the scores
-                scores = [score_ for _, _, score_ in detections]
-                return ax, scores
-            elif "box" in output.lower():
-                # Extract bounding boxes, filtering low-confidence results
-                bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
-                return ax, bboxes
-            else:
-                # By default, return all detection info
-                return ax, detections
-        else:
-            # Return results according to the output type
-            if output == "all":
-                return detections
-            elif "t" in output.lower() and "x" in output.lower():
-                # Extract text, filtering low-confidence results
-                text = [text_ for _, text_, score_ in detections if score_ >= thr]
-                return text
-            elif "score" in output.lower() or "prob" in output.lower():
-                # Extract the scores
-                scores = [score_ for _, _, score_ in detections]
-                return scores
-            elif "box" in output.lower():
-                # Extract bounding boxes, filtering low-confidence results
-                bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
-                return bboxes
-            else:
-                # By default, return all detection info
-                return detections
-
-    else:  # "pytesseract"
-        if ax is None:
-            ax = plt.gca()
-        text = pytesseract.image_to_string(image_process, lang="+".join(lang), **kwargs)
-        bboxes = pytesseract.image_to_boxes(image_process, **kwargs)
-        if show:
-            # Image dimensions
-            h, w, _ = image.shape
-
-            for line in bboxes.splitlines():
-                parts = line.split()
-                if len(parts) == 6:
-                    char, left, bottom, right, top, _ = parts
-                    left, bottom, right, top = map(int, [left, bottom, right, top])
-
-                    # Convert Tesseract coordinates (bottom-left and top-right) to (top-left and bottom-right)
-                    top_left = (left, h - top)
-                    bottom_right = (right, h - bottom)
-
-                    # Draw the bounding box
-                    image = cv2.rectangle(image, top_left, bottom_right, color_box, 2)
-                    image = add_text_pil(
-                        image,
-                        char,
-                        left,
-                        font_size=font_scale * 32,
-                        color=color_text,
-                    )
-            img_cmp = cv2.cvtColor(image, cmap)
-            ax.imshow(img_cmp)
-            ax.axis("off")
-            if output == "all":
-                # Get verbose data including boxes, confidences, line and page numbers
-                detections = pytesseract.image_to_data(image_process)
-                return ax, detections
-            elif "t" in output.lower() and "x" in output.lower():
-                return ax, text
-            elif "box" in output.lower():
-                return ax, bboxes
-            else:
-                # Get information about orientation and script detection
-                return pytesseract.image_to_osd(image_process, **kwargs)
-        else:
-            if output == "all":
-                # Get verbose data including boxes, confidences, line and page numbers
-                detections = pytesseract.image_to_data(image_process, **kwargs)
-                return detections
-            elif "t" in output.lower() and "x" in output.lower():
-                return text
-            elif "box" in output.lower():
-                return bboxes
-            else:
-                # Get information about orientation and script detection
-                return pytesseract.image_to_osd(image_process, **kwargs)
-
+        img = cv2.imread(image)
+        img_name = os.path.splitext(os.path.basename(image))[0]
+    else:
+        img = image
+        img_name = "table_result"
+
+    table_engine = PPStructure(layout=layout, show_log=show_log, use_gpu=use_gpu)
+    result = table_engine(img)
+    if output is None:
+        output = "excel"
+    if output.lower() in ["df", "dataframe"]:
+        # Convert all table blocks into pandas DataFrames
+        dfs = []
+        for block in result:
+            if block["type"] == "table" and "res" in block:
+                table_data = block["res"]["html"]
+                try:
+                    # Read HTML into DataFrame
+                    df = pd.read_html(table_data)[0]
+                    dfs.append(df)
+                except Exception as e:
+                    print(f"[Warning] Could not parse table block: {e}")
+        return dfs

+    else:
+        # Save to Excel file
+        save_structure_res(result, os.path.dirname(dir_save), img_name)
+        print(
+            f"[Info] Table saved to: {os.path.join(os.path.dirname(dir_save), img_name + '.xlsx')}"
+        )
+        return result
 def draw_box(
     image,
     detections=None,
     thr=0.25,
     cmap=cv2.COLOR_BGR2RGB,
-
-
-
+    box_color=(0, 255, 0),  # draw_box
+    fontcolor=(0, 0, 255),  # draw_box
+    fontsize=8,
     show=True,
     ax=None,
     **kwargs,
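get_text survives as a thin backward-compatibility wrapper that maps its flat keyword arguments onto an OCRConfig and delegates to OCRProcessor, while get_table wraps PaddleOCR's PPStructure for table extraction. A combined usage sketch (hypothetical paths; paddleocr assumed installed). Two caveats visible in the diff as written: the 'df' branch of get_table calls pd.read_html without importing pandas, and get_text's gpu, font, and figsize arguments are accepted but never forwarded into the config.

from py2ls.ocr import get_text, get_table

texts = get_text("car_plate.jpg", model="paddleocr", lang=["en"],
                 output="text", show=False)         # list of recognized strings
tables = get_table("report_page.png", output="df")  # list of pandas DataFrames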
@@ -857,12 +1027,9 @@ def draw_box(
         if score > thr:
             top_left = tuple(map(int, bbox[0]))
             bottom_right = tuple(map(int, bbox[2]))
-            image = cv2.rectangle(image, top_left, bottom_right,
-            # image = cv2.putText(
-            #     image, text, top_left, font, font_scale, color_text, thickness_text
-            # )
+            image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
             image = add_text_pil(
-                image, text, top_left, font_size=
+                image, text, top_left, cvt_cmp=cvt_cmp, font_size=fontsize * 6, color=fontcolor
             )

     img_cmp = cv2.cvtColor(image, cmap)
@@ -871,3 +1038,959 @@ def draw_box(
|
|
|
871
1038
|
ax.axis("off")
|
|
872
1039
|
# plt.show()
|
|
873
1040
|
return img_cmp
|
|
1041
|
+
|
|
1042
|
+
|
|
1043
|
+
#! ===========OCR Backup 250529===========
|
|
1044
|
+
|
|
1045
|
+
# import cv2
|
|
1046
|
+
# import numpy as np
|
|
1047
|
+
# import matplotlib.pyplot as plt
|
|
1048
|
+
# from py2ls.ips import (
|
|
1049
|
+
# strcmp,
|
|
1050
|
+
# detect_angle,
|
|
1051
|
+
# str2words,
|
|
1052
|
+
# isa
|
|
1053
|
+
# )
|
|
1054
|
+
# import logging
|
|
1055
|
+
|
|
1056
|
+
# """
|
|
1057
|
+
# Optical Character Recognition (OCR)
|
|
1058
|
+
# """
|
|
1059
|
+
|
|
1060
|
+
# # Valid language codes
|
|
1061
|
+
# lang_valid = {
|
|
1062
|
+
# "easyocr": {
|
|
1063
|
+
# "english": "en",
|
|
1064
|
+
# "thai": "th",
|
|
1065
|
+
# "chinese_traditional": "ch_tra",
|
|
1066
|
+
# "chinese": "ch_sim",
|
|
1067
|
+
# "japanese": "ja",
|
|
1068
|
+
# "korean": "ko",
|
|
1069
|
+
# "tamil": "ta",
|
|
1070
|
+
# "telugu": "te",
|
|
1071
|
+
# "kannada": "kn",
|
|
1072
|
+
# "german": "de",
|
|
1073
|
+
# },
|
|
1074
|
+
# "paddleocr": {
|
|
1075
|
+
# "chinese": "ch",
|
|
1076
|
+
# "chinese_traditional": "chinese_cht",
|
|
1077
|
+
# "english": "en",
|
|
1078
|
+
# "french": "fr",
|
|
1079
|
+
# "german": "de",
|
|
1080
|
+
# "korean": "korean",
|
|
1081
|
+
# "japanese": "japan",
|
|
1082
|
+
# "russian": "ru",
|
|
1083
|
+
# "italian": "it",
|
|
1084
|
+
# "portuguese": "pt",
|
|
1085
|
+
# "spanish": "es",
|
|
1086
|
+
# "polish": "pl",
|
|
1087
|
+
# "dutch": "nl",
|
|
1088
|
+
# "arabic": "ar",
|
|
1089
|
+
# "vietnamese": "vi",
|
|
1090
|
+
# "tamil": "ta",
|
|
1091
|
+
# "turkish": "tr",
|
|
1092
|
+
# },
|
|
1093
|
+
# "pytesseract": {
|
|
1094
|
+
# "afrikaans": "afr",
|
|
1095
|
+
# "amharic": "amh",
|
|
1096
|
+
# "arabic": "ara",
|
|
1097
|
+
# "assamese": "asm",
|
|
1098
|
+
# "azerbaijani": "aze",
|
|
1099
|
+
# "azerbaijani_cyrillic": "aze_cyrl",
|
|
1100
|
+
# "belarusian": "bel",
|
|
1101
|
+
# "bengali": "ben",
|
|
1102
|
+
# "tibetan": "bod",
|
|
1103
|
+
# "bosnian": "bos",
|
|
1104
|
+
# "breton": "bre",
|
|
1105
|
+
# "bulgarian": "bul",
|
|
1106
|
+
# "catalan": "cat",
|
|
1107
|
+
# "cebuano": "ceb",
|
|
1108
|
+
# "czech": "ces",
|
|
1109
|
+
# "chinese": "chi_sim",
|
|
1110
|
+
# "chinese_vertical": "chi_sim_vert",
|
|
1111
|
+
# "chinese_traditional": "chi_tra",
|
|
1112
|
+
# "chinese_traditional_vertical": "chi_tra_vert",
|
|
1113
|
+
# "cherokee": "chr",
|
|
1114
|
+
# "corsican": "cos",
|
|
1115
|
+
# "welsh": "cym",
|
|
1116
|
+
# "danish": "dan",
|
|
1117
|
+
# "danish_fraktur": "dan_frak",
|
|
1118
|
+
# "german": "deu",
|
|
1119
|
+
# "german_fraktur": "deu_frak",
|
|
1120
|
+
# "german_latf": "deu_latf",
|
|
1121
|
+
# "dhivehi": "div",
|
|
1122
|
+
# "dzongkha": "dzo",
|
|
1123
|
+
# "greek": "ell",
|
|
1124
|
+
# "english": "eng",
|
|
1125
|
+
# "middle_english": "enm",
|
|
1126
|
+
# "esperanto": "epo",
|
|
1127
|
+
# "math_equations": "equ",
|
|
1128
|
+
# "estonian": "est",
|
|
1129
|
+
# "basque": "eus",
|
|
1130
|
+
# "faroese": "fao",
|
|
1131
|
+
# "persian": "fas",
|
|
1132
|
+
# "filipino": "fil",
|
|
1133
|
+
# "finnish": "fin",
|
|
1134
|
+
# "french": "fra",
|
|
1135
|
+
# "middle_french": "frm",
|
|
1136
|
+
# "frisian": "fry",
|
|
1137
|
+
# "scottish_gaelic": "gla",
|
|
1138
|
+
# "irish": "gle",
|
|
1139
|
+
# "galician": "glg",
|
|
1140
|
+
# "ancient_greek": "grc",
|
|
1141
|
+
# "gujarati": "guj",
|
|
1142
|
+
# "haitian_creole": "hat",
|
|
1143
|
+
# "hebrew": "heb",
|
|
1144
|
+
# "hindi": "hin",
|
|
1145
|
+
# "croatian": "hrv",
|
|
1146
|
+
# "hungarian": "hun",
|
|
1147
|
+
# "armenian": "hye",
|
|
1148
|
+
# "inuktitut": "iku",
|
|
1149
|
+
# "indonesian": "ind",
|
|
1150
|
+
# "icelandic": "isl",
|
|
1151
|
+
# "italian": "ita",
|
|
1152
|
+
# "old_italian": "ita_old",
|
|
1153
|
+
# "javanese": "jav",
|
|
1154
|
+
# "japanese": "jpn",
|
|
1155
|
+
# "japanese_vertical": "jpn_vert",
|
|
1156
|
+
# "kannada": "kan",
|
|
1157
|
+
# "georgian": "kat",
|
|
1158
|
+
# "old_georgian": "kat_old",
|
|
1159
|
+
# "kazakh": "kaz",
|
|
1160
|
+
# "khmer": "khm",
|
|
1161
|
+
# "kyrgyz": "kir",
|
|
1162
|
+
# "kurdish_kurmanji": "kmr",
|
|
1163
|
+
# "korean": "kor",
|
|
1164
|
+
# "korean_vertical": "kor_vert",
|
|
1165
|
+
# "lao": "lao",
|
|
1166
|
+
# "latin": "lat",
|
|
1167
|
+
# "latvian": "lav",
|
|
1168
|
+
# "lithuanian": "lit",
|
|
1169
|
+
# "luxembourgish": "ltz",
|
|
1170
|
+
# "malayalam": "mal",
|
|
1171
|
+
# "marathi": "mar",
|
|
1172
|
+
# "macedonian": "mkd",
|
|
1173
|
+
# "maltese": "mlt",
|
|
1174
|
+
# "mongolian": "mon",
|
|
1175
|
+
# "maori": "mri",
|
|
1176
|
+
# "malay": "msa",
|
|
1177
|
+
# "burmese": "mya",
|
|
1178
|
+
# "nepali": "nep",
|
|
1179
|
+
# "dutch": "nld",
|
|
1180
|
+
# "norwegian": "nor",
|
|
1181
|
+
# "occitan": "oci",
|
|
1182
|
+
# "oriya": "ori",
|
|
1183
|
+
# "script_detection": "osd",
|
|
1184
|
+
# "punjabi": "pan",
|
|
1185
|
+
# "polish": "pol",
|
|
1186
|
+
# "portuguese": "por",
|
|
1187
|
+
# },
|
|
1188
|
+
# }
|
|
1189
|
+
|
|
1190
|
+
|
|
1191
|
+
# def lang_auto_detect(
|
|
1192
|
+
# lang,
|
|
1193
|
+
# model="easyocr", # "easyocr" or "pytesseract"
|
|
1194
|
+
# ):
|
|
1195
|
+
# models = ["easyocr", "paddleocr", "pytesseract"]
|
|
1196
|
+
# model = strcmp(model, models)[0]
|
|
1197
|
+
# res_lang = []
|
|
1198
|
+
# if isinstance(lang, str):
|
|
1199
|
+
# lang = [lang]
|
|
1200
|
+
# for i in lang:
|
|
1201
|
+
# res_lang.append(lang_valid[model][strcmp(i, list(lang_valid[model].keys()))[0]])
|
|
1202
|
+
# return res_lang
|
|
+
+
+# def determine_src_points(image):
+#     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+#     _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+#     contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+#     # Sort contours by area and keep the five largest
+#     contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
+#     src_points = None
+
+#     for contour in contours:
+#         epsilon = 0.02 * cv2.arcLength(contour, True)
+#         approx = cv2.approxPolyDP(contour, epsilon, True)
+#         if len(approx) == 4:  # We need a quadrilateral
+#             src_points = np.array(approx, dtype="float32")
+#             break
+
+#     if src_points is not None:
+#         # Order points in a specific order (top-left, top-right, bottom-right, bottom-left)
+#         src_points = src_points.reshape(4, 2)
+#         rect = np.zeros((4, 2), dtype="float32")
+#         s = src_points.sum(axis=1)
+#         diff = np.diff(src_points, axis=1)
+#         rect[0] = src_points[np.argmin(s)]
+#         rect[2] = src_points[np.argmax(s)]
+#         rect[1] = src_points[np.argmin(diff)]
+#         rect[3] = src_points[np.argmax(diff)]
+#         src_points = rect
+#     else:
+#         # If no rectangle is detected, fall back to default or user-defined points
+#         height, width = image.shape[:2]
+#         src_points = np.array(
+#             [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
+#             dtype="float32",
+#         )
+#     return src_points
+
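The sum/difference ordering used above is compact but easy to misread: x + y is smallest at the top-left corner and largest at the bottom-right, while y - x (what np.diff returns per row) is smallest at the top-right and largest at the bottom-left. A minimal check with made-up corner coordinates:

import numpy as np

# Four unordered quadrilateral corners as (x, y); values are illustrative only
pts = np.array([[90, 10], [10, 12], [95, 80], [8, 78]], dtype="float32")

s = pts.sum(axis=1)          # x + y
d = np.diff(pts, axis=1)     # y - x
rect = np.zeros((4, 2), dtype="float32")
rect[0] = pts[np.argmin(s)]  # top-left: smallest x + y
rect[2] = pts[np.argmax(s)]  # bottom-right: largest x + y
rect[1] = pts[np.argmin(d)]  # top-right: smallest y - x
rect[3] = pts[np.argmax(d)]  # bottom-left: largest y - x
print(rect)  # rows come out as top-left, top-right, bottom-right, bottom-left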
+
+# def get_default_camera_matrix(image_shape):
+#     height, width = image_shape[:2]
+#     focal_length = width
+#     center = (width / 2, height / 2)
+#     camera_matrix = np.array(
+#         [[focal_length, 0, center[0]], [0, focal_length, center[1]], [0, 0, 1]],
+#         dtype="float32",
+#     )
+#     dist_coeffs = np.zeros((4, 1))  # Assuming no distortion
+#     return camera_matrix, dist_coeffs
+
+
+# def correct_perspective(image, src_points):
+#     # Define the destination points for the perspective transform
+#     width, height = 1000, 1000  # Adjust size as needed
+#     dst_points = np.array(
+#         [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
+#         dtype="float32",
+#     )
+
+#     # Calculate the perspective transform matrix
+#     M = cv2.getPerspectiveTransform(src_points, dst_points)
+#     # Apply the perspective transform
+#     corrected_image = cv2.warpPerspective(image, M, (width, height))
+#     return corrected_image
+
+
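Chained together, determine_src_points and correct_perspective flatten a skewed page into a top-down view. A self-contained sketch of the same warp on a synthetic image (flatten_quad is a hypothetical helper name, not part of the module):

import cv2
import numpy as np

def flatten_quad(image, src_points, size=1000):
    # Map the detected quadrilateral onto a square canvas, as correct_perspective does
    dst = np.array(
        [[0, 0], [size - 1, 0], [size - 1, size - 1], [0, size - 1]],
        dtype="float32",
    )
    M = cv2.getPerspectiveTransform(src_points, dst)
    return cv2.warpPerspective(image, M, (size, size))

# Warp an axis-aligned region of a synthetic page to the full canvas
img = np.full((200, 200, 3), 255, dtype=np.uint8)
cv2.rectangle(img, (40, 60), (160, 140), (0, 0, 0), -1)
quad = np.array([[40, 60], [160, 60], [160, 140], [40, 140]], dtype="float32")
print(flatten_quad(img, quad).shape)  # (1000, 1000, 3)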
+# def detect_text_orientation(image):
+#     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+#     edges = cv2.Canny(gray, 50, 150, apertureSize=3)
+#     lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)
+
+#     if lines is None:
+#         return 0
+
+#     angles = []
+#     for rho, theta in lines[:, 0]:
+#         angle = theta * 180 / np.pi
+#         if angle > 90:
+#             angle -= 180
+#         angles.append(angle)
+
+#     median_angle = np.median(angles)
+#     return median_angle
+
+
+# def rotate_image(image, angle):
+#     center = (image.shape[1] // 2, image.shape[0] // 2)
+#     rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
+#     rotated_image = cv2.warpAffine(
+#         image, rot_mat, (image.shape[1], image.shape[0]), flags=cv2.INTER_LINEAR
+#     )
+#     return rotated_image
+
+
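detect_text_orientation feeds its median Hough-line angle into rotate_image. The rotation half is easy to exercise on its own; a runnable sketch with a made-up angle of -7 degrees:

import cv2
import numpy as np

def rotate(image, angle_deg):
    # Rotate around the image centre, mirroring rotate_image above
    h, w = image.shape[:2]
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle_deg, 1.0)
    return cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_LINEAR)

img = np.zeros((100, 300, 3), dtype=np.uint8)
cv2.putText(img, "tilted text", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
print(rotate(img, -7).shape)  # (100, 300, 3): same canvas, content rotated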
+# def correct_skew(image):
+#     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+#     coords = np.column_stack(np.where(gray > 0))
+#     angle = cv2.minAreaRect(coords)[-1]
+#     if angle < -45:
+#         angle = -(90 + angle)
+#     else:
+#         angle = -angle
+#     (h, w) = image.shape[:2]
+#     center = (w // 2, h // 2)
+#     M = cv2.getRotationMatrix2D(center, angle, 1.0)
+#     rotated = cv2.warpAffine(
+#         image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
+#     )
+#     return rotated
+
+
+# def undistort_image(image, camera_matrix, dist_coeffs):
+#     return cv2.undistort(image, camera_matrix, dist_coeffs)
+
+
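One caveat worth noting for correct_skew: the angle convention of cv2.minAreaRect changed between OpenCV releases (roughly [-90, 0) in older builds versus (0, 90] from about 4.5 on), and the `if angle < -45` correction above assumes the older convention. A quick probe of the installed build:

import cv2
import numpy as np

# A tilted quadrilateral; the reported angle reveals the local convention
pts = np.array([[10, 10], [110, 30], [100, 80], [0, 60]], dtype=np.float32)
(cx, cy), (w, h), angle = cv2.minAreaRect(pts)
print(cv2.__version__, angle)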
+# def add_text_pil(
+#     image,
+#     text,
+#     position,
+#     cvt_cmp=True,
+#     font_size=12,
+#     color=(0, 0, 0),
+#     bg_color=(133, 203, 245, 100),
+# ):
+#     from PIL import Image, ImageDraw, ImageFont
+#     # Convert the image to PIL format
+#     pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).convert("RGBA")
+#     # Define the font (make sure to use a font that supports Chinese characters)
+#     overlay = Image.new("RGBA", pil_image.size, (255, 255, 255, 0))
+#     # Create a drawing context
+#     draw = ImageDraw.Draw(overlay)
+
+#     try:
+#         font = ImageFont.truetype(
+#             "/System/Library/Fonts/Supplemental/Songti.ttc", font_size
+#         )
+#     except IOError:
+#         font = ImageFont.load_default()
+
+#     # Calculate the top-left position
+#     # Measure text size using textbbox
+#     text_bbox = draw.textbbox((0, 0), text, font=font)
+#     # # Alternatively, only draw the text; calculate the text size with:
+#     # text_width, text_height = draw.textsize(text, font=font)
+#     text_width = text_bbox[2] - text_bbox[0]
+#     text_height = text_bbox[3] - text_bbox[1]
+
+#     # Draw background rectangle
+#     x, y = position
+#     # Calculate 10% of the text height for the upward adjustment
+#     offset = int(
+#         0.1 * text_height
+#     )  # this keeps the upward shift minimal; int(0.5 * text_height) would shift up by 50%
+
+#     # Adjust position to match OpenCV's bottom-left alignment
+#     adjusted_position = (position[0], position[1] - text_height - offset)
+
+#     background_rect = [
+#         adjusted_position[0],
+#         adjusted_position[1],
+#         x + text_width,
+#         y + text_height,
+#     ]
+#     draw.rectangle(background_rect, fill=bg_color)
+#     # Add text to the image
+#     draw.text(adjusted_position, text, font=font, fill=color)
+#     # Ensure both images are in RGBA mode for alpha compositing
+#     if pil_image.mode != "RGBA":
+#         pil_image = pil_image.convert("RGBA")
+#     if overlay.mode != "RGBA":
+#         overlay = overlay.convert("RGBA")
+#     combined = Image.alpha_composite(pil_image, overlay)
+#     # Convert the image back to OpenCV format
+#     image = cv2.cvtColor(np.array(combined), cv2.COLOR_RGBA2BGR)  # if cvt_cmp else np.array(combined)
+#     return image
+
+
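add_text_pil exists because cv2.putText cannot render CJK glyphs, so labels are drawn through PIL and composited back. A minimal sketch of that round-trip (the Songti.ttc path is macOS-specific, hence the fallback; the sample string is arbitrary):

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

img = np.full((80, 240, 3), 255, dtype=np.uint8)      # white BGR canvas
pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(pil)
try:
    font = ImageFont.truetype("/System/Library/Fonts/Supplemental/Songti.ttc", 24)
except IOError:
    font = ImageFont.load_default()                   # always available, but no CJK coverage
draw.text((10, 25), "车牌识别", font=font, fill=(0, 0, 0))
img = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)  # back to OpenCV's BGR layout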
+# def preprocess_img(
+#     image,
+#     grayscale=True,
+#     threshold=True,
+#     threshold_method="adaptive",
+#     rotate="auto",
+#     skew=False,
+#     blur=False,  # True,
+#     blur_ksize=(5, 5),
+#     morph=True,
+#     morph_op="open",
+#     morph_kernel_size=(3, 3),
+#     enhance_contrast=True,
+#     clahe_clip=2.0,
+#     clahe_grid_size=(8, 8),
+#     edge_detection=False,
+# ):
+#     """
+#     Preprocessing steps:
+
+#     Grayscale conversion: if grayscale is True, convert the image to grayscale.
+#     Binarization: binarize the image according to threshold and threshold_method.
+#     Denoising: reduce noise with a Gaussian blur.
+#     Morphology: apply the operation selected by morph_op (opening, closing, dilation, erosion) to remove noise or fill holes.
+#     Contrast enhancement: boost image contrast with CLAHE.
+#     Edge detection: if edge_detection is True, apply the Canny edge detector.
+
+#     Preprocess the image to improve OCR accuracy.
+#     Parameters:
+#     image: input image path or image data.
+#     grayscale: whether to convert the image to grayscale.
+#     threshold: whether to binarize the image.
+#     threshold_method: binarization method, either 'global' or 'adaptive'.
+#     blur: whether to denoise the image with a Gaussian blur.
+#     blur_ksize: kernel size of the Gaussian blur.
+#     morph: whether to apply morphological processing.
+#     morph_op: type of morphological operation: 'open' (opening), 'close' (closing), 'dilate' (dilation), or 'erode' (erosion).
+#     morph_kernel_size: kernel size of the morphological operation.
+#     enhance_contrast: whether to enhance image contrast.
+#     clahe_clip: clip limit for CLAHE (Contrast Limited Adaptive Histogram Equalization).
+#     clahe_grid_size: CLAHE tile grid size.
+#     edge_detection: whether to run edge detection.
+#     """
+#     import PIL.PngImagePlugin
+#     if isinstance(image, PIL.PngImagePlugin.PngImageFile):
+#         image = np.array(image)
+#     if isinstance(image, str):
+#         image = cv2.imread(image)
+#     if not isinstance(image, np.ndarray):
+#         image = np.array(image)
+
+#     try:
+#         if image.shape[-1] == 4:  # Check if it has an alpha channel
+#             # Drop the alpha channel (if needed), or handle it as required
+#             image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
+#         else:
+#             # Convert RGB to BGR for OpenCV compatibility
+#             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+#     except:
+#         pass
+
+#     # Rotate image
+#     if rotate == "auto":
+#         angle = detect_angle(image, by="fft")
+#         img_preprocessed = rotate_image(image, angle)
+#     else:
+#         img_preprocessed = image
+
+#     # Correct skew
+#     if skew:
+#         img_preprocessed = correct_skew(image)
+
+#     # Convert to grayscale
+#     if grayscale:
+#         img_preprocessed = cv2.cvtColor(img_preprocessed, cv2.COLOR_BGR2GRAY)
+
+#     # Thresholding
+#     if threshold:
+#         if threshold_method == "adaptive":
+#             img_preprocessed = cv2.adaptiveThreshold(
+#                 img_preprocessed,
+#                 255,
+#                 cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+#                 cv2.THRESH_BINARY,
+#                 11,
+#                 2,
+#             )
+#         elif threshold_method == "global":
+#             _, img_preprocessed = cv2.threshold(
+#                 img_preprocessed, 127, 255, cv2.THRESH_BINARY
+#             )
+
+#     # Denoise by Gaussian blur
+#     if blur:
+#         img_preprocessed = cv2.GaussianBlur(img_preprocessed, blur_ksize, 0)
+
+#     # Morphological processing
+#     if morph:
+#         kernel = cv2.getStructuringElement(cv2.MORPH_RECT, morph_kernel_size)
+#         if morph_op == "close":  # closing
+#             # Purpose: closing fills small holes or gaps in foreground objects while preserving their shape and size.
+#             # How it works: closing dilates first and then erodes; the dilation fills small holes or gaps, the erosion restores the shape of larger objects.
+#             # Effects:
+#             #   Fills small holes and gaps in foreground objects.
+#             #   Smooths the edges of larger objects.
+#             # Typical use: filling small holes or gaps in objects.
+#             img_preprocessed = cv2.morphologyEx(
+#                 img_preprocessed, cv2.MORPH_CLOSE, kernel
+#             )
+#         elif morph_op == "open":  # opening
+#             # Purpose: opening removes small objects or noise while preserving the shape and size of larger objects.
+#             # How it works: opening erodes first and then dilates; the erosion removes small-scale noise, the dilation restores the size of the remaining objects.
+#             # Effects:
+#             #   Removes small objects from the foreground.
+#             #   Smooths the contours of larger objects.
+#             # Typical use: removing small noise or artifacts while keeping larger objects intact.
+#             img_preprocessed = cv2.morphologyEx(
+#                 img_preprocessed, cv2.MORPH_OPEN, kernel
+#             )
+#         elif morph_op == "dilate":  # dilation
+#             # Purpose: dilation adds pixels along object boundaries; it can fill small holes in objects or connect neighboring objects.
+#             # How it works: the kernel slides over the image and each pixel is set to the maximum value in the area the kernel covers.
+#             # Effects:
+#             #   Objects grow.
+#             #   Small holes or gaps in objects are filled.
+#             # Typical use: filling small holes in objects or reconnecting broken object parts.
+#             img_preprocessed = cv2.dilate(img_preprocessed, kernel)
+#         elif morph_op == "erode":  # erosion
+#             # Purpose: erosion removes pixels along object boundaries; it can remove small-scale noise and separate touching objects.
+#             # How it works: the kernel (structuring element) slides over the image and each pixel is set to the minimum value in the area the kernel covers.
+#             # Effects:
+#             #   Objects shrink.
+#             #   Small white specks are removed (in white-foreground/black-background images).
+#             # Typical use: removing small noise from binary images or separating objects that touch each other.
+#             img_preprocessed = cv2.erode(img_preprocessed, kernel)
+
+#     # Contrast enhancement
+#     if enhance_contrast:
+#         clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=clahe_grid_size)
+#         img_preprocessed = clahe.apply(img_preprocessed)
+
+#     # Edge detection
+#     if edge_detection:
+#         img_preprocessed = cv2.Canny(img_preprocessed, 100, 200)
+
+#     return img_preprocessed
+
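The open/close distinction documented above is the part of preprocess_img most worth verifying by eye. A tiny, runnable demo: opening deletes an isolated speck, closing fills a one-pixel hole, each with the same 3x3 kernel:

import cv2
import numpy as np

img = np.zeros((40, 40), dtype=np.uint8)
cv2.rectangle(img, (8, 8), (32, 32), 255, -1)  # solid white square ...
img[20, 20] = 0                                # ... with a 1-px hole
img[2, 2] = 255                                # plus an isolated 1-px speck
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
opened = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
closed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
print(opened[2, 2], closed[20, 20])  # 0 255 -> speck removed, hole filled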
+# def convert_image_to_bytes(image):
+#     """
+#     Convert a CV2 or numpy image to bytes for ddddocr.
+#     """
+#     import io
+#     from PIL import Image
+#     # Convert OpenCV image (numpy array) to PIL image
+#     if isinstance(image, np.ndarray):
+#         image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+#     # Save PIL image to a byte stream
+#     img_byte_arr = io.BytesIO()
+#     image.save(img_byte_arr, format="PNG")
+#     return img_byte_arr.getvalue()
+
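The PNG round-trip in convert_image_to_bytes is a one-liner to sanity-check; the first four bytes of the buffer are the PNG signature:

import io
import cv2
import numpy as np
from PIL import Image

img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)  # random BGR image
pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
buf = io.BytesIO()
pil.save(buf, format="PNG")
print(buf.getvalue()[:4])  # b'\x89PNG'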
+# def text_postprocess(
+#     text,
+#     spell_check=True,
+#     clean=True,
+#     filter=dict(min_length=2),
+#     pattern=None,
+#     merge=True,
+# ):
+#     import re
+#     from spellchecker import SpellChecker
+
+#     def correct_spelling(text_list):
+#         spell = SpellChecker()
+#         corrected_text = [spell.candidates(word) for word in text_list]
+#         return corrected_text
+
+#     def clean_text(text_list):
+#         cleaned_text = [re.sub(r"[^\w\s]", "", text) for text in text_list]
+#         return cleaned_text
+
+#     def filter_text(text_list, min_length=2):
+#         filtered_text = [text for text in text_list if len(text) >= min_length]
+#         return filtered_text
+
+#     def extract_patterns(text_list, pattern):
+#         pattern = re.compile(pattern)
+#         matched_text = [text for text in text_list if pattern.search(text)]
+#         return matched_text
+
+#     def merge_fragments(text_list):
+#         merged_text = " ".join(text_list)
+#         return merged_text
+
+#     results = text
+#     if spell_check:
+#         # results = correct_spelling(results)
+#         results = str2words(results)
+#     if clean:
+#         results = clean_text(results)
+#     if filter:
+#         results = filter_text(
+#             results, min_length=filter.get("min_length", 2)
+#         )
+#     if pattern:
+#         results = extract_patterns(results, pattern)
+#     if merge:
+#         results = merge_fragments(results)
+#     return results
+
+
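text_postprocess is essentially a clean -> filter -> merge pipeline over a list of OCR fragments; the spell-check step depends on py2ls.ips.str2words and is omitted here. The remaining steps in isolation:

import re

texts = ["OCR", "a", "plate: AB-123", "!!"]
cleaned = [re.sub(r"[^\w\s]", "", t) for t in texts]  # strip punctuation
kept = [t for t in cleaned if len(t) >= 2]            # min_length filter
print(" ".join(kept))  # "OCR plate AB123"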
+# # https://www.jaided.ai/easyocr/documentation/
+# # extract text from an image with EasyOCR
+# def get_text(
+#     image,
+#     lang=["ch_sim", "en"],
+#     model="paddleocr",  # "pytesseract", "paddleocr", "easyocr"
+#     thr=0.1,
+#     gpu=True,
+#     decoder="wordbeamsearch",  # 'greedy', 'beamsearch' and 'wordbeamsearch' (highly accurate)
+#     output="txt",
+#     preprocess=None,
+#     postprocess=False,  # False: do not spell-check
+#     show=True,
+#     ax=None,
+#     cmap=cv2.COLOR_BGR2RGB,  # draw_box
+#     font=cv2.FONT_HERSHEY_SIMPLEX,  # draw_box
+#     fontsize=8,  # draw_box
+#     figsize=[10, 10],
+#     box_color=(0, 255, 0),  # draw_box
+#     fontcolor=(116, 173, 233),  # draw_box
+#     bg_color=(133, 203, 245, 100),  # draw_box
+#     usage=False,
+#     **kwargs,
+# ):
+#     """
+#     image: input image path or image data.
+#     lang: list of OCR languages.
+#     thr: confidence threshold; detections below it are filtered out.
+#     gpu: whether to use the GPU.
+#     output: output type: 'all' (all detections), 'text' (the text), 'score' (confidence scores), or 'box' (bounding boxes).
+#     preprocess: dict of preprocessing options, forwarded to preprocess_img.
+#     show: whether to display the result image.
+#     ax: Matplotlib axes used to display the image.
+#     cmap: color conversion used to display the image.
+#     box_color: color of the bounding boxes.
+#     fontcolor: color of the text.
+#     kwargs: extra arguments passed to EasyOCR's readtext.
+#     """
+#     from PIL import Image
+#     if usage:
+#         print(
+#             """
+#     image_path = 'car_plate.jpg'  # replace with your image path
+#     results = get_text(
+#         image_path,
+#         lang=["en"],
+#         gpu=False,
+#         output="text",
+#         preprocess={
+#             "grayscale": True,
+#             "threshold": True,
+#             "threshold_method": 'adaptive',
+#             "blur": True,
+#             "blur_ksize": (5, 5),
+#             "morph": True,
+#             "morph_op": 'close',
+#             "morph_kernel_size": (3, 3),
+#             "enhance_contrast": True,
+#             "clahe_clip": 2.0,
+#             "clahe_grid_size": (8, 8),
+#             "edge_detection": False
+#         },
+#         adjust_contrast=0.7
+#     )""")
+
+#     models = ["easyocr", "paddleocr", "pytesseract", "ddddocr", "zerox"]
+#     model = strcmp(model, models)[0]
+#     lang = lang_auto_detect(lang, model)
+#     cvt_cmp = True
+#     if isinstance(image, str) and isa(image, 'file'):
+#         image = cv2.imread(image)
+#     elif isa(image, 'image'):
+#         cvt_cmp = False
+#         image = np.array(image)
+#     else:
+#         raise ValueError(f"unsupported image type: {type(image)}")
+
+#     # Ensure lang is always a list
+#     if isinstance(lang, str):
+#         lang = [lang]
+
+#     # ! preprocessing img
+#     if preprocess is None:
+#         preprocess = {}
+#     image_process = preprocess_img(image, **preprocess)
+#     plt.figure(figsize=figsize) if show else None
+#     # plt.subplot(131)
+#     # plt.imshow(cv2.cvtColor(image, cmap)) if cvt_cmp else plt.imshow(image)
+#     # plt.subplot(132)
+#     # plt.imshow(image_process)
+#     # plt.subplot(133)
+#     if "easy" in model.lower():
+#         import easyocr
+#         print(f"detecting language(s): {lang}")
+#         # Perform OCR on the image
+#         reader = easyocr.Reader(lang, gpu=gpu)
+#         detections = reader.readtext(image_process, decoder=decoder, **kwargs)
+
+#         text_corr = []
+#         for _, text, _ in detections:
+#             text_corr.append(text_postprocess(text) if postprocess else text)
+
+#         if show:
+#             if ax is None:
+#                 ax = plt.gca()
+#             for i, (bbox, text, score) in enumerate(detections):
+#                 if score > thr:
+#                     top_left = tuple(map(int, bbox[0]))
+#                     bottom_right = tuple(map(int, bbox[2]))
+#                     image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
+#                     image = add_text_pil(
+#                         image,
+#                         text_corr[i],
+#                         top_left,
+#                         cvt_cmp=cvt_cmp,
+#                         font_size=fontsize * 6,
+#                         color=fontcolor,
+#                     )
+#             try:
+#                 img_cmp = cv2.cvtColor(image, cmap) if cvt_cmp else image
+#             except:
+#                 img_cmp = image
+
+#             ax.imshow(img_cmp) if cvt_cmp else ax.imshow(image)
+#             ax.axis("off")
+
+#             if output == "all":
+#                 return ax, detections
+#             elif "t" in output.lower() and "x" in output.lower():
+#                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
+#                 if postprocess:
+#                     return ax, text
+#                 else:
+#                     return text_corr
+#             elif "score" in output.lower() or "prob" in output.lower():
+#                 scores = [score_ for _, _, score_ in detections]
+#                 return ax, scores
+#             elif "box" in output.lower():
+#                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+#                 return ax, bboxes
+#             else:
+#                 return ax, detections
+#         else:
+#             if output == "all":
+#                 return detections
+#             elif "t" in output.lower() and "x" in output.lower():
+#                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
+#                 return text
+#             elif "score" in output.lower() or "prob" in output.lower():
+#                 scores = [score_ for _, _, score_ in detections]
+#                 return scores
+#             elif "box" in output.lower():
+#                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+#                 return bboxes
+#             else:
+#                 return detections
+#     elif "pad" in model.lower():
+#         from paddleocr import PaddleOCR
+#         logging.getLogger("ppocr").setLevel(logging.ERROR)
+
+#         lang = strcmp(lang, ['ch', 'en', 'french', 'german', 'korean', 'japan'])[0]
+#         ocr = PaddleOCR(
+#             use_angle_cls=True,
+#             cls=True,
+#             lang=lang
+#         )  # PaddleOCR supports only one language at a time
+#         cls = kwargs.pop('cls', True)
+#         result = ocr.ocr(image_process, cls=cls, **kwargs)
+#         detections = []
+#         if result[0] is not None:
+#             for line in result[0]:
+#                 bbox, (text, score) = line
+#                 text = str2words(text) if postprocess else text  # spell-check
+#                 detections.append((bbox, text, score))
+
+#         if show:
+#             if ax is None:
+#                 ax = plt.gca()
+#             for bbox, text, score in detections:
+#                 if score > thr:
+#                     top_left = tuple(map(int, bbox[0]))
+#                     bottom_left = tuple(
+#                         map(int, bbox[1])
+#                     )  # Bottom-left for more accurate placement
+#                     bottom_right = tuple(map(int, bbox[2]))
+#                     image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
+#                     image = add_text_pil(
+#                         image,
+#                         text,
+#                         top_left,
+#                         cvt_cmp=cvt_cmp,
+#                         font_size=fontsize * 6,
+#                         color=fontcolor,
+#                         bg_color=bg_color,
+#                     )
+#             try:
+#                 img_cmp = cv2.cvtColor(image, cmap) if cvt_cmp else image
+#             except:
+#                 img_cmp = image
+
+#             ax.imshow(img_cmp)
+#             ax.axis("off")
+#             if output == "all":
+#                 return ax, detections
+#             elif "t" in output.lower() and "x" in output.lower():
+#                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
+#                 return ax, text
+#             elif "score" in output.lower() or "prob" in output.lower():
+#                 scores = [score_ for _, _, score_ in detections]
+#                 return ax, scores
+#             elif "box" in output.lower():
+#                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+#                 return ax, bboxes
+#             else:
+#                 return ax, detections
+#         else:
+#             if output == "all":
+#                 return detections
+#             elif "t" in output.lower() and "x" in output.lower():
+#                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
+#                 return text
+#             elif "score" in output.lower() or "prob" in output.lower():
+#                 scores = [score_ for _, _, score_ in detections]
+#                 return scores
+#             elif "box" in output.lower():
+#                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+#                 return bboxes
+#             else:
+#                 return detections
+#     elif "ddddocr" in model.lower():
+#         import ddddocr
+
+#         ocr = ddddocr.DdddOcr(det=False, ocr=True)
+#         image_bytes = convert_image_to_bytes(image_process)
+
+#         results = ocr.classification(image_bytes)  # Text extraction
+
+#         # Optional: Perform detection for bounding boxes
+#         detections = []
+#         if kwargs.get("det", False):
+#             det_ocr = ddddocr.DdddOcr(det=True)
+#             det_results = det_ocr.detect(image_bytes)
+#             for box in det_results:
+#                 top_left = (box[0], box[1])
+#                 bottom_right = (box[2], box[3])
+#                 detections.append((top_left, bottom_right))
+
+#         if postprocess is None:
+#             postprocess = dict(
+#                 spell_check=True,
+#                 clean=True,
+#                 filter=dict(min_length=2),
+#                 pattern=None,
+#                 merge=True,
+#             )
+#         text_corr = []
+#         [
+#             text_corr.extend(text_postprocess(text, **postprocess))
+#             for _, text, _ in detections
+#         ]
+#         # Visualization
+#         if show:
+#             if ax is None:
+#                 ax = plt.gca()
+#             image_vis = image.copy()
+#             if detections:
+#                 for top_left, bottom_right in detections:
+#                     cv2.rectangle(image_vis, top_left, bottom_right, box_color, 2)
+#             image_vis = cv2.cvtColor(image_vis, cmap)
+#             ax.imshow(image_vis)
+#             ax.axis("off")
+#         return detections
+
+#     elif "zerox" in model.lower():
+#         from pyzerox import zerox
+#         result = zerox(image_process)
+#         detections = [(bbox, text, score) for bbox, text, score in result]
+#         # Postprocess and visualize
+#         if postprocess is None:
+#             postprocess = dict(
+#                 spell_check=True,
+#                 clean=True,
+#                 filter=dict(min_length=2),
+#                 pattern=None,
+#                 merge=True,
+#             )
+#         text_corr = [text_postprocess(text, **postprocess) for _, text, _ in detections]
+
+#         # Display results if 'show' is True
+#         if show:
+#             if ax is None:
+#                 ax = plt.gca()
+#             for bbox, text, score in detections:
+#                 if score > thr:
+#                     top_left = tuple(map(int, bbox[0]))
+#                     bottom_right = tuple(map(int, bbox[2]))
+#                     image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
+#                     image = add_text_pil(image, text, top_left, cvt_cmp=cvt_cmp, font_size=fontsize * 6, color=fontcolor, bg_color=bg_color)
+#             ax.imshow(image)
+#             ax.axis("off")
+
+#         # Return result based on 'output' type
+#         if output == "all":
+#             return ax, detections
+#         elif "t" in output.lower() and "x" in output.lower():
+#             text = [text_ for _, text_, score_ in detections if score_ >= thr]
+#             return ax, text
+#         elif "score" in output.lower() or "prob" in output.lower():
+#             scores = [score_ for _, _, score_ in detections]
+#             return ax, scores
+#         elif "box" in output.lower():
+#             bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+#             return ax, bboxes
+#         else:
+#             return detections
+#     else:  # "pytesseract"
+#         import pytesseract
+#         if ax is None:
+#             ax = plt.gca()
+#         text = pytesseract.image_to_string(image_process, lang="+".join(lang), **kwargs)
+#         bboxes = pytesseract.image_to_boxes(image_process, **kwargs)
+#         if show:
+#             # Image dimensions
+#             h, w, _ = image.shape
+
+#             for line in bboxes.splitlines():
+#                 parts = line.split()
+#                 if len(parts) == 6:
+#                     char, left, bottom, right, top, _ = parts
+#                     left, bottom, right, top = map(int, [left, bottom, right, top])
+
+#                     # Convert Tesseract coordinates (bottom-left and top-right) to (top-left and bottom-right)
+#                     top_left = (left, h - top)
+#                     bottom_right = (right, h - bottom)
+
+#                     # Draw the bounding box
+#                     image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
+#                     image = add_text_pil(
+#                         image,
+#                         char,
+#                         top_left,
+#                         cvt_cmp=cvt_cmp,
+#                         font_size=fontsize * 6,
+#                         color=fontcolor,
+#                     )
+#             img_cmp = cv2.cvtColor(image, cmap)
+#             ax.imshow(img_cmp)
+#             ax.axis("off")
+#             if output == "all":
+#                 # Get verbose data including boxes, confidences, line and page numbers
+#                 detections = pytesseract.image_to_data(image_process)
+#                 return ax, detections
+#             elif "t" in output.lower() and "x" in output.lower():
+#                 return ax, text
+#             elif "box" in output.lower():
+#                 return ax, bboxes
+#             else:
+#                 # Get information about orientation and script detection
+#                 return pytesseract.image_to_osd(image_process, **kwargs)
+#         else:
+#             if output == "all":
+#                 # Get verbose data including boxes, confidences, line and page numbers
+#                 detections = pytesseract.image_to_data(image_process, **kwargs)
+#                 return detections
+#             elif "t" in output.lower() and "x" in output.lower():
+#                 return text
+#             elif "box" in output.lower():
+#                 return bboxes
+#             else:
+#                 # Get information about orientation and script detection
+#                 return pytesseract.image_to_osd(image_process, **kwargs)
+
+
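For all the branching above, the core of the easyocr path is only a few lines. A minimal standalone sketch, assuming easyocr is installed and 'sample.png' (a hypothetical file) exists:

import easyocr

reader = easyocr.Reader(["ch_sim", "en"], gpu=False)
detections = reader.readtext("sample.png")         # list of (bbox, text, score)
texts = [t for _, t, s in detections if s >= 0.1]  # same thr filtering as get_text
print(texts)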
+# def draw_box(
+#     image,
+#     detections=None,
+#     thr=0.25,
+#     cmap=cv2.COLOR_BGR2RGB,
+#     box_color=(0, 255, 0),  # draw_box
+#     fontcolor=(0, 0, 255),  # draw_box
+#     fontsize=8,
+#     cvt_cmp=True,
+#     show=True,
+#     ax=None,
+#     **kwargs,
+# ):
+
+#     if ax is None:
+#         ax = plt.gca()
+#     if isinstance(image, str):
+#         image = cv2.imread(image)
+#     if detections is None:
+#         detections = get_text(image=image, show=0, output="all", **kwargs)
+
+#     for bbox, text, score in detections:
+#         if score > thr:
+#             top_left = tuple(map(int, bbox[0]))
+#             bottom_right = tuple(map(int, bbox[2]))
+#             image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
+#             image = add_text_pil(
+#                 image, text, top_left, cvt_cmp=cvt_cmp, font_size=fontsize * 6, color=fontcolor
+#             )
+
+#     img_cmp = cv2.cvtColor(image, cmap)
+#     if show:
+#         ax.imshow(img_cmp)
+#         ax.axis("off")
+#     # plt.show()
+#     return img_cmp
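draw_box just replays detections onto the source image; the drawing step itself is easy to reproduce. A self-contained sketch with one made-up detection (uses cv2.putText instead of add_text_pil, so Latin labels only):

import cv2
import numpy as np

img = np.full((120, 320, 3), 255, dtype=np.uint8)
bbox = [(20, 30), (300, 30), (300, 90), (20, 90)]  # easyocr-style 4-point box
cv2.rectangle(img, bbox[0], bbox[2], (0, 255, 0), 2)
cv2.putText(img, "AB-123 (0.97)", (20, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)
cv2.imwrite("boxed.png", img)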