py2ls 0.1.10.12__py3-none-any.whl → 0.2.7.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of py2ls might be problematic.

Files changed (72)
  1. py2ls/.DS_Store +0 -0
  2. py2ls/.git/.DS_Store +0 -0
  3. py2ls/.git/index +0 -0
  4. py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
  5. py2ls/.git/objects/.DS_Store +0 -0
  6. py2ls/.git/refs/.DS_Store +0 -0
  7. py2ls/ImageLoader.py +621 -0
  8. py2ls/__init__.py +7 -5
  9. py2ls/apptainer2ls.py +3940 -0
  10. py2ls/batman.py +164 -42
  11. py2ls/bio.py +2595 -0
  12. py2ls/cell_image_clf.py +1632 -0
  13. py2ls/container2ls.py +4635 -0
  14. py2ls/corr.py +475 -0
  15. py2ls/data/.DS_Store +0 -0
  16. py2ls/data/email/email_html_template.html +88 -0
  17. py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
  18. py2ls/data/hyper_param_tabrepo_2024.py +1753 -0
  19. py2ls/data/mygenes_fields_241022.txt +355 -0
  20. py2ls/data/re_common_pattern.json +173 -0
  21. py2ls/data/sns_info.json +74 -0
  22. py2ls/data/styles/.DS_Store +0 -0
  23. py2ls/data/styles/example/.DS_Store +0 -0
  24. py2ls/data/styles/stylelib/.DS_Store +0 -0
  25. py2ls/data/styles/stylelib/grid.mplstyle +15 -0
  26. py2ls/data/styles/stylelib/high-contrast.mplstyle +6 -0
  27. py2ls/data/styles/stylelib/high-vis.mplstyle +4 -0
  28. py2ls/data/styles/stylelib/ieee.mplstyle +15 -0
  29. py2ls/data/styles/stylelib/light.mplstyl +6 -0
  30. py2ls/data/styles/stylelib/muted.mplstyle +6 -0
  31. py2ls/data/styles/stylelib/nature-reviews-latex.mplstyle +616 -0
  32. py2ls/data/styles/stylelib/nature-reviews.mplstyle +616 -0
  33. py2ls/data/styles/stylelib/nature.mplstyle +31 -0
  34. py2ls/data/styles/stylelib/no-latex.mplstyle +10 -0
  35. py2ls/data/styles/stylelib/notebook.mplstyle +36 -0
  36. py2ls/data/styles/stylelib/paper.mplstyle +290 -0
  37. py2ls/data/styles/stylelib/paper2.mplstyle +305 -0
  38. py2ls/data/styles/stylelib/retro.mplstyle +4 -0
  39. py2ls/data/styles/stylelib/sans.mplstyle +10 -0
  40. py2ls/data/styles/stylelib/scatter.mplstyle +7 -0
  41. py2ls/data/styles/stylelib/science.mplstyle +48 -0
  42. py2ls/data/styles/stylelib/std-colors.mplstyle +4 -0
  43. py2ls/data/styles/stylelib/vibrant.mplstyle +6 -0
  44. py2ls/data/tiles.csv +146 -0
  45. py2ls/data/usages_pd.json +1417 -0
  46. py2ls/data/usages_sns.json +31 -0
  47. py2ls/docker2ls.py +5446 -0
  48. py2ls/ec2ls.py +61 -0
  49. py2ls/fetch_update.py +145 -0
  50. py2ls/ich2ls.py +1955 -296
  51. py2ls/im2.py +8242 -0
  52. py2ls/image_ml2ls.py +2100 -0
  53. py2ls/ips.py +33909 -3418
  54. py2ls/ml2ls.py +7700 -0
  55. py2ls/mol.py +289 -0
  56. py2ls/mount2ls.py +1307 -0
  57. py2ls/netfinder.py +873 -351
  58. py2ls/nl2ls.py +283 -0
  59. py2ls/ocr.py +1581 -458
  60. py2ls/plot.py +10394 -314
  61. py2ls/rna2ls.py +311 -0
  62. py2ls/ssh2ls.md +456 -0
  63. py2ls/ssh2ls.py +5933 -0
  64. py2ls/ssh2ls_v01.py +2204 -0
  65. py2ls/stats.py +66 -172
  66. py2ls/temp20251124.py +509 -0
  67. py2ls/translator.py +2 -0
  68. py2ls/utils/decorators.py +3564 -0
  69. py2ls/utils_bio.py +3453 -0
  70. {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/METADATA +113 -224
  71. {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/RECORD +72 -16
  72. {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/WHEEL +0 -0
py2ls/ocr.py CHANGED
@@ -1,28 +1,76 @@
- import easyocr
  import cv2
+ import os
  import numpy as np
  import matplotlib.pyplot as plt
  from py2ls.ips import (
      strcmp,
      detect_angle,
- ) # Ensure this function is defined in your 'ips' module
- from spellchecker import SpellChecker
- import re
-
- from PIL import Image, ImageDraw, ImageFont
- import PIL.PngImagePlugin
- import pytesseract
- from paddleocr import PaddleOCR
+     str2words,
+     isa
+ )
+ from PIL import Image
  import logging
-
- logging.getLogger("ppocr").setLevel(
-     logging.WARNING
- ) # or logging.ERROR to show only error messages
+ from typing import Union, List, Dict, Tuple, Optional
+ from dataclasses import dataclass
+ from enum import Enum, auto
+ import warnings
+ # Suppress unnecessary warnings
+ warnings.filterwarnings('ignore')
 
  """
- Optical Character Recognition (OCR)
+ Enhanced Optical Character Recognition (OCR) Package
  """
 
+ class OCREngine(Enum):
+     EASYOCR = auto()
+     PADDLEOCR = auto()
+     PYTHON_TESSERACT = auto()
+     DDDDOCR = auto()
+     ZEROX = auto()
+
+ @dataclass
+ class OCRResult:
+     text: str
+     confidence: float
+     bbox: Optional[List[Tuple[int, int]]] = None
+     language: Optional[str] = None
+     engine: Optional[str] = None
+
+     def __str__(self):
+         return f"Text: {self.text} (Confidence: {self.confidence:.2f})"
+
+ @dataclass
+ class OCRConfig:
+     languages: List[str] = None
+     engine: OCREngine = OCREngine.PADDLEOCR
+     threshold: float = 0.1
+     decoder: str = "wordbeamsearch"
+     preprocess: Dict = None
+     postprocess: Dict = None
+     visualization: Dict = None
+
+     def __post_init__(self):
+         if self.languages is None:
+             self.languages = ["en"]
+         if self.preprocess is None:
+             self.preprocess = {
+                 "grayscale": True,
+                 "threshold": True,
+                 "rotate": "auto"
+             }
+         if self.postprocess is None:
+             self.postprocess = {
+                 "spell_check": True,
+                 "clean": True
+             }
+         if self.visualization is None:
+             self.visualization = {
+                 "show": True,
+                 "box_color": (0, 255, 0),
+                 "text_color": (116, 173, 233),
+                 "font_size": 8
+             }
+
  # Valid language codes
  lang_valid = {
      "easyocr": {
@@ -153,22 +201,257 @@ lang_valid = {
      },
  }
 
+ class OCRProcessor:
+     def __init__(self, config: OCRConfig = None):
+         self.config = config if config else OCRConfig()
+         self._initialize_engine()
+
+     def _initialize_engine(self):
+         """Initialize the selected OCR engine"""
+         engine_map = {
+             OCREngine.EASYOCR: "easyocr",
+             OCREngine.PADDLEOCR: "paddleocr",
+             OCREngine.PYTHON_TESSERACT: "pytesseract",
+             OCREngine.DDDDOCR: "ddddocr",
+             OCREngine.ZEROX: "zerox"
+         }
+         self.engine_name = engine_map.get(self.config.engine, "paddleocr")
+
+     def process_image(self, image_path: Union[str, np.ndarray]) -> List[OCRResult]:
+         """Main method to process an image and return OCR results"""
+         try:
+             # Load and preprocess image
+             image = self._load_image(image_path)
+             processed_image = self._preprocess_image(image)
+
+             # Perform OCR
+             results = self._perform_ocr(processed_image)
+
+             # Post-process results
+             final_results = self._postprocess_results(results)
+
+             # Visualize if needed
+             if self.config.visualization.get('show', True):
+                 self._visualize_results(image, final_results)
+
+             return final_results
+
+         except Exception as e:
+             logging.error(f"Error processing image: {str(e)}")
+             raise
+
+     def _load_image(self, image_path: Union[str, np.ndarray]) -> np.ndarray:
+         """Load image from path or numpy array"""
+         if isinstance(image_path, str):
+             image = cv2.imread(image_path)
+             if image is None:
+                 raise ValueError(f"Could not load image from path: {image_path}")
+         elif isinstance(image_path, np.ndarray):
+             image = image_path
+         else:
+             raise ValueError("Input must be either image path or numpy array")
+
+         return image
+
+     def _preprocess_image(self, image: np.ndarray) -> np.ndarray:
+         """Apply preprocessing steps to the image"""
+         return preprocess_img(image, **self.config.preprocess)
+
+     def _perform_ocr(self, image: np.ndarray) -> List[OCRResult]:
+         """Perform OCR using the selected engine"""
+         engine_methods = {
+             OCREngine.EASYOCR: self._easyocr_recognize,
+             OCREngine.PADDLEOCR: self._paddleocr_recognize,
+             OCREngine.PYTHON_TESSERACT: self._pytesseract_recognize,
+             OCREngine.DDDDOCR: self._ddddocr_recognize,
+             OCREngine.ZEROX: self._zerox_recognize
+         }
+
+         method = engine_methods.get(self.config.engine)
+         if not method:
+             raise ValueError(f"Unsupported OCR engine: {self.config.engine}")
+
+         return method(image)
+
+     def _postprocess_results(self, results: List[OCRResult]) -> List[OCRResult]:
+         """Apply post-processing to OCR results"""
+         if not self.config.postprocess:
+             return results
+
+         for result in results:
+             if self.config.postprocess.get('spell_check', False):
+                 result.text = str2words(result.text)
+             if self.config.postprocess.get('clean', False):
+                 result.text = self._clean_text(result.text)
+
+         return results
+
+     def _visualize_results(self, image: np.ndarray, results: List[OCRResult]):
+         """Visualize OCR results on the original image"""
+         vis_config = self.config.visualization
+         fig, ax = plt.subplots(figsize=(10, 10))
+
+         for result in results:
+             if result.confidence >= self.config.threshold and result.bbox:
+                 top_left = tuple(map(int, result.bbox[0]))
+                 bottom_right = tuple(map(int, result.bbox[2]))
+
+                 # Draw bounding box
+                 image = cv2.rectangle(
+                     image,
+                     top_left,
+                     bottom_right,
+                     vis_config['box_color'],
+                     2
+                 )
+
+                 # Add text
+                 image = add_text_pil(
+                     image,
+                     result.text,
+                     top_left,
+                     font_size=vis_config['font_size'] * 6,
+                     color=vis_config['text_color'],
+                     bg_color=(133, 203, 245, 100)
+                 )
+
+         # Display the image
+         ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+         ax.axis("off")
+         plt.show()
+
+     # Engine-specific recognition methods
+     def _easyocr_recognize(self, image: np.ndarray) -> List[OCRResult]:
+         """Recognize text using EasyOCR"""
+         import easyocr
+
+         lang = lang_auto_detect(self.config.languages, "easyocr")
+         reader = easyocr.Reader(lang, gpu=self.config.use_gpu)
+         detections = reader.readtext(image, decoder=self.config.decoder)
+
+         return [
+             OCRResult(
+                 text=text,
+                 confidence=score,
+                 bbox=bbox,
+                 engine="easyocr"
+             ) for bbox, text, score in detections
+         ]
+
+     def _paddleocr_recognize(self, image: np.ndarray) -> List[OCRResult]:
+         """Recognize text using PaddleOCR"""
+         from paddleocr import PaddleOCR
+
+         lang = lang_auto_detect(self.config.languages, "paddleocr")
+         ocr = PaddleOCR(
+             use_angle_cls=True,
+             lang=lang[0],  # PaddleOCR supports one language at a time
+         )
+         result = ocr.ocr(image, cls=True)
+
+         ocr_results = []
+         if result and result[0]:
+             for line in result[0]:
+                 if line:
+                     bbox, (text, score) = line
+                     ocr_results.append(
+                         OCRResult(
+                             text=text,
+                             confidence=score,
+                             bbox=bbox,
+                             engine="paddleocr"
+                         )
+                     )
+
+         return ocr_results
+
+     def _pytesseract_recognize(self, image: np.ndarray) -> List[OCRResult]:
+         """Recognize text using pytesseract"""
+         import pytesseract
+
+         lang = lang_auto_detect(self.config.languages, "pytesseract")
+         data = pytesseract.image_to_data(
+             image,
+             lang="+".join(lang),
+             output_type=pytesseract.Output.DICT
+         )
+
+         ocr_results = []
+         for i in range(len(data['text'])):
+             if int(data['conf'][i]) > 0:  # Filter out empty results
+                 ocr_results.append(
+                     OCRResult(
+                         text=data['text'][i],
+                         confidence=float(data['conf'][i])/100,
+                         bbox=(
+                             (data['left'][i], data['top'][i]),
+                             (data['left'][i] + data['width'][i], data['top'][i]),
+                             (data['left'][i] + data['width'][i], data['top'][i] + data['height'][i]),
+                             (data['left'][i], data['top'][i] + data['height'][i])
+                         ),
+                         engine="pytesseract"
+                     )
+                 )
+
+         return ocr_results
+
+     def _ddddocr_recognize(self, image: np.ndarray) -> List[OCRResult]:
+         """Recognize text using ddddocr"""
+         import ddddocr
+
+         ocr = ddddocr.DdddOcr(det=False, ocr=True)
+         image_bytes = convert_image_to_bytes(image)
+         text = ocr.classification(image_bytes)
+
+         return [
+             OCRResult(
+                 text=text,
+                 confidence=1.0,  # ddddocr doesn't provide confidence scores
+                 engine="ddddocr"
+             )
+         ]
+
+     def _zerox_recognize(self, image: np.ndarray) -> List[OCRResult]:
+         """Recognize text using pyzerox"""
+         from pyzerox import zerox
+
+         results = zerox(image)
+         return [
+             OCRResult(
+                 text=text,
+                 confidence=score,
+                 bbox=bbox,
+                 engine="zerox"
+             ) for bbox, text, score in results
+         ]
+
+     @staticmethod
+     def _clean_text(text: str) -> str:
+         """Clean text by removing special characters and extra spaces"""
+         import re
+         text = re.sub(r'[^\w\s]', '', text)
+         text = ' '.join(text.split())
+         return text
 
  def lang_auto_detect(
-     lang,
-     model="easyocr", # "easyocr" or "pytesseract"
- ):
+     lang: Union[str, List[str]],
+     model: str = "easyocr",  # "easyocr" or "pytesseract"
+ ) -> List[str]:
+     """Automatically detect and validate language codes for the specified OCR model."""
      models = ["easyocr", "paddleocr", "pytesseract"]
      model = strcmp(model, models)[0]
      res_lang = []
+
      if isinstance(lang, str):
          lang = [lang]
+
      for i in lang:
          res_lang.append(lang_valid[model][strcmp(i, list(lang_valid[model].keys()))[0]])
+
      return res_lang
 
-
- def determine_src_points(image):
+ def determine_src_points(image: np.ndarray) -> np.ndarray:
+     """Determine source points for perspective correction."""
      gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
      _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
      contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
@@ -204,8 +487,8 @@ def determine_src_points(image):
      )
      return src_points
 
-
- def get_default_camera_matrix(image_shape):
+ def get_default_camera_matrix(image_shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
+     """Generate a default camera matrix for undistortion."""
      height, width = image_shape[:2]
      focal_length = width
      center = (width / 2, height / 2)
@@ -216,8 +499,8 @@ def get_default_camera_matrix(image_shape):
      dist_coeffs = np.zeros((4, 1))  # Assuming no distortion
      return camera_matrix, dist_coeffs
 
-
- def correct_perspective(image, src_points):
+ def correct_perspective(image: np.ndarray, src_points: np.ndarray) -> np.ndarray:
+     """Correct perspective distortion in an image."""
      # Define the destination points for the perspective transform
      width, height = 1000, 1000  # Adjust size as needed
      dst_points = np.array(
@@ -231,8 +514,8 @@ def correct_perspective(image, src_points):
      corrected_image = cv2.warpPerspective(image, M, (width, height))
      return corrected_image
 
-
- def detect_text_orientation(image):
+ def detect_text_orientation(image: np.ndarray) -> float:
+     """Detect the orientation angle of text in an image."""
      gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
      edges = cv2.Canny(gray, 50, 150, apertureSize=3)
      lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)
@@ -250,8 +533,8 @@ def detect_text_orientation(image):
      median_angle = np.median(angles)
      return median_angle
 
-
- def rotate_image(image, angle):
+ def rotate_image(image: np.ndarray, angle: float) -> np.ndarray:
+     """Rotate an image by a given angle."""
      center = (image.shape[1] // 2, image.shape[0] // 2)
      rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
      rotated_image = cv2.warpAffine(
@@ -259,8 +542,8 @@ def rotate_image(image, angle):
      )
      return rotated_image
 
-
- def correct_skew(image):
+ def correct_skew(image: np.ndarray) -> np.ndarray:
+     """Correct skew in an image using contour detection."""
      gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
      coords = np.column_stack(np.where(gray > 0))
      angle = cv2.minAreaRect(coords)[-1]
@@ -276,24 +559,27 @@ def correct_skew(image):
      )
      return rotated
 
-
- def undistort_image(image, camera_matrix, dist_coeffs):
+ def undistort_image(image: np.ndarray, camera_matrix: np.ndarray, dist_coeffs: np.ndarray) -> np.ndarray:
+     """Undistort an image using camera calibration parameters."""
      return cv2.undistort(image, camera_matrix, dist_coeffs)
 
-
  def add_text_pil(
-     image,
-     text,
-     position,
-     font_size=12,
-     color=(255, 0, 0),
-     bg_color=(173, 216, 230, 120),
- ):
+     image: np.ndarray,
+     text: str,
+     position: Tuple[int, int],
+     cvt_cmp: bool = True,
+     font_size: int = 12,
+     color: Tuple[int, int, int] = (0, 0, 0),
+     bg_color: Tuple[int, int, int, int] = (133, 203, 245, 100),
+ ) -> np.ndarray:
+     """Add text to an image using PIL for better Unicode support."""
+     from PIL import Image, ImageDraw, ImageFont
+
      # Convert the image to PIL format
-     pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
-     # Create a drawing context
-     draw = ImageDraw.Draw(pil_image)
-     # Define the font (make sure to use a font that supports Chinese characters)
+     pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).convert("RGBA")
+     overlay = Image.new("RGBA", pil_image.size, (255, 255, 255, 0))
+     draw = ImageDraw.Draw(overlay)
+
      try:
          font = ImageFont.truetype(
              "/System/Library/Fonts/Supplemental/Songti.ttc", font_size
@@ -301,22 +587,14 @@ def add_text_pil(
      except IOError:
          font = ImageFont.load_default()
 
-     # cal top_left position
-     # Measure text size using textbbox
+     # Calculate text size using textbbox
      text_bbox = draw.textbbox((0, 0), text, font=font)
-     # # Alternatively, only draw the text  # Calculate text size
-     # text_width, text_height = draw.textsize(text, font=font)
      text_width = text_bbox[2] - text_bbox[0]
      text_height = text_bbox[3] - text_bbox[1]
 
      # Draw background rectangle
      x, y = position
-     # Calculate 5% of the text height for upward adjustment
-     offset = int(
-         0.1 * text_height
-     )  # no longer shifts the text upward; # int(0.5 * text_height)  # shift up by 50%
-
-     # Adjust position to match OpenCV's bottom-left alignment
+     offset = int(0.1 * text_height)
      adjusted_position = (position[0], position[1] - text_height - offset)
 
      background_rect = [
@@ -326,79 +604,90 @@ def add_text_pil(
          y + text_height,
      ]
      draw.rectangle(background_rect, fill=bg_color)
+
      # Add text to the image
      draw.text(adjusted_position, text, font=font, fill=color)
-     # Convert the image back to OpenCV format
-     image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGBA2BGR)
-     return image
-
+
+     # Combine images
+     if pil_image.mode != "RGBA":
+         pil_image = pil_image.convert("RGBA")
+     if overlay.mode != "RGBA":
+         overlay = overlay.convert("RGBA")
+     combined = Image.alpha_composite(pil_image, overlay)
+
+     # Convert back to OpenCV format
+     return cv2.cvtColor(np.array(combined), cv2.COLOR_RGBA2BGR)
 
  def preprocess_img(
-     image,
-     grayscale=True,
-     threshold=True,
-     threshold_method="adaptive",
-     rotate="auto",
-     skew=False,
-     blur=True,
-     blur_ksize=(5, 5),
-     morph=True,
-     morph_op="open",
-     morph_kernel_size=(3, 3),
-     enhance_contrast=True,
-     clahe_clip=2.0,
-     clahe_grid_size=(8, 8),
-     edge_detection=False,
- ):
+     image: Union[str, np.ndarray],
+     grayscale: bool = True,
+     threshold: bool = True,
+     threshold_method: str = "adaptive",
+     rotate: Union[str, float] = "auto",
+     skew: bool = False,
+     blur: bool = False,
+     blur_ksize: Tuple[int, int] = (5, 5),
+     morph: bool = True,
+     morph_op: str = "open",
+     morph_kernel_size: Tuple[int, int] = (3, 3),
+     enhance_contrast: bool = True,
+     clahe_clip: float = 2.0,
+     clahe_grid_size: Tuple[int, int] = (8, 8),
+     edge_detection: bool = False,
+ ) -> np.ndarray:
      """
-     Preprocessing steps:
-
-     Grayscale conversion: if grayscale is True, convert the image to grayscale.
-     Binarization: binarize the image according to the threshold and threshold_method parameters.
-     Denoising: apply Gaussian blur to reduce noise.
-     Morphology: select a morphological operation via morph_op (open, close, dilate, erode) to remove noise or fill holes.
-     Contrast enhancement: enhance image contrast with CLAHE.
-     Edge detection: if edge_detection is True, apply the Canny edge detector.
-
-     Preprocess the image to improve OCR accuracy.
-     Parameters:
-     image: input image path or image data.
-     grayscale: whether to convert the image to grayscale.
-     threshold: whether to binarize the image.
-     threshold_method: binarization method, 'global' or 'adaptive'.
-     denoise: whether to denoise the image.
-     blur_ksize: kernel size for the Gaussian blur.
-     morph: whether to apply morphological processing.
-     morph_op: morphological operation: 'open', 'close', 'dilate', 'erode'.
-     morph_kernel_size: kernel size for morphological operations.
-     enhance_contrast: whether to enhance image contrast.
-     clahe_clip: clip limit for CLAHE (Contrast Limited Adaptive Histogram Equalization).
-     clahe_grid_size: grid size for CLAHE.
-     edge_detection: whether to perform edge detection.
+     Preprocess an image for OCR to improve recognition accuracy.
+
+     Parameters:
+         image: Input image (path, numpy array, or PIL image)
+         grayscale: Convert to grayscale
+         threshold: Apply thresholding
+         threshold_method: 'global' or 'adaptive' thresholding
+         rotate: 'auto' to auto-detect angle, or float for manual rotation
+         skew: Correct skew
+         blur: Apply Gaussian blur
+         blur_ksize: Kernel size for blur
+         morph: Apply morphological operations
+         morph_op: Type of operation ('open', 'close', 'dilate', 'erode')
+         morph_kernel_size: Kernel size for morphological operations
+         enhance_contrast: Apply CLAHE contrast enhancement
+         clahe_clip: Clip limit for CLAHE
+         clahe_grid_size: Grid size for CLAHE
+         edge_detection: Apply Canny edge detection
+
+     Returns:
+         Preprocessed image as numpy array
      """
-     if isinstance(image, PIL.PngImagePlugin.PngImageFile):
+     import PIL.PngImagePlugin
+
+     # Convert different input types to numpy array
+     if isinstance(image, (PIL.PngImagePlugin.PngImageFile, Image.Image)):
          image = np.array(image)
      if isinstance(image, str):
          image = cv2.imread(image)
      if not isinstance(image, np.ndarray):
          image = np.array(image)
-     if image.shape[1] == 4:  # Check if it has an alpha channel
-         # Drop the alpha channel (if needed), or handle it as required
-         image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
-     else:
-         # Convert RGB to BGR for OpenCV compatibility
-         image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+     try:
+         if image.shape[1] == 4:  # Check if it has an alpha channel
+             image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
+         else:
+             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+     except:
+         pass
 
      # Rotate image
      if rotate == "auto":
          angle = detect_angle(image, by="fft")
          img_preprocessed = rotate_image(image, angle)
+     elif isinstance(rotate, (int, float)):
+         img_preprocessed = rotate_image(image, rotate)
      else:
          img_preprocessed = image
 
      # Correct skew
      if skew:
-         img_preprocessed = correct_skew(image)
+         img_preprocessed = correct_skew(img_preprocessed)
 
      # Convert to grayscale
      if grayscale:
@@ -407,7 +696,7 @@ def preprocess_img(
      # Thresholding
      if threshold:
          if threshold_method == "adaptive":
-             image = cv2.adaptiveThreshold(
+             img_preprocessed = cv2.adaptiveThreshold(
                  img_preprocessed,
                  255,
                  cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
@@ -424,151 +713,172 @@ def preprocess_img(
      if blur:
          img_preprocessed = cv2.GaussianBlur(img_preprocessed, blur_ksize, 0)
 
-     # Morphological processing
+     # Morphological operations
      if morph:
          kernel = cv2.getStructuringElement(cv2.MORPH_RECT, morph_kernel_size)
-         if morph_op == "close":  # closing
-             # Purpose: closing fills small holes or gaps in foreground objects while preserving their shape and size.
-             # How it works: closing dilates first, then erodes; dilation fills small holes or gaps, erosion restores the shape of larger objects.
-             # Effects:
-             #     fills small holes and gaps in foreground objects;
-             #     smooths the edges of larger objects.
-             # Example use: filling small holes or gaps in objects.
+         if morph_op == "close":
              img_preprocessed = cv2.morphologyEx(
                  img_preprocessed, cv2.MORPH_CLOSE, kernel
              )
-         elif morph_op == "open":  # opening
-             # Purpose: opening removes small objects or noise from the background while preserving the shape and size of larger objects.
-             # How it works: opening erodes first, then dilates; erosion removes small-scale noise, dilation restores the size of the remaining objects.
-             # Effects:
-             #     removes small objects from the foreground;
-             #     smooths the contours of larger objects.
-             # Example use: removing small noise or artifacts while keeping larger objects intact.
+         elif morph_op == "open":
              img_preprocessed = cv2.morphologyEx(
                  img_preprocessed, cv2.MORPH_OPEN, kernel
              )
-         elif morph_op == "dilate":  # dilation
-             # Purpose: dilation adds pixels to object boundaries; it can fill small holes in objects or connect adjacent objects.
-             # How it works: the kernel moves over the image and each pixel is set to the maximum value within the kernel's footprint.
-             # Effects:
-             #     objects grow larger;
-             #     small holes or gaps in objects are filled.
-             # Example use: filling small holes in objects or connecting broken object parts.
+         elif morph_op == "dilate":
              img_preprocessed = cv2.dilate(img_preprocessed, kernel)
-         elif morph_op == "erode":  # erosion
-             # Purpose: erosion removes pixels from object boundaries; it can remove small-scale noise and separate objects that touch.
-             # How it works: the kernel (structuring element) moves over the image and each pixel is set to the minimum value within the kernel's footprint.
-             # Effects:
-             #     objects shrink;
-             #     small white specks are removed (in white-foreground/black-background images).
-             # Example use: removing small noise from binary images or separating touching objects
+         elif morph_op == "erode":
              img_preprocessed = cv2.erode(img_preprocessed, kernel)
 
-     # Contrast enhancement
+     # Contrast enhancement
      if enhance_contrast:
          clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=clahe_grid_size)
          img_preprocessed = clahe.apply(img_preprocessed)
 
-     # Edge detection
+     # Edge detection
      if edge_detection:
          img_preprocessed = cv2.Canny(img_preprocessed, 100, 200)
 
      return img_preprocessed
 
+ def convert_image_to_bytes(image: Union[np.ndarray, Image.Image]) -> bytes:
+     """Convert a CV2 or numpy image to bytes for OCR engines that require it."""
+     import io
+     from PIL import Image
+
+     # Convert OpenCV image (numpy array) to PIL image
+     if isinstance(image, np.ndarray):
+         image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+     # Save PIL image to a byte stream
+     img_byte_arr = io.BytesIO()
+     image.save(img_byte_arr, format='PNG')
+     return img_byte_arr.getvalue()
 
  def text_postprocess(
-     text,
-     spell_check=True,
-     clean=True,
-     filter=dict(min_length=2),
-     pattern=None,
-     merge=True,
- ):
-
-     def correct_spelling(text_list):
+     text: Union[str, List[str]],
+     spell_check: bool = True,
+     clean: bool = True,
+     filter: Dict = None,
+     pattern: str = None,
+     merge: bool = True,
+ ) -> Union[str, List[str]]:
+     """
+     Post-process OCR results to improve text quality.
+
+     Parameters:
+         text: Input text or list of texts
+         spell_check: Apply spell checking
+         clean: Remove special characters
+         filter: Dictionary with filtering options (e.g., min_length)
+         pattern: Regex pattern to match
+         merge: Merge fragments into single string
+
+     Returns:
+         Processed text or list of texts
+     """
+     import re
+     from spellchecker import SpellChecker
+
+     if filter is None:
+         filter = {"min_length": 2}
+
+     if isinstance(text, str):
+         text = [text]
+
+     def correct_spelling(text_list: List[str]) -> List[str]:
          spell = SpellChecker()
-         corrected_text = [spell.candidates(word) for word in text_list]
-         return corrected_text
+         return [spell.correction(word) if spell.correction(word) else word for word in text_list]
 
-     def clean_text(text_list):
-         cleaned_text = [re.sub(r"[^\w\s]", "", text) for text in text_list]
-         return cleaned_text
+     def clean_text(text_list: List[str]) -> List[str]:
+         return [re.sub(r"[^\w\s]", "", t) for t in text_list]
 
-     def filter_text(text_list, min_length=2):
-         filtered_text = [text for text in text_list if len(text) >= min_length]
-         return filtered_text
+     def filter_text(text_list: List[str], min_length: int = 2) -> List[str]:
+         return [t for t in text_list if len(t) >= min_length]
 
-     def extract_patterns(text_list, pattern):
-         pattern = re.compile(pattern)
-         matched_text = [text for text in text_list if pattern.search(text)]
-         return matched_text
+     def extract_patterns(text_list: List[str], pattern: str) -> List[str]:
+         compiled_pattern = re.compile(pattern)
+         return [t for t in text_list if compiled_pattern.search(t)]
 
-     def merge_fragments(text_list):
-         merged_text = " ".join(text_list)
-         return merged_text
+     def merge_fragments(text_list: List[str]) -> str:
+         return " ".join(text_list)
 
      results = text
-     print(results)
      if spell_check:
          results = correct_spelling(results)
      if clean:
          results = clean_text(results)
      if filter:
-         results = filter_text(
-             results, min_length=postprocess["filter"].get("min_length", 2)
-         )
+         results = filter_text(results, min_length=filter.get("min_length", 2))
      if pattern:
-         results = extract_patterns(results, postprocess["pattern"])
-     if merge:
+         results = extract_patterns(results, pattern)
+     if merge and isinstance(results, list):
          results = merge_fragments(results)
 
+     return results
+
+ def save_ocr_results(results: List[OCRResult], dir_save: str):
+     fname, output = os.path.splitext(dir_save)
+     if output == "txt":
+         with open(dir_save, "w", encoding="utf-8") as f:
+             for r in results:
+                 f.write(r.text + "\n")
+
+     elif output == "csv":
+         import pandas as pd
+         df = pd.DataFrame([r.__dict__ for r in results])
+         df.to_csv(dir_save, index=False)
+
+     elif output == "xlsx":
+         import pandas as pd
+         df = pd.DataFrame([r.__dict__ for r in results])
+         df.to_excel(dir_save, index=False)
+
+     elif output == "json":
+         import json
+         with open(dir_save, "w", encoding="utf-8") as f:
+             json.dump([r.__dict__ for r in results], f, indent=4)
+
+     elif output == "docx":
+         from docx import Document
+         doc = Document()
+         for r in results:
+             doc.add_paragraph(r.text)
+         doc.save(dir_save)
 
- # https://www.jaided.ai/easyocr/documentation/
- # extract text from an image with EasyOCR
  def get_text(
-     image,
-     lang=["ch_sim", "en"],
-     model="paddleocr",  # "pytesseract","paddleocr","easyocr"
-     thr=0.1,
-     gpu=True,
-     decoder="wordbeamsearch",  # 'greedy', 'beamsearch' and 'wordbeamsearch' (highly accurate)
-     output="all",
-     preprocess=None,
-     postprocess="not ready",
-     show=True,
-     ax=None,
-     cmap=cv2.COLOR_BGR2RGB,  # draw_box
-     font=cv2.FONT_HERSHEY_SIMPLEX,
-     font_scale=0.8,
-     thickness_text=2,  # Line thickness of 2 px
-     color_box=(0, 255, 0),  # draw_box
-     color_text=(203, 44, 57),  # draw_box
-     bg_color=(173, 216, 230, 128),
-     usage=False,
+     image: Union[str, np.ndarray],
+     dir_save: str = None,
+     lang: Union[str, List[str]] = ["ch_sim", "en"],
+     model: str = "paddleocr",
+     thr: float = 0.1,
+     gpu: bool = True,
+     decoder: str = "wordbeamsearch",
+     output: str = "txt",
+     preprocess: Dict = None,
+     postprocess: Union[bool, Dict] = False,
+     show: bool = True,
+     ax = None,
+     cmap = cv2.COLOR_BGR2RGB,
+     font = cv2.FONT_HERSHEY_SIMPLEX,
+     fontsize: int = 8,
+     figsize: List[int] = [10, 10],
+     box_color: Tuple[int, int, int] = (0, 255, 0),
+     fontcolor: Tuple[int, int, int] = (116, 173, 233),
+     bg_color: Tuple[int, int, int, int] = (133, 203, 245, 100),
+     usage: bool = False,
      **kwargs,
- ):
+ ) -> Union[List[OCRResult], np.ndarray, Tuple[np.ndarray, List[OCRResult]]]:
      """
-     Function: performs text recognition with EasyOCR and allows custom image preprocessing and result display.
-     Parameters:
-     image: input image path or image data.
-     lang: list of OCR languages.
-     thr: confidence threshold; detections below it are filtered out.
-     gpu: whether to use the GPU.
-     output: output type: 'all' (all detections), 'text' (text), 'score' (confidence scores), 'box' (bounding boxes).
-     preprocess: dict of preprocessing parameters passed to preprocess_img.
-     show: whether to display the result image.
-     ax: Matplotlib axes used to display the image.
-     cmap: color map used to display the image.
-     color_box: color of the bounding boxes.
-     color_text: color of the text.
-     kwargs: extra arguments passed to EasyOCR's readtext.
-
-     # Usage
+     Extract text from an image using specified OCR engine.
+
+     This is a convenience wrapper around the OCRProcessor class for backward compatibility.
+     For new code, consider using the OCRProcessor class directly.
      """
+     # Backward compatibility wrapper
      if usage:
-         print(
-             """
-             image_path = 'car_plate.jpg'  # replace with your image path
+         print("""
+         Example usage:
+         image_path = 'car_plate.jpg'
          results = get_text(
              image_path,
              lang=["en"],
@@ -587,260 +897,120 @@ def get_text(
                  "clahe_clip": 2.0,
                  "clahe_grid_size": (8, 8),
                  "edge_detection": False
-             },
-             adjust_contrast=0.7
-         )
+             }
+         )""")
+         return
+
+     # Create config from parameters
+     engine_map = {
+         "easyocr": OCREngine.EASYOCR,
+         "paddleocr": OCREngine.PADDLEOCR,
+         "pytesseract": OCREngine.PYTHON_TESSERACT,
+         "ddddocr": OCREngine.DDDDOCR,
+         "zerox": OCREngine.ZEROX
+     }
+
+     config = OCRConfig(
+         languages=lang if isinstance(lang, list) else [lang],
+         engine=engine_map.get(model.lower(), OCREngine.PADDLEOCR),
+         threshold=thr,
+         decoder=decoder,
+         preprocess=preprocess if preprocess else {},
+         postprocess=postprocess if isinstance(postprocess, dict) else {"spell_check": postprocess},
+         visualization={
+             "show": show,
+             "box_color": box_color,
+             "text_color": fontcolor,
+             "font_size": fontsize
+         }
+     )
+
+     # Process image
+     processor = OCRProcessor(config)
+     results = processor.process_image(image)
+
+     # Format output based on requested type
+     if dir_save is None:
+         if output == "all":
+             return results
+         elif "text" in output.lower():
+             return [r.text for r in results]
+         elif "score" in output.lower() or "prob" in output.lower():
+             return [r.confidence for r in results]
+         elif "box" in output.lower():
+             return [r.bbox for r in results if r.bbox]
+     else:
+         save_ocr_results(results, dir_save)
+         if show:
+             print(f"OCR results saved to: {dir_save}")
+         return dir_save
+
+ def get_table(
+     image: Union[str, np.ndarray],
+     dir_save: str = "table_result.xlsx",
+     output: str = None,  # 'excel' or 'df'
+     layout: bool = True,
+     show_log: bool = True,
+     use_gpu: bool = False,
+ ):
      """
-         )
+     Recognize and extract tables using PaddleOCR's PPStructure.
+
+     Parameters:
+         image (str | np.ndarray): Path to image or numpy array
+         dir_save (str): Path to save Excel output (if output='excel')
+         output (str): 'excel' to save as .xlsx, 'df' or 'dataframe' to return pandas DataFrames
+         layout (bool): Whether to detect layout blocks
+         show_log (bool): Show PaddleOCR logs
+         use_gpu (bool): Whether to use GPU for inference
+
+     Returns:
+         List of dictionaries (if output='excel') or List of pandas DataFrames (if output='df')
+     """
+     from paddleocr import PPStructure, save_structure_res
+     import cv2
+
 
-     models = ["easyocr", "paddleocr", "pytesseract"]
-     model = strcmp(model, models)[0]
-     lang = lang_auto_detect(lang, model)
      if isinstance(image, str):
-         image = cv2.imread(image)
-
-     # Ensure lang is always a list
-     if isinstance(lang, str):
-         lang = [lang]
-
-     # ! preprocessing img
-     if preprocess is None:
-         preprocess = {}
-     image_process = preprocess_img(image, **preprocess)
-     if "easy" in model.lower():
-         print(f"detecting language(s):{lang}")
-         # Perform OCR on the image
-         reader = easyocr.Reader(lang, gpu=gpu)
-         detections = reader.readtext(image_process, decoder=decoder, **kwargs)
-         if postprocess is None:
-             postprocess = dict(
-                 spell_check=True,
-                 clean=True,
-                 filter=dict(min_length=2),
-                 pattern=None,
-                 merge=True,
-             )
-         text_corr = []
-         [
-             text_corr.extend(text_postprocess(text, **postprocess))
-             for _, text, _ in detections
-         ]
-         if show:
-             if ax is None:
-                 ax = plt.gca()
-             for bbox, text, score in detections:
-                 if score > thr:
-                     top_left = tuple(map(int, bbox[0]))
-                     bottom_right = tuple(map(int, bbox[2]))
-                     image = cv2.rectangle(image, top_left, bottom_right, color_box, 2)
-                     # image = cv2.putText(
-                     #     image, text, top_left, font, font_scale, color_text, thickness_text
-                     # )
-                     image = add_text_pil(
-                         image,
-                         text,
-                         top_left,
-                         font_size=font_scale * 32,
-                         color=color_text,
-                     )
-             img_cmp = cv2.cvtColor(image, cmap)
-             ax.imshow(img_cmp)
-             ax.axis("off")
-             # plt.show()
-             # Return results according to the requested output type
-             if output == "all":
-                 return ax, detections
-             elif "t" in output.lower() and "x" in output.lower():
-                 # Extract text, filtering out low-confidence results
-                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
-                 if postprocess:
-                     return ax, text
-                 else:
-                     return text_corr
-             elif "score" in output.lower() or "prob" in output.lower():
-                 # Extract scores
-                 scores = [score_ for _, _, score_ in detections]
-                 return ax, scores
-             elif "box" in output.lower():
-                 # Extract bounding boxes, filtering out low-confidence results
-                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
-                 return ax, bboxes
-             else:
-                 # By default, return all detection info
-                 return ax, detections
-         else:
-             # Return results according to the requested output type
-             if output == "all":
-                 return detections
-             elif "t" in output.lower() and "x" in output.lower():
-                 # Extract text, filtering out low-confidence results
-                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
-                 return text
-             elif "score" in output.lower() or "prob" in output.lower():
-                 # Extract scores
-                 scores = [score_ for _, _, score_ in detections]
-                 return scores
-             elif "box" in output.lower():
-                 # Extract bounding boxes, filtering out low-confidence results
-                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
-                 return bboxes
-             else:
-                 # By default, return all detection info
-                 return detections
-     elif "pad" in model.lower():
-         ocr = PaddleOCR(
-             use_angle_cls=True,
-             cls=True,
-         )  # PaddleOCR supports only one language at a time
-         result = ocr.ocr(image_process, **kwargs)
-         detections = []
-         for line in result[0]:
-             bbox, (text, score) = line
-             detections.append((bbox, text, score))
-         if postprocess is None:
-             postprocess = dict(
-                 spell_check=True,
-                 clean=True,
-                 filter=dict(min_length=2),
-                 pattern=None,
-                 merge=True,
-             )
-         text_corr = []
-         [
-             text_corr.extend(text_postprocess(text, **postprocess))
-             for _, text, _ in detections
-         ]
-         if show:
-             if ax is None:
-                 ax = plt.gca()
-             for bbox, text, score in detections:
-                 if score > thr:
-                     top_left = tuple(map(int, bbox[0]))
-                     bottom_left = tuple(
-                         map(int, bbox[1])
-                     )  # Bottom-left for more accurate placement
-                     bottom_right = tuple(map(int, bbox[2]))
-                     image = cv2.rectangle(image, top_left, bottom_right, color_box, 2)
-                     # image = cv2.putText(
-                     #     image, text, top_left, font, font_scale, color_text, thickness_text
-                     # )
-                     image = add_text_pil(
-                         image,
-                         text,
-                         top_left,
-                         font_size=font_scale * 32,
-                         color=color_text,
-                         bg_color=bg_color,
-                     )
-             img_cmp = cv2.cvtColor(image, cmap)
-             ax.imshow(img_cmp)
-             ax.axis("off")
-             # plt.show()
-             # Return results according to the requested output type
-             if output == "all":
-                 return ax, detections
-             elif "t" in output.lower() and "x" in output.lower():
-                 # Extract text, filtering out low-confidence results
-                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
-                 if postprocess:
-                     return ax, text
-                 else:
-                     return text_corr
-             elif "score" in output.lower() or "prob" in output.lower():
-                 # Extract scores
-                 scores = [score_ for _, _, score_ in detections]
-                 return ax, scores
-             elif "box" in output.lower():
-                 # Extract bounding boxes, filtering out low-confidence results
-                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
-                 return ax, bboxes
-             else:
-                 # By default, return all detection info
-                 return ax, detections
-         else:
-             # Return results according to the requested output type
-             if output == "all":
-                 return detections
-             elif "t" in output.lower() and "x" in output.lower():
-                 # Extract text, filtering out low-confidence results
-                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
-                 return text
-             elif "score" in output.lower() or "prob" in output.lower():
-                 # Extract scores
-                 scores = [score_ for _, _, score_ in detections]
-                 return scores
-             elif "box" in output.lower():
-                 # Extract bounding boxes, filtering out low-confidence results
-                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
-                 return bboxes
-             else:
-                 # By default, return all detection info
-                 return detections
-
-     else:  # "pytesseract"
-         if ax is None:
-             ax = plt.gca()
-         text = pytesseract.image_to_string(image_process, lang="+".join(lang), **kwargs)
-         bboxes = pytesseract.image_to_boxes(image_process, **kwargs)
-         if show:
-             # Image dimensions
-             h, w, _ = image.shape
-
-             for line in bboxes.splitlines():
-                 parts = line.split()
-                 if len(parts) == 6:
-                     char, left, bottom, right, top, _ = parts
-                     left, bottom, right, top = map(int, [left, bottom, right, top])
-
-                     # Convert Tesseract coordinates (bottom-left and top-right) to (top-left and bottom-right)
-                     top_left = (left, h - top)
-                     bottom_right = (right, h - bottom)
-
-                     # Draw the bounding box
-                     image = cv2.rectangle(image, top_left, bottom_right, color_box, 2)
-                     image = add_text_pil(
-                         image,
-                         char,
-                         left,
-                         font_size=font_scale * 32,
-                         color=color_text,
-                     )
-             img_cmp = cv2.cvtColor(image, cmap)
-             ax.imshow(img_cmp)
-             ax.axis("off")
-             if output == "all":
-                 # Get verbose data including boxes, confidences, line and page numbers
-                 detections = pytesseract.image_to_data(image_process)
-                 return ax, detections
-             elif "t" in output.lower() and "x" in output.lower():
-                 return ax, text
-             elif "box" in output.lower():
-                 return ax, bboxes
-             else:
-                 # Get information about orientation and script detection
-                 return pytesseract.image_to_osd(image_process, **kwargs)
-         else:
-             if output == "all":
-                 # Get verbose data including boxes, confidences, line and page numbers
-                 detections = pytesseract.image_to_data(image_process, **kwargs)
-                 return detections
-             elif "t" in output.lower() and "x" in output.lower():
-                 return text
-             elif "box" in output.lower():
-                 return bboxes
-             else:
-                 # Get information about orientation and script detection
-                 return pytesseract.image_to_osd(image_process, **kwargs)
-
+         img = cv2.imread(image)
+         img_name = os.path.splitext(os.path.basename(image))[0]
+     else:
+         img = image
+         img_name = "table_result"
+
+     table_engine = PPStructure(layout=layout, show_log=show_log, use_gpu=use_gpu)
+     result = table_engine(img)
+     if output is None:
+         output = "excel"
+     if output.lower() in ["df", "dataframe"]:
+         # Convert all table blocks into pandas DataFrames
+         dfs = []
+         for block in result:
+             if block["type"] == "table" and "res" in block:
+                 table_data = block["res"]["html"]
+                 try:
+                     # Read HTML into DataFrame
+                     df = pd.read_html(table_data)[0]
+                     dfs.append(df)
+                 except Exception as e:
+                     print(f"[Warning] Could not parse table block: {e}")
+         return dfs
 
+     else:
+         # Save to Excel file
+         save_structure_res(result, os.path.dirname(dir_save), img_name)
+         print(
+             f"[Info] Table saved to: {os.path.join(os.path.dirname(dir_save), img_name + '.xlsx')}"
+         )
+         return result
  def draw_box(
      image,
      detections=None,
      thr=0.25,
      cmap=cv2.COLOR_BGR2RGB,
-     color_box=(0, 255, 0),  # draw_box
-     color_text=(0, 0, 255),  # draw_box
-     font_scale=0.8,
+     box_color=(0, 255, 0),  # draw_box
+     fontcolor=(0, 0, 255),  # draw_box
+     fontsize=8,
      show=True,
      ax=None,
      **kwargs,
@@ -857,12 +1027,9 @@ def draw_box(
          if score > thr:
              top_left = tuple(map(int, bbox[0]))
              bottom_right = tuple(map(int, bbox[2]))
-             image = cv2.rectangle(image, top_left, bottom_right, color_box, 2)
-             # image = cv2.putText(
-             #     image, text, top_left, font, font_scale, color_text, thickness_text
-             # )
+             image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
              image = add_text_pil(
-                 image, text, top_left, font_size=font_scale * 32, color=color_text
+                 image, text, top_left, cvt_cmp=cvt_cmp, font_size=fontsize * 6, color=fontcolor
              )
 
      img_cmp = cv2.cvtColor(image, cmap)
@@ -871,3 +1038,959 @@ def draw_box(
      ax.axis("off")
      # plt.show()
      return img_cmp
+
+
+ #! ===========OCR Backup 250529===========
1044
+
1045
+ # import cv2
1046
+ # import numpy as np
1047
+ # import matplotlib.pyplot as plt
1048
+ # from py2ls.ips import (
1049
+ # strcmp,
1050
+ # detect_angle,
1051
+ # str2words,
1052
+ # isa
1053
+ # )
1054
+ # import logging
1055
+
1056
+ # """
1057
+ # Optical Character Recognition (OCR)
1058
+ # """
1059
+
1060
+ # # Valid language codes
1061
+ # lang_valid = {
1062
+ # "easyocr": {
1063
+ # "english": "en",
1064
+ # "thai": "th",
1065
+ # "chinese_traditional": "ch_tra",
1066
+ # "chinese": "ch_sim",
1067
+ # "japanese": "ja",
1068
+ # "korean": "ko",
1069
+ # "tamil": "ta",
1070
+ # "telugu": "te",
1071
+ # "kannada": "kn",
1072
+ # "german": "de",
1073
+ # },
1074
+ # "paddleocr": {
1075
+ # "chinese": "ch",
1076
+ # "chinese_traditional": "chinese_cht",
1077
+ # "english": "en",
1078
+ # "french": "fr",
1079
+ # "german": "de",
1080
+ # "korean": "korean",
1081
+ # "japanese": "japan",
1082
+ # "russian": "ru",
1083
+ # "italian": "it",
1084
+ # "portuguese": "pt",
1085
+ # "spanish": "es",
1086
+ # "polish": "pl",
1087
+ # "dutch": "nl",
1088
+ # "arabic": "ar",
1089
+ # "vietnamese": "vi",
1090
+ # "tamil": "ta",
1091
+ # "turkish": "tr",
1092
+ # },
1093
+ # "pytesseract": {
1094
+ # "afrikaans": "afr",
1095
+ # "amharic": "amh",
1096
+ # "arabic": "ara",
1097
+ # "assamese": "asm",
1098
+ # "azerbaijani": "aze",
1099
+ # "azerbaijani_cyrillic": "aze_cyrl",
1100
+ # "belarusian": "bel",
1101
+ # "bengali": "ben",
1102
+ # "tibetan": "bod",
1103
+ # "bosnian": "bos",
1104
+ # "breton": "bre",
1105
+ # "bulgarian": "bul",
1106
+ # "catalan": "cat",
1107
+ # "cebuano": "ceb",
1108
+ # "czech": "ces",
1109
+ # "chinese": "chi_sim",
1110
+ # "chinese_vertical": "chi_sim_vert",
1111
+ # "chinese_traditional": "chi_tra",
1112
+ # "chinese_traditional_vertical": "chi_tra_vert",
1113
+ # "cherokee": "chr",
1114
+ # "corsican": "cos",
1115
+ # "welsh": "cym",
1116
+ # "danish": "dan",
1117
+ # "danish_fraktur": "dan_frak",
1118
+ # "german": "deu",
1119
+ # "german_fraktur": "deu_frak",
1120
+ # "german_latf": "deu_latf",
1121
+ # "dhivehi": "div",
1122
+ # "dzongkha": "dzo",
1123
+ # "greek": "ell",
1124
+ # "english": "eng",
1125
+ # "middle_english": "enm",
1126
+ # "esperanto": "epo",
1127
+ # "math_equations": "equ",
1128
+ # "estonian": "est",
1129
+ # "basque": "eus",
1130
+ # "faroese": "fao",
1131
+ # "persian": "fas",
1132
+ # "filipino": "fil",
1133
+ # "finnish": "fin",
1134
+ # "french": "fra",
1135
+ # "middle_french": "frm",
1136
+ # "frisian": "fry",
1137
+ # "scottish_gaelic": "gla",
1138
+ # "irish": "gle",
1139
+ # "galician": "glg",
1140
+ # "ancient_greek": "grc",
1141
+ # "gujarati": "guj",
1142
+ # "haitian_creole": "hat",
1143
+ # "hebrew": "heb",
1144
+ # "hindi": "hin",
1145
+ # "croatian": "hrv",
1146
+ # "hungarian": "hun",
1147
+ # "armenian": "hye",
1148
+ # "inuktitut": "iku",
1149
+ # "indonesian": "ind",
1150
+ # "icelandic": "isl",
1151
+ # "italian": "ita",
1152
+ # "old_italian": "ita_old",
1153
+ # "javanese": "jav",
1154
+ # "japanese": "jpn",
1155
+ # "japanese_vertical": "jpn_vert",
1156
+ # "kannada": "kan",
1157
+ # "georgian": "kat",
1158
+ # "old_georgian": "kat_old",
1159
+ # "kazakh": "kaz",
1160
+ # "khmer": "khm",
1161
+ # "kyrgyz": "kir",
1162
+ # "kurdish_kurmanji": "kmr",
1163
+ # "korean": "kor",
1164
+ # "korean_vertical": "kor_vert",
1165
+ # "lao": "lao",
1166
+ # "latin": "lat",
1167
+ # "latvian": "lav",
1168
+ # "lithuanian": "lit",
1169
+ # "luxembourgish": "ltz",
1170
+ # "malayalam": "mal",
1171
+ # "marathi": "mar",
1172
+ # "macedonian": "mkd",
1173
+ # "maltese": "mlt",
1174
+ # "mongolian": "mon",
1175
+ # "maori": "mri",
1176
+ # "malay": "msa",
1177
+ # "burmese": "mya",
1178
+ # "nepali": "nep",
1179
+ # "dutch": "nld",
1180
+ # "norwegian": "nor",
1181
+ # "occitan": "oci",
1182
+ # "oriya": "ori",
1183
+ # "script_detection": "osd",
1184
+ # "punjabi": "pan",
1185
+ # "polish": "pol",
1186
+ # "portuguese": "por",
1187
+ # },
1188
+ # }
1189
+
1190
+
1191
+ # def lang_auto_detect(
1192
+ # lang,
1193
+ # model="easyocr", # "easyocr" or "pytesseract"
1194
+ # ):
1195
+ # models = ["easyocr", "paddleocr", "pytesseract"]
1196
+ # model = strcmp(model, models)[0]
1197
+ # res_lang = []
1198
+ # if isinstance(lang, str):
1199
+ # lang = [lang]
1200
+ # for i in lang:
1201
+ # res_lang.append(lang_valid[model][strcmp(i, list(lang_valid[model].keys()))[0]])
1202
+ # return res_lang
1203
+
1204
+
1205
+ # def determine_src_points(image):
1206
+ # gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
1207
+ # _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
1208
+ # contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
1209
+
1210
+ # # Sort contours by area and pick the largest one
1211
+ # contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
1212
+ # src_points = None
1213
+
1214
+ # for contour in contours:
1215
+ # epsilon = 0.02 * cv2.arcLength(contour, True)
1216
+ # approx = cv2.approxPolyDP(contour, epsilon, True)
1217
+ # if len(approx) == 4: # We need a quadrilateral
1218
+ # src_points = np.array(approx, dtype="float32")
1219
+ # break
1220
+
1221
+ # if src_points is not None:
1222
+ # # Order points in a specific order (top-left, top-right, bottom-right, bottom-left)
1223
+ # src_points = src_points.reshape(4, 2)
1224
+ # rect = np.zeros((4, 2), dtype="float32")
1225
+ # s = src_points.sum(axis=1)
1226
+ # diff = np.diff(src_points, axis=1)
1227
+ # rect[0] = src_points[np.argmin(s)]
1228
+ # rect[2] = src_points[np.argmax(s)]
1229
+ # rect[1] = src_points[np.argmin(diff)]
1230
+ # rect[3] = src_points[np.argmax(diff)]
1231
+ # src_points = rect
1232
+ # else:
1233
+ # # If no rectangle is detected, fallback to a default or user-defined points
1234
+ # height, width = image.shape[:2]
1235
+ # src_points = np.array(
1236
+ # [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
1237
+ # dtype="float32",
1238
+ # )
1239
+ # return src_points
1240
+
1241
+
1242
+ # def get_default_camera_matrix(image_shape):
1243
+ # height, width = image_shape[:2]
1244
+ # focal_length = width
1245
+ # center = (width / 2, height / 2)
1246
+ # camera_matrix = np.array(
1247
+ # [[focal_length, 0, center[0]], [0, focal_length, center[1]], [0, 0, 1]],
1248
+ # dtype="float32",
1249
+ # )
1250
+ # dist_coeffs = np.zeros((4, 1)) # Assuming no distortion
1251
+ # return camera_matrix, dist_coeffs
1252
+
1253
+
1254
+ # def correct_perspective(image, src_points):
1255
+ # # Define the destination points for the perspective transform
1256
+ # width, height = 1000, 1000 # Adjust size as needed
1257
+ # dst_points = np.array(
1258
+ # [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
1259
+ # dtype="float32",
1260
+ # )
1261
+
1262
+ # # Calculate the perspective transform matrix
1263
+ # M = cv2.getPerspectiveTransform(src_points, dst_points)
1264
+ # # Apply the perspective transform
1265
+ # corrected_image = cv2.warpPerspective(image, M, (width, height))
1266
+ # return corrected_image
1267
+
1268
+
1269
+ # def detect_text_orientation(image):
1270
+ # gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
1271
+ # edges = cv2.Canny(gray, 50, 150, apertureSize=3)
1272
+ # lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)
1273
+
1274
+ # if lines is None:
1275
+ # return 0
1276
+
1277
+ # angles = []
1278
+ # for rho, theta in lines[:, 0]:
1279
+ # angle = theta * 180 / np.pi
1280
+ # if angle > 90:
1281
+ # angle -= 180
1282
+ # angles.append(angle)
1283
+
1284
+ # median_angle = np.median(angles)
1285
+ # return median_angle
1286
+
1287
+
1288
+ # def rotate_image(image, angle):
1289
+ # center = (image.shape[1] // 2, image.shape[0] // 2)
1290
+ # rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
1291
+ # rotated_image = cv2.warpAffine(
1292
+ # image, rot_mat, (image.shape[1], image.shape[0]), flags=cv2.INTER_LINEAR
1293
+ # )
1294
+ # return rotated_image
1295
+
1296
+
1297
+ # def correct_skew(image):
1298
+ # gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
1299
+ # coords = np.column_stack(np.where(gray > 0))
1300
+ # angle = cv2.minAreaRect(coords)[-1]
1301
+ # if angle < -45:
1302
+ # angle = -(90 + angle)
1303
+ # else:
1304
+ # angle = -angle
1305
+ # (h, w) = image.shape[:2]
1306
+ # center = (w // 2, h // 2)
1307
+ # M = cv2.getRotationMatrix2D(center, angle, 1.0)
1308
+ # rotated = cv2.warpAffine(
1309
+ # image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
1310
+ # )
1311
+ # return rotated
1312
+
1313
+
1314
+ # def undistort_image(image, camera_matrix, dist_coeffs):
1315
+ # return cv2.undistort(image, camera_matrix, dist_coeffs)
1316
+
1317
+
+ # def add_text_pil(
+ #     image,
+ #     text,
+ #     position,
+ #     cvt_cmp=True,
+ #     font_size=12,
+ #     color=(0, 0, 0),
+ #     bg_color=(133, 203, 245, 100),
+ # ):
+ #     from PIL import Image, ImageDraw, ImageFont
+ #     # Convert the image to PIL format
+ #     pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).convert("RGBA")
+ #     # Transparent overlay so the background box can be alpha-blended
+ #     overlay = Image.new("RGBA", pil_image.size, (255, 255, 255, 0))
+ #     # Create a drawing context
+ #     draw = ImageDraw.Draw(overlay)
+
+ #     # Pick a font that supports Chinese characters, falling back to the default
+ #     try:
+ #         font = ImageFont.truetype(
+ #             "/System/Library/Fonts/Supplemental/Songti.ttc", font_size
+ #         )
+ #     except IOError:
+ #         font = ImageFont.load_default()
+
+ #     # Measure text size using textbbox
+ #     # (legacy alternative: text_width, text_height = draw.textsize(text, font=font))
+ #     text_bbox = draw.textbbox((0, 0), text, font=font)
+ #     text_width = text_bbox[2] - text_bbox[0]
+ #     text_height = text_bbox[3] - text_bbox[1]
+
+ #     # Draw background rectangle
+ #     x, y = position
+ #     # Shift the text up by 10% of its height (use 0.5 for a 50% shift)
+ #     offset = int(0.1 * text_height)
+
+ #     # Adjust position to match OpenCV's bottom-left alignment
+ #     adjusted_position = (x, y - text_height - offset)
+
+ #     background_rect = [
+ #         adjusted_position[0],
+ #         adjusted_position[1],
+ #         x + text_width,
+ #         y - offset,  # bottom edge of the rendered text
+ #     ]
+ #     draw.rectangle(background_rect, fill=bg_color)
+ #     # Add text to the image
+ #     draw.text(adjusted_position, text, font=font, fill=color)
+ #     # Ensure both images are in RGBA mode for alpha compositing
+ #     if pil_image.mode != "RGBA":
+ #         pil_image = pil_image.convert("RGBA")
+ #     if overlay.mode != "RGBA":
+ #         overlay = overlay.convert("RGBA")
+ #     combined = Image.alpha_composite(pil_image, overlay)
+ #     # Convert back to OpenCV BGR format (keep RGBA if cvt_cmp is False)
+ #     image = cv2.cvtColor(np.array(combined), cv2.COLOR_RGBA2BGR) if cvt_cmp else np.array(combined)
+ #     return image
+
+
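+ # # Usage sketch for add_text_pil (editorial, not in the original code; the
+ # # Songti.ttc path exists only on macOS, other systems fall back to PIL's
+ # # default bitmap font):
+ # #     img = cv2.imread("photo.jpg")                    # hypothetical input
+ # #     img = add_text_pil(img, "标签 label", (20, 60), font_size=24)
+ # #     cv2.imwrite("photo_labeled.jpg", img)
+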
+ # def preprocess_img(
+ #     image,
+ #     grayscale=True,
+ #     threshold=True,
+ #     threshold_method="adaptive",
+ #     rotate="auto",
+ #     skew=False,
+ #     blur=False,  # True
+ #     blur_ksize=(5, 5),
+ #     morph=True,
+ #     morph_op="open",
+ #     morph_kernel_size=(3, 3),
+ #     enhance_contrast=True,
+ #     clahe_clip=2.0,
+ #     clahe_grid_size=(8, 8),
+ #     edge_detection=False,
+ # ):
+ #     """
+ #     Preprocess an image to improve OCR accuracy.
+
+ #     Steps:
+ #     - Grayscale conversion: convert the image to grayscale if `grayscale` is True.
+ #     - Binarization: threshold the image according to `threshold` and `threshold_method`.
+ #     - Denoising: reduce noise with a Gaussian blur.
+ #     - Morphology: apply the operation selected by `morph_op` (opening, closing,
+ #       dilation, erosion) to remove noise or fill holes.
+ #     - Contrast enhancement: boost contrast with CLAHE.
+ #     - Edge detection: run Canny edge detection if `edge_detection` is True.
+
+ #     Parameters:
+ #     image: input image path or image data.
+ #     grayscale: whether to convert the image to grayscale.
+ #     threshold: whether to binarize the image.
+ #     threshold_method: binarization method, either 'global' or 'adaptive'.
+ #     blur: whether to denoise the image with a Gaussian blur.
+ #     blur_ksize: kernel size for the Gaussian blur.
+ #     morph: whether to apply morphological processing.
+ #     morph_op: morphological operation, one of 'open', 'close', 'dilate', 'erode'.
+ #     morph_kernel_size: kernel size for the morphological operation.
+ #     enhance_contrast: whether to enhance image contrast.
+ #     clahe_clip: clip limit for CLAHE (Contrast Limited Adaptive Histogram Equalization).
+ #     clahe_grid_size: tile grid size for CLAHE.
+ #     edge_detection: whether to run edge detection.
+ #     """
+ #     import PIL.PngImagePlugin
+ #     if isinstance(image, PIL.PngImagePlugin.PngImageFile):
+ #         image = np.array(image)
+ #     if isinstance(image, str):
+ #         image = cv2.imread(image)
+ #     if not isinstance(image, np.ndarray):
+ #         image = np.array(image)
+
+ #     try:
+ #         if image.ndim == 3 and image.shape[2] == 4:  # Check for an alpha channel
+ #             # Drop the alpha channel (if needed), or handle it as required
+ #             image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
+ #         else:
+ #             # Convert RGB to BGR for OpenCV compatibility
+ #             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+ #     except Exception:
+ #         pass
+
+ #     # Rotate image
+ #     if rotate == "auto":
+ #         angle = detect_angle(image, by="fft")
+ #         img_preprocessed = rotate_image(image, angle)
+ #     else:
+ #         img_preprocessed = image
+
+ #     # Correct skew (on the already-rotated image)
+ #     if skew:
+ #         img_preprocessed = correct_skew(img_preprocessed)
+
+ #     # Convert to grayscale
+ #     if grayscale:
+ #         img_preprocessed = cv2.cvtColor(img_preprocessed, cv2.COLOR_BGR2GRAY)
+
+ #     # Thresholding
+ #     if threshold:
+ #         if threshold_method == "adaptive":
+ #             img_preprocessed = cv2.adaptiveThreshold(
+ #                 img_preprocessed,
+ #                 255,
+ #                 cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+ #                 cv2.THRESH_BINARY,
+ #                 11,
+ #                 2,
+ #             )
+ #         elif threshold_method == "global":
+ #             _, img_preprocessed = cv2.threshold(
+ #                 img_preprocessed, 127, 255, cv2.THRESH_BINARY
+ #             )
+
+ #     # Denoise with a Gaussian blur
+ #     if blur:
+ #         img_preprocessed = cv2.GaussianBlur(img_preprocessed, blur_ksize, 0)
+
+ #     # Morphological processing
+ #     if morph:
+ #         kernel = cv2.getStructuringElement(cv2.MORPH_RECT, morph_kernel_size)
+ #         if morph_op == "close":
+ #             # Closing: dilate, then erode. Fills small holes and gaps in
+ #             # foreground objects while preserving their shape and size, and
+ #             # smooths the edges of larger objects.
+ #             img_preprocessed = cv2.morphologyEx(
+ #                 img_preprocessed, cv2.MORPH_CLOSE, kernel
+ #             )
+ #         elif morph_op == "open":
+ #             # Opening: erode, then dilate. Removes small objects and noise
+ #             # from the background while keeping larger objects intact and
+ #             # smoothing their contours.
+ #             img_preprocessed = cv2.morphologyEx(
+ #                 img_preprocessed, cv2.MORPH_OPEN, kernel
+ #             )
+ #         elif morph_op == "dilate":
+ #             # Dilation: each pixel takes the maximum value under the kernel,
+ #             # so objects grow. Useful for filling small holes or connecting
+ #             # broken parts of an object.
+ #             img_preprocessed = cv2.dilate(img_preprocessed, kernel)
+ #         elif morph_op == "erode":
+ #             # Erosion: each pixel takes the minimum value under the kernel,
+ #             # so objects shrink. Useful for removing small white noise and
+ #             # separating touching objects.
+ #             img_preprocessed = cv2.erode(img_preprocessed, kernel)
+
+ #     # Contrast enhancement
+ #     if enhance_contrast:
+ #         clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=clahe_grid_size)
+ #         img_preprocessed = clahe.apply(img_preprocessed)
+
+ #     # Edge detection
+ #     if edge_detection:
+ #         img_preprocessed = cv2.Canny(img_preprocessed, 100, 200)
+
+ #     return img_preprocessed
+
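+ # # Usage sketch (editorial, not in the original code): a conservative pass
+ # # before OCR; note the result is single-channel once grayscale=True, so
+ # # downstream code should not assume three channels:
+ # #     prep = preprocess_img("scan.png", threshold_method="adaptive",
+ # #                           morph_op="open", edge_detection=False)
+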
+ # def convert_image_to_bytes(image):
+ #     """
+ #     Convert a CV2 or numpy image to bytes for ddddocr.
+ #     """
+ #     import io
+ #     from PIL import Image  # needed for Image.fromarray below
+ #     # Convert OpenCV image (numpy array) to PIL image
+ #     if isinstance(image, np.ndarray):
+ #         image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+ #     # Save PIL image to a byte stream
+ #     img_byte_arr = io.BytesIO()
+ #     image.save(img_byte_arr, format='PNG')
+ #     return img_byte_arr.getvalue()
+
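+ # # Usage sketch (editorial, not in the original code): ddddocr consumes
+ # # PNG-encoded bytes, so either this helper or a direct file read works:
+ # #     ocr = ddddocr.DdddOcr(det=False, ocr=True)
+ # #     print(ocr.classification(convert_image_to_bytes(img)))
+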
+ # def text_postprocess(
+ #     text,
+ #     spell_check=True,
+ #     clean=True,
+ #     filter=dict(min_length=2),
+ #     pattern=None,
+ #     merge=True,
+ # ):
+ #     import re
+ #     from spellchecker import SpellChecker
+
+ #     def correct_spelling(text_list):
+ #         spell = SpellChecker()
+ #         corrected_text = [spell.correction(word) for word in text_list]
+ #         return corrected_text
+
+ #     def clean_text(text_list):
+ #         cleaned_text = [re.sub(r"[^\w\s]", "", text) for text in text_list]
+ #         return cleaned_text
+
+ #     def filter_text(text_list, min_length=2):
+ #         filtered_text = [text for text in text_list if len(text) >= min_length]
+ #         return filtered_text
+
+ #     def extract_patterns(text_list, pattern):
+ #         pattern = re.compile(pattern)
+ #         matched_text = [text for text in text_list if pattern.search(text)]
+ #         return matched_text
+
+ #     def merge_fragments(text_list):
+ #         merged_text = " ".join(text_list)
+ #         return merged_text
+
+ #     results = text
+ #     if spell_check:
+ #         # results = correct_spelling(results)
+ #         results = str2words(results)
+ #     if clean:
+ #         results = clean_text(results)
+ #     if filter:
+ #         results = filter_text(results, min_length=filter.get("min_length", 2))
+ #     if pattern:
+ #         results = extract_patterns(results, pattern)
+ #     if merge:
+ #         results = merge_fragments(results)
+ #     return results
+
+
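+ # # Usage sketch (editorial, not in the original code): the helpers above all
+ # # map over a list, so text_postprocess expects a list of strings:
+ # #     lines = ["Invoice!!", "a", "Total: 42 EUR"]   # hypothetical OCR output
+ # #     text_postprocess(lines, spell_check=False)    # -> "Invoice Total 42 EUR"
+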
+ # # https://www.jaided.ai/easyocr/documentation/
+ # # extract text from an image (EasyOCR, PaddleOCR, pytesseract, ddddocr, zerox)
+ # def get_text(
+ #     image,
+ #     lang=["ch_sim", "en"],
+ #     model="paddleocr",  # "pytesseract", "paddleocr", "easyocr"
+ #     thr=0.1,
+ #     gpu=True,
+ #     decoder="wordbeamsearch",  # 'greedy', 'beamsearch' or 'wordbeamsearch' (highly accurate)
+ #     output="txt",
+ #     preprocess=None,
+ #     postprocess=False,  # do not check spelling
+ #     show=True,
+ #     ax=None,
+ #     cmap=cv2.COLOR_BGR2RGB,  # draw_box
+ #     font=cv2.FONT_HERSHEY_SIMPLEX,  # draw_box
+ #     fontsize=8,  # draw_box
+ #     figsize=[10, 10],
+ #     box_color=(0, 255, 0),  # draw_box
+ #     fontcolor=(116, 173, 233),  # draw_box
+ #     bg_color=(133, 203, 245, 100),  # draw_box
+ #     usage=False,
+ #     **kwargs,
+ # ):
+ #     """
+ #     image: input image path or image data.
+ #     lang: list of OCR languages.
+ #     thr: confidence threshold; detections below it are filtered out.
+ #     gpu: whether to use the GPU.
+ #     output: output type -- 'all' (every detection), 'text' (text only),
+ #         'score' (confidence scores), or 'box' (bounding boxes).
+ #     preprocess: dict of preprocessing options passed to preprocess_img.
+ #     show: whether to display the result image.
+ #     ax: Matplotlib axes used to display the image.
+ #     cmap: color map used when displaying the image.
+ #     box_color: bounding-box color.
+ #     fontcolor: text color.
+ #     kwargs: extra arguments passed to EasyOCR's readtext function.
+ #     """
+ #     from PIL import Image
+ #     if usage:
+ #         print(
+ #             """
+ #     image_path = 'car_plate.jpg'  # replace with your image path
+ #     results = get_text(
+ #         image_path,
+ #         lang=["en"],
+ #         gpu=False,
+ #         output="text",
+ #         preprocess={
+ #             "grayscale": True,
+ #             "threshold": True,
+ #             "threshold_method": 'adaptive',
+ #             "blur": True,
+ #             "blur_ksize": (5, 5),
+ #             "morph": True,
+ #             "morph_op": 'close',
+ #             "morph_kernel_size": (3, 3),
+ #             "enhance_contrast": True,
+ #             "clahe_clip": 2.0,
+ #             "clahe_grid_size": (8, 8),
+ #             "edge_detection": False
+ #         },
+ #         adjust_contrast=0.7
+ #     )""")
+
+ #     models = ["easyocr", "paddleocr", "pytesseract", "ddddocr", "zerox"]
+ #     model = strcmp(model, models)[0]
+ #     lang = lang_auto_detect(lang, model)
+ #     cvt_cmp = True
+ #     if isinstance(image, str) and isa(image, 'file'):
+ #         image = cv2.imread(image)
+ #     elif isa(image, 'image'):
+ #         cvt_cmp = False
+ #         image = np.array(image)
+ #     else:
+ #         raise ValueError(f"not support image with {type(image)} type")
+
+ #     # Ensure lang is always a list
+ #     if isinstance(lang, str):
+ #         lang = [lang]
+
+ #     # ! preprocessing img
+ #     if preprocess is None:
+ #         preprocess = {}
+ #     image_process = preprocess_img(image, **preprocess)
+ #     plt.figure(figsize=figsize) if show else None
+ #     # plt.subplot(131)
+ #     # plt.imshow(cv2.cvtColor(image, cmap)) if cvt_cmp else plt.imshow(image)
+ #     # plt.subplot(132)
+ #     # plt.imshow(image_process)
+ #     # plt.subplot(133)
+ #     if "easy" in model.lower():
+ #         import easyocr
+ #         print(f"detecting language(s): {lang}")
+ #         # Perform OCR on the image
+ #         reader = easyocr.Reader(lang, gpu=gpu)
+ #         detections = reader.readtext(image_process, decoder=decoder, **kwargs)
+
+ #         text_corr = []
+ #         for _, text, _ in detections:
+ #             text_corr.append(text_postprocess(text) if postprocess else text)
+
+ #         if show:
+ #             if ax is None:
+ #                 ax = plt.gca()
+ #             for i, (bbox, text, score) in enumerate(detections):
+ #                 if score > thr:
+ #                     top_left = tuple(map(int, bbox[0]))
+ #                     bottom_right = tuple(map(int, bbox[2]))
+ #                     image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
+ #                     image = add_text_pil(
+ #                         image,
+ #                         text_corr[i],
+ #                         top_left,
+ #                         cvt_cmp=cvt_cmp,
+ #                         font_size=fontsize * 6,
+ #                         color=fontcolor,
+ #                     )
+ #             try:
+ #                 img_cmp = cv2.cvtColor(image, cmap) if cvt_cmp else image
+ #             except cv2.error:
+ #                 img_cmp = image
+
+ #             ax.imshow(img_cmp)
+ #             ax.axis("off")
+
+ #             if output == "all":
+ #                 return ax, detections
+ #             elif "t" in output.lower() and "x" in output.lower():
+ #                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
+ #                 return (ax, text_corr) if postprocess else (ax, text)
+ #             elif "score" in output.lower() or "prob" in output.lower():
+ #                 scores = [score_ for _, _, score_ in detections]
+ #                 return ax, scores
+ #             elif "box" in output.lower():
+ #                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+ #                 return ax, bboxes
+ #             else:
+ #                 return ax, detections
+ #         else:
+ #             if output == "all":
+ #                 return detections
+ #             elif "t" in output.lower() and "x" in output.lower():
+ #                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
+ #                 return text
+ #             elif "score" in output.lower() or "prob" in output.lower():
+ #                 scores = [score_ for _, _, score_ in detections]
+ #                 return scores
+ #             elif "box" in output.lower():
+ #                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+ #                 return bboxes
+ #             else:
+ #                 return detections
+ #     elif "pad" in model.lower():
+ #         from paddleocr import PaddleOCR
+ #         logging.getLogger("ppocr").setLevel(logging.ERROR)
+
+ #         lang = strcmp(lang, ['ch', 'en', 'french', 'german', 'korean', 'japan'])[0]
+ #         ocr = PaddleOCR(
+ #             use_angle_cls=True,
+ #             lang=lang
+ #         )  # PaddleOCR supports only one language at a time
+ #         cls = kwargs.pop('cls', True)
+ #         result = ocr.ocr(image_process, cls=cls, **kwargs)
+ #         detections = []
+ #         if result[0] is not None:
+ #             for line in result[0]:
+ #                 bbox, (text, score) = line
+ #                 text = str2words(text) if postprocess else text  # spell check
+ #                 detections.append((bbox, text, score))
+
+ #         if show:
+ #             if ax is None:
+ #                 ax = plt.gca()
+ #             for bbox, text, score in detections:
+ #                 if score > thr:
+ #                     top_left = tuple(map(int, bbox[0]))
+ #                     bottom_left = tuple(
+ #                         map(int, bbox[1])
+ #                     )  # bottom-left, for more accurate placement
+ #                     bottom_right = tuple(map(int, bbox[2]))
+ #                     image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
+ #                     image = add_text_pil(
+ #                         image,
+ #                         text,
+ #                         top_left,
+ #                         cvt_cmp=cvt_cmp,
+ #                         font_size=fontsize * 6,
+ #                         color=fontcolor,
+ #                         bg_color=bg_color,
+ #                     )
+ #             try:
+ #                 img_cmp = cv2.cvtColor(image, cmap) if cvt_cmp else image
+ #             except cv2.error:
+ #                 img_cmp = image
+
+ #             ax.imshow(img_cmp)
+ #             ax.axis("off")
+ #             if output == "all":
+ #                 return ax, detections
+ #             elif "t" in output.lower() and "x" in output.lower():
+ #                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
+ #                 return ax, text
+ #             elif "score" in output.lower() or "prob" in output.lower():
+ #                 scores = [score_ for _, _, score_ in detections]
+ #                 return ax, scores
+ #             elif "box" in output.lower():
+ #                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+ #                 return ax, bboxes
+ #             else:
+ #                 return ax, detections
+ #         else:
+ #             if output == "all":
+ #                 return detections
+ #             elif "t" in output.lower() and "x" in output.lower():
+ #                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
+ #                 return text
+ #             elif "score" in output.lower() or "prob" in output.lower():
+ #                 scores = [score_ for _, _, score_ in detections]
+ #                 return scores
+ #             elif "box" in output.lower():
+ #                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+ #                 return bboxes
+ #             else:
+ #                 return detections
+ #     elif "ddddocr" in model.lower():
+ #         import ddddocr
+
+ #         ocr = ddddocr.DdddOcr(det=False, ocr=True)
+ #         image_bytes = convert_image_to_bytes(image_process)
+
+ #         results = ocr.classification(image_bytes)  # text extraction
+
+ #         # Optional: perform detection for bounding boxes
+ #         detections = []
+ #         if kwargs.get("det", False):
+ #             det_ocr = ddddocr.DdddOcr(det=True)
+ #             det_results = det_ocr.detect(image_bytes)
+ #             for box in det_results:
+ #                 top_left = (box[0], box[1])
+ #                 bottom_right = (box[2], box[3])
+ #                 detections.append((top_left, bottom_right))
+
+ #         # ddddocr returns a single recognized string, so postprocess that
+ #         # (the detection boxes above carry no per-box text)
+ #         if postprocess:
+ #             results = text_postprocess([results])
+
+ #         # Visualization
+ #         if show:
+ #             if ax is None:
+ #                 ax = plt.gca()
+ #             image_vis = image.copy()
+ #             if detections:
+ #                 for top_left, bottom_right in detections:
+ #                     cv2.rectangle(image_vis, top_left, bottom_right, box_color, 2)
+ #             image_vis = cv2.cvtColor(image_vis, cmap)
+ #             ax.imshow(image_vis)
+ #             ax.axis("off")
+ #         return (results, detections) if detections else results
+
+ #     elif "zerox" in model.lower():
+ #         from pyzerox import zerox
+ #         result = zerox(image_process)
+ #         detections = [(bbox, text, score) for bbox, text, score in result]
+ #         # Postprocess and visualize
+ #         if not isinstance(postprocess, dict):
+ #             postprocess = dict(
+ #                 spell_check=True,
+ #                 clean=True,
+ #                 filter=dict(min_length=2),
+ #                 pattern=None,
+ #                 merge=True,
+ #             )
+ #         text_corr = [text_postprocess(text, **postprocess) for _, text, _ in detections]
+
+ #         # Display results if 'show' is True
+ #         if show:
+ #             if ax is None:
+ #                 ax = plt.gca()
+ #             for bbox, text, score in detections:
+ #                 if score > thr:
+ #                     top_left = tuple(map(int, bbox[0]))
+ #                     bottom_right = tuple(map(int, bbox[2]))
+ #                     image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
+ #                     image = add_text_pil(image, text, top_left, cvt_cmp=cvt_cmp, font_size=fontsize * 6, color=fontcolor, bg_color=bg_color)
+ #             ax.imshow(image)
+ #             ax.axis("off")
+
+ #             # Return result based on 'output' type
+ #             if output == "all":
+ #                 return ax, detections
+ #             elif "t" in output.lower() and "x" in output.lower():
+ #                 text = [text_ for _, text_, score_ in detections if score_ >= thr]
+ #                 return ax, text
+ #             elif "score" in output.lower() or "prob" in output.lower():
+ #                 scores = [score_ for _, _, score_ in detections]
+ #                 return ax, scores
+ #             elif "box" in output.lower():
+ #                 bboxes = [bbox_ for bbox_, _, score_ in detections if score_ >= thr]
+ #                 return ax, bboxes
+ #             else:
+ #                 return ax, detections
+ #         # Fall through when show is False
+ #         return detections
+ #     else:  # "pytesseract"
+ #         import pytesseract
+ #         if ax is None:
+ #             ax = plt.gca()
+ #         text = pytesseract.image_to_string(image_process, lang="+".join(lang), **kwargs)
+ #         bboxes = pytesseract.image_to_boxes(image_process, **kwargs)
+ #         if show:
+ #             # Image dimensions
+ #             h, w = image.shape[:2]
+
+ #             for line in bboxes.splitlines():
+ #                 parts = line.split()
+ #                 if len(parts) == 6:
+ #                     char, left, bottom, right, top, _ = parts
+ #                     left, bottom, right, top = map(int, [left, bottom, right, top])
+
+ #                     # Convert Tesseract coordinates (bottom-left origin) to
+ #                     # OpenCV's top-left origin
+ #                     top_left = (left, h - top)
+ #                     bottom_right = (right, h - bottom)
+
+ #                     # Draw the bounding box
+ #                     image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
+ #                     image = add_text_pil(
+ #                         image,
+ #                         char,
+ #                         top_left,
+ #                         cvt_cmp=cvt_cmp,
+ #                         font_size=fontsize * 6,
+ #                         color=fontcolor,
+ #                     )
+ #             img_cmp = cv2.cvtColor(image, cmap)
+ #             ax.imshow(img_cmp)
+ #             ax.axis("off")
+ #             if output == "all":
+ #                 # Get verbose data including boxes, confidences, line and page numbers
+ #                 detections = pytesseract.image_to_data(image_process)
+ #                 return ax, detections
+ #             elif "t" in output.lower() and "x" in output.lower():
+ #                 return ax, text
+ #             elif "box" in output.lower():
+ #                 return ax, bboxes
+ #             else:
+ #                 # Get information about orientation and script detection
+ #                 return pytesseract.image_to_osd(image_process, **kwargs)
+ #         else:
+ #             if output == "all":
+ #                 # Get verbose data including boxes, confidences, line and page numbers
+ #                 detections = pytesseract.image_to_data(image_process, **kwargs)
+ #                 return detections
+ #             elif "t" in output.lower() and "x" in output.lower():
+ #                 return text
+ #             elif "box" in output.lower():
+ #                 return bboxes
+ #             else:
+ #                 # Get information about orientation and script detection
+ #                 return pytesseract.image_to_osd(image_process, **kwargs)
+
+
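+ # # Usage sketch (editorial, not in the original code): text-only extraction
+ # # without plotting; with show=False the easyocr branch returns just the
+ # # list of strings whose confidence clears `thr`:
+ # #     texts = get_text("receipt.jpg", model="easyocr", lang=["en"],
+ # #                      output="text", show=False)
+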
+ # def draw_box(
+ #     image,
+ #     detections=None,
+ #     thr=0.25,
+ #     cmap=cv2.COLOR_BGR2RGB,
+ #     box_color=(0, 255, 0),  # draw_box
+ #     fontcolor=(0, 0, 255),  # draw_box
+ #     fontsize=8,
+ #     show=True,
+ #     ax=None,
+ #     **kwargs,
+ # ):
+
+ #     if ax is None:
+ #         ax = plt.gca()
+ #     if isinstance(image, str):
+ #         image = cv2.imread(image)
+ #     if detections is None:
+ #         detections = get_text(image=image, show=0, output="all", **kwargs)
+
+ #     for bbox, text, score in detections:
+ #         if score > thr:
+ #             top_left = tuple(map(int, bbox[0]))
+ #             bottom_right = tuple(map(int, bbox[2]))
+ #             image = cv2.rectangle(image, top_left, bottom_right, box_color, 2)
+ #             image = add_text_pil(
+ #                 image, text, top_left, font_size=fontsize * 6, color=fontcolor
+ #             )
+
+ #     img_cmp = cv2.cvtColor(image, cmap)
+ #     if show:
+ #         ax.imshow(img_cmp)
+ #         ax.axis("off")
+ #         # plt.show()
+ #     return img_cmp
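+
+ # # Usage sketch (editorial, not in the original code): reuse detections from
+ # # get_text and overlay them on the source image:
+ # #     dets = get_text("sign.png", output="all", show=False)
+ # #     annotated = draw_box("sign.png", detections=dets, thr=0.3)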