onnxtr 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
onnxtr/io/elements.py CHANGED
@@ -163,7 +163,7 @@ class Line(Element):
163
163
  if geometry is None:
164
164
  # Check whether this is a rotated or straight box
165
165
  box_resolution_fn = resolve_enclosing_rbbox if len(words[0].geometry) == 4 else resolve_enclosing_bbox
166
- geometry = box_resolution_fn([w.geometry for w in words]) # type: ignore[operator]
166
+ geometry = box_resolution_fn([w.geometry for w in words]) # type: ignore[misc]
167
167
 
168
168
  super().__init__(words=words)
169
169
  self.geometry = geometry
@@ -216,7 +216,7 @@ class Block(Element):
216
216
  box_resolution_fn = (
217
217
  resolve_enclosing_rbbox if isinstance(lines[0].geometry, np.ndarray) else resolve_enclosing_bbox
218
218
  )
219
- geometry = box_resolution_fn(line_boxes + artefact_boxes) # type: ignore[operator]
219
+ geometry = box_resolution_fn(line_boxes + artefact_boxes) # type: ignore
220
220
 
221
221
  super().__init__(lines=lines, artefacts=artefacts)
222
222
  self.geometry = geometry
@@ -294,6 +294,10 @@ class Page(Element):
294
294
  def synthesize(self, **kwargs) -> np.ndarray:
295
295
  """Synthesize the page from the predictions
296
296
 
297
+ Args:
298
+ ----
299
+ **kwargs: keyword arguments passed to the `synthesize_page` method
300
+
297
301
  Returns
298
302
  -------
299
303
  synthesized page
@@ -442,11 +446,15 @@ class Document(Element):
442
446
  def synthesize(self, **kwargs) -> List[np.ndarray]:
443
447
  """Synthesize all pages from their predictions
444
448
 
449
+ Args:
450
+ ----
451
+ **kwargs: keyword arguments passed to the `Page.synthesize` method
452
+
445
453
  Returns
446
454
  -------
447
455
  list of synthesized pages
448
456
  """
449
- return [page.synthesize() for page in self.pages]
457
+ return [page.synthesize(**kwargs) for page in self.pages]
450
458
 
451
459
  def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
452
460
  """Export the document as XML (hOCR-format)
onnxtr/models/builder.py CHANGED
@@ -266,7 +266,7 @@ class DocumentBuilder(NestedObject):
266
266
  Line([
267
267
  Word(
268
268
  *word_preds[idx],
269
- tuple([tuple(pt) for pt in boxes[idx].tolist()]), # type: ignore[arg-type]
269
+ tuple(tuple(pt) for pt in boxes[idx].tolist()), # type: ignore[arg-type]
270
270
  float(objectness_scores[idx]),
271
271
  crop_orientations[idx],
272
272
  )
@@ -13,6 +13,7 @@ import numpy as np
13
13
  from ...engine import Engine, EngineConfig
14
14
 
15
15
  __all__ = [
16
+ "MobileNetV3",
16
17
  "mobilenet_v3_small_crop_orientation",
17
18
  "mobilenet_v3_small_page_orientation",
18
19
  ]
@@ -3,7 +3,7 @@
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
- from typing import Any, List, Union
6
+ from typing import Any, List, Optional, Union
7
7
 
8
8
  import numpy as np
9
9
  from scipy.special import softmax
@@ -29,10 +29,10 @@ class OrientationPredictor(NestedObject):
29
29
 
30
30
  def __init__(
31
31
  self,
32
- pre_processor: PreProcessor,
33
- model: Any,
32
+ pre_processor: Optional[PreProcessor],
33
+ model: Optional[Any],
34
34
  ) -> None:
35
- self.pre_processor = pre_processor
35
+ self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None
36
36
  self.model = model
37
37
 
38
38
  def __call__(
@@ -43,6 +43,10 @@ class OrientationPredictor(NestedObject):
43
43
  if any(input.ndim != 3 for input in inputs):
44
44
  raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.")
45
45
 
46
+ if self.model is None or self.pre_processor is None:
47
+ # predictor is disabled
48
+ return [[0] * len(inputs), [0] * len(inputs), [1.0] * len(inputs)]
49
+
46
50
  processed_batches = self.pre_processor(inputs)
47
51
  predicted_batches = [self.model(batch) for batch in processed_batches]
48
52
 
@@ -17,16 +17,30 @@ ORIENTATION_ARCHS: List[str] = ["mobilenet_v3_small_crop_orientation", "mobilene
17
17
 
18
18
 
19
19
  def _orientation_predictor(
20
- arch: str, load_in_8_bit: bool = False, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any
20
+ arch: Any,
21
+ model_type: str,
22
+ load_in_8_bit: bool = False,
23
+ engine_cfg: Optional[EngineConfig] = None,
24
+ disabled: bool = False,
25
+ **kwargs: Any,
21
26
  ) -> OrientationPredictor:
22
- if arch not in ORIENTATION_ARCHS:
23
- raise ValueError(f"unknown architecture '{arch}'")
27
+ if disabled:
28
+ # Case where the orientation predictor is disabled
29
+ return OrientationPredictor(None, None)
30
+
31
+ if isinstance(arch, str):
32
+ if arch not in ORIENTATION_ARCHS:
33
+ raise ValueError(f"unknown architecture '{arch}'")
34
+ # Load directly classifier from backbone
35
+ _model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
36
+ else:
37
+ if not isinstance(arch, classification.MobileNetV3):
38
+ raise ValueError(f"unknown architecture: {type(arch)}")
39
+ _model = arch
24
40
 
25
- # Load directly classifier from backbone
26
- _model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
27
41
  kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
28
42
  kwargs["std"] = kwargs.get("std", _model.cfg["std"])
29
- kwargs["batch_size"] = kwargs.get("batch_size", 512 if "crop" in arch else 2)
43
+ kwargs["batch_size"] = kwargs.get("batch_size", 512 if model_type == "crop" else 2)
30
44
  input_shape = _model.cfg["input_shape"][1:]
31
45
  predictor = OrientationPredictor(
32
46
  PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs),
@@ -60,7 +74,8 @@ def crop_orientation_predictor(
60
74
  -------
61
75
  OrientationPredictor
62
76
  """
63
- return _orientation_predictor(arch, load_in_8_bit, engine_cfg, **kwargs)
77
+ model_type = "crop"
78
+ return _orientation_predictor(arch, model_type, load_in_8_bit, engine_cfg, **kwargs)
64
79
 
65
80
 
66
81
  def page_orientation_predictor(
@@ -88,4 +103,5 @@ def page_orientation_predictor(
88
103
  -------
89
104
  OrientationPredictor
90
105
  """
91
- return _orientation_predictor(arch, load_in_8_bit, engine_cfg, **kwargs)
106
+ model_type = "page"
107
+ return _orientation_predictor(arch, model_type, load_in_8_bit, engine_cfg, **kwargs)
@@ -9,7 +9,7 @@ import numpy as np
9
9
 
10
10
  from onnxtr.models.builder import DocumentBuilder
11
11
  from onnxtr.models.engine import EngineConfig
12
- from onnxtr.utils.geometry import extract_crops, extract_rcrops, rotate_image
12
+ from onnxtr.utils.geometry import extract_crops, extract_rcrops, remove_image_padding, rotate_image
13
13
 
14
14
  from .._utils import estimate_orientation, rectify_crops, rectify_loc_preds
15
15
  from ..classification import crop_orientation_predictor, page_orientation_predictor
@@ -55,13 +55,19 @@ class _OCRPredictor:
55
55
  ) -> None:
56
56
  self.assume_straight_pages = assume_straight_pages
57
57
  self.straighten_pages = straighten_pages
58
+ self._page_orientation_disabled = kwargs.pop("disable_page_orientation", False)
59
+ self._crop_orientation_disabled = kwargs.pop("disable_crop_orientation", False)
58
60
  self.crop_orientation_predictor = (
59
61
  None
60
62
  if assume_straight_pages
61
- else crop_orientation_predictor(load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg)
63
+ else crop_orientation_predictor(
64
+ load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._crop_orientation_disabled
65
+ )
62
66
  )
63
67
  self.page_orientation_predictor = (
64
- page_orientation_predictor(load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg)
68
+ page_orientation_predictor(
69
+ load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._page_orientation_disabled
70
+ )
65
71
  if detect_orientation or straighten_pages or not assume_straight_pages
66
72
  else None
67
73
  )
@@ -112,8 +118,8 @@ class _OCRPredictor:
112
118
  ]
113
119
  )
114
120
  return [
115
- # expand if height and width are not equal
116
- rotate_image(page, angle, expand=page.shape[0] != page.shape[1])
121
+ # expand if height and width are not equal, afterwards remove padding
122
+ remove_image_padding(rotate_image(page, angle, expand=page.shape[0] != page.shape[1]))
117
123
  for page, angle in zip(pages, origin_pages_orientations)
118
124
  ]
119
125
 
@@ -123,13 +129,18 @@ class _OCRPredictor:
123
129
  loc_preds: List[np.ndarray],
124
130
  channels_last: bool,
125
131
  assume_straight_pages: bool = False,
132
+ assume_horizontal: bool = False,
126
133
  ) -> List[List[np.ndarray]]:
127
- extraction_fn = extract_crops if assume_straight_pages else extract_rcrops
128
-
129
- crops = [
130
- extraction_fn(page, _boxes[:, :4], channels_last=channels_last) # type: ignore[operator]
131
- for page, _boxes in zip(pages, loc_preds)
132
- ]
134
+ if assume_straight_pages:
135
+ crops = [
136
+ extract_crops(page, _boxes[:, :4], channels_last=channels_last)
137
+ for page, _boxes in zip(pages, loc_preds)
138
+ ]
139
+ else:
140
+ crops = [
141
+ extract_rcrops(page, _boxes[:, :4], channels_last=channels_last, assume_horizontal=assume_horizontal)
142
+ for page, _boxes in zip(pages, loc_preds)
143
+ ]
133
144
  return crops
134
145
 
135
146
  @staticmethod
@@ -138,8 +149,9 @@ class _OCRPredictor:
138
149
  loc_preds: List[np.ndarray],
139
150
  channels_last: bool,
140
151
  assume_straight_pages: bool = False,
152
+ assume_horizontal: bool = False,
141
153
  ) -> Tuple[List[List[np.ndarray]], List[np.ndarray]]:
142
- crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages)
154
+ crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages, assume_horizontal)
143
155
 
144
156
  # Avoid sending zero-sized crops
145
157
  is_kept = [[all(s > 0 for s in crop.shape) for crop in page_crops] for page_crops in crops]
@@ -119,6 +119,7 @@ class OCRPredictor(NestedObject, _OCRPredictor):
119
119
  loc_preds, # type: ignore[arg-type]
120
120
  channels_last=True,
121
121
  assume_straight_pages=self.assume_straight_pages,
122
+ assume_horizontal=self._page_orientation_disabled,
122
123
  )
123
124
  # Rectify crop orientation and get crop orientation predictions
124
125
  crop_orientations: Any = []
onnxtr/utils/geometry.py CHANGED
@@ -391,6 +391,26 @@ def rotate_image(
391
391
  return rot_img
392
392
 
393
393
 
394
+ def remove_image_padding(image: np.ndarray) -> np.ndarray:
395
+ """Remove black border padding from an image
396
+
397
+ Args:
398
+ ----
399
+ image: numpy tensor to remove padding from
400
+
401
+ Returns:
402
+ -------
403
+ Image with padding removed
404
+ """
405
+ # Find the bounding box of the non-black region
406
+ rows = np.any(image, axis=1)
407
+ cols = np.any(image, axis=0)
408
+ rmin, rmax = np.where(rows)[0][[0, -1]]
409
+ cmin, cmax = np.where(cols)[0][[0, -1]]
410
+
411
+ return image[rmin : rmax + 1, cmin : cmax + 1]
412
+
413
+
394
414
  def estimate_page_angle(polys: np.ndarray) -> float:
395
415
  """Takes a batch of rotated previously ORIENTED polys (N, 4, 2) (rectified by the classifier) and return the
396
416
  estimated angle ccw in degrees
@@ -471,7 +491,7 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True
471
491
 
472
492
 
473
493
  def extract_rcrops(
474
- img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True
494
+ img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True, assume_horizontal: bool = False
475
495
  ) -> List[np.ndarray]:
476
496
  """Created cropped images from list of rotated bounding boxes
477
497
 
@@ -481,6 +501,7 @@ def extract_rcrops(
481
501
  polys: bounding boxes of shape (N, 4, 2)
482
502
  dtype: target data type of bounding boxes
483
503
  channels_last: whether the channel dimension is the last one instead of the first one
504
+ assume_horizontal: whether the boxes are assumed to be only horizontally oriented
484
505
 
485
506
  Returns:
486
507
  -------
@@ -498,22 +519,88 @@ def extract_rcrops(
498
519
  _boxes[:, :, 0] *= width
499
520
  _boxes[:, :, 1] *= height
500
521
 
501
- src_pts = _boxes[:, :3].astype(np.float32)
502
- # Preserve size
503
- d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
504
- d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
505
- # (N, 3, 2)
506
- dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
507
- dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
508
- dst_pts[:, 2, 1] = d2 - 1
509
- # Use a warp transformation to extract the crop
510
- crops = [
511
- cv2.warpAffine(
512
- img if channels_last else img.transpose(1, 2, 0),
513
- # Transformation matrix
514
- cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
515
- (int(d1[idx]), int(d2[idx])),
516
- )
517
- for idx in range(_boxes.shape[0])
518
- ]
522
+ src_img = img if channels_last else img.transpose(1, 2, 0)
523
+
524
+ # Handle only horizontal oriented boxes
525
+ if assume_horizontal:
526
+ crops = []
527
+
528
+ for box in _boxes:
529
+ # Calculate the centroid of the quadrilateral
530
+ centroid = np.mean(box, axis=0)
531
+
532
+ # Divide the points into left and right
533
+ left_points = box[box[:, 0] < centroid[0]]
534
+ right_points = box[box[:, 0] >= centroid[0]]
535
+
536
+ # Sort the left points according to the y-axis
537
+ left_points = left_points[np.argsort(left_points[:, 1])]
538
+ top_left_pt = left_points[0]
539
+ bottom_left_pt = left_points[-1]
540
+ # Sort the right points according to the y-axis
541
+ right_points = right_points[np.argsort(right_points[:, 1])]
542
+ top_right_pt = right_points[0]
543
+ bottom_right_pt = right_points[-1]
544
+ box_points = np.array(
545
+ [top_left_pt, bottom_left_pt, top_right_pt, bottom_right_pt],
546
+ dtype=dtype,
547
+ )
548
+
549
+ # Get the width and height of the rectangle that will contain the warped quadrilateral
550
+ width_upper = np.linalg.norm(top_right_pt - top_left_pt)
551
+ width_lower = np.linalg.norm(bottom_right_pt - bottom_left_pt)
552
+ height_left = np.linalg.norm(bottom_left_pt - top_left_pt)
553
+ height_right = np.linalg.norm(bottom_right_pt - top_right_pt)
554
+
555
+ # Get the maximum width and height
556
+ rect_width = max(int(width_upper), int(width_lower))
557
+ rect_height = max(int(height_left), int(height_right))
558
+
559
+ dst_pts = np.array(
560
+ [
561
+ [0, 0], # top-left
562
+ # bottom-left
563
+ [0, rect_height - 1],
564
+ # top-right
565
+ [rect_width - 1, 0],
566
+ # bottom-right
567
+ [rect_width - 1, rect_height - 1],
568
+ ],
569
+ dtype=dtype,
570
+ )
571
+
572
+ # Get the perspective transform matrix using the box points
573
+ affine_mat = cv2.getPerspectiveTransform(box_points, dst_pts)
574
+
575
+ # Perform the perspective warp to get the rectified crop
576
+ crop = cv2.warpPerspective(
577
+ src_img,
578
+ affine_mat,
579
+ (rect_width, rect_height),
580
+ )
581
+
582
+ # Add the crop to the list of crops
583
+ crops.append(crop)
584
+
585
+ # Handle any oriented boxes
586
+ else:
587
+ src_pts = _boxes[:, :3].astype(np.float32)
588
+ # Preserve size
589
+ d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
590
+ d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
591
+ # (N, 3, 2)
592
+ dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
593
+ dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
594
+ dst_pts[:, 2, 1] = d2 - 1
595
+ # Use a warp transformation to extract the crop
596
+ crops = [
597
+ cv2.warpAffine(
598
+ src_img,
599
+ # Transformation matrix
600
+ cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
601
+ (int(d1[idx]), int(d2[idx])),
602
+ )
603
+ for idx in range(_boxes.shape[0])
604
+ ]
605
+
519
606
  return crops # type: ignore[return-value]
@@ -2,6 +2,7 @@
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+ import logging
5
6
  from typing import Any, Dict, Optional
6
7
 
7
8
  import numpy as np
@@ -13,10 +14,109 @@ from .fonts import get_font
13
14
  __all__ = ["synthesize_page"]
14
15
 
15
16
 
17
+ # Global variable to avoid multiple warnings
18
+ ROTATION_WARNING = False
19
+
20
+
21
+ def _warn_rotation(entry: Dict[str, Any]) -> None: # pragma: no cover
22
+ global ROTATION_WARNING
23
+ if not ROTATION_WARNING and len(entry["geometry"]) == 4:
24
+ logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
25
+ ROTATION_WARNING = True
26
+
27
+
28
+ def _synthesize(
29
+ response: Image.Image,
30
+ entry: Dict[str, Any],
31
+ w: int,
32
+ h: int,
33
+ draw_proba: bool = False,
34
+ font_family: Optional[str] = None,
35
+ smoothing_factor: float = 0.75,
36
+ min_font_size: int = 6,
37
+ max_font_size: int = 50,
38
+ ) -> Image.Image:
39
+ if len(entry["geometry"]) == 2:
40
+ (xmin, ymin), (xmax, ymax) = entry["geometry"]
41
+ polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
42
+ else:
43
+ polygon = entry["geometry"]
44
+
45
+ # Calculate the bounding box of the word
46
+ x_coords, y_coords = zip(*polygon)
47
+ xmin, ymin, xmax, ymax = (
48
+ int(round(w * min(x_coords))),
49
+ int(round(h * min(y_coords))),
50
+ int(round(w * max(x_coords))),
51
+ int(round(h * max(y_coords))),
52
+ )
53
+ word_width = xmax - xmin
54
+ word_height = ymax - ymin
55
+
56
+ # If lines are provided instead of words, concatenate the word entries
57
+ if "words" in entry:
58
+ word_text = " ".join(word["value"] for word in entry["words"])
59
+ else:
60
+ word_text = entry["value"]
61
+ # Find the optimal font size
62
+ try:
63
+ font_size = min(word_height, max_font_size)
64
+ font = get_font(font_family, font_size)
65
+ text_width, text_height = font.getbbox(word_text)[2:4]
66
+
67
+ while (text_width > word_width or text_height > word_height) and font_size > min_font_size:
68
+ font_size = max(int(font_size * smoothing_factor), min_font_size)
69
+ font = get_font(font_family, font_size)
70
+ text_width, text_height = font.getbbox(word_text)[2:4]
71
+ except ValueError: # pragma: no cover
72
+ font = get_font(font_family, min_font_size)
73
+
74
+ # Create a mask for the word
75
+ mask = Image.new("L", (w, h), 0)
76
+ ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255)
77
+
78
+ # Draw the word text
79
+ d = ImageDraw.Draw(response)
80
+ try:
81
+ try:
82
+ d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt")
83
+ except UnicodeEncodeError: # pragma: no cover
84
+ d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt")
85
+ # Catch generic exceptions to avoid crashing the whole rendering
86
+ except Exception: # pragma: no cover
87
+ logging.warning(f"Could not render word: {word_text}")
88
+
89
+ if draw_proba:
90
+ confidence = (
91
+ entry["confidence"]
92
+ if "confidence" in entry
93
+ else sum(w["confidence"] for w in entry["words"]) / len(entry["words"])
94
+ )
95
+ p = int(255 * confidence)
96
+ color = (255 - p, 0, p) # Red to blue gradient based on probability
97
+ d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2)
98
+
99
+ prob_font = get_font(font_family, 20)
100
+ prob_text = f"{confidence:.2f}"
101
+ prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4]
102
+
103
+ # Position the probability slightly above the bounding box
104
+ prob_x_offset = (word_width - prob_text_width) // 2
105
+ prob_y_offset = ymin - prob_text_height - 2
106
+ prob_y_offset = max(0, prob_y_offset)
107
+
108
+ d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt")
109
+
110
+ return response
111
+
112
+
16
113
  def synthesize_page(
17
114
  page: Dict[str, Any],
18
115
  draw_proba: bool = False,
19
116
  font_family: Optional[str] = None,
117
+ smoothing_factor: float = 0.95,
118
+ min_font_size: int = 8,
119
+ max_font_size: int = 50,
20
120
  ) -> np.ndarray:
21
121
  """Draw a the content of the element page (OCR response) on a blank page.
22
122
 
@@ -24,8 +124,10 @@ def synthesize_page(
24
124
  ----
25
125
  page: exported Page object to represent
26
126
  draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
27
- font_size: size of the font, default font = 13
28
127
  font_family: family of the font
128
+ smoothing_factor: factor to smooth the font size
129
+ min_font_size: minimum font size
130
+ max_font_size: maximum font size
29
131
 
30
132
  Returns:
31
133
  -------
@@ -33,38 +135,39 @@ def synthesize_page(
33
135
  """
34
136
  # Draw template
35
137
  h, w = page["dimensions"]
36
- response = 255 * np.ones((h, w, 3), dtype=np.int32)
138
+ response = Image.new("RGB", (w, h), color=(255, 255, 255))
37
139
 
38
- # Draw each word
39
140
  for block in page["blocks"]:
40
- for line in block["lines"]:
41
- for word in line["words"]:
42
- # Get absolute word geometry
43
- (xmin, ymin), (xmax, ymax) = word["geometry"]
44
- xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
45
- ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
46
-
47
- # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
48
- font = get_font(font_family, int(0.75 * (ymax - ymin)))
49
- img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
50
- d = ImageDraw.Draw(img)
51
- # Draw in black the value of the word
52
- try:
53
- d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
54
- except UnicodeEncodeError:
55
- # When character cannot be encoded, use its anyascii version
56
- d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0))
57
-
58
- # Colorize if draw_proba
59
- if draw_proba:
60
- p = int(255 * word["confidence"])
61
- mask = np.where(np.array(img) == 0, 1, 0)
62
- proba: np.ndarray = np.array([255 - p, 0, p])
63
- color = mask * proba[np.newaxis, np.newaxis, :]
64
- white_mask = 255 * (1 - mask)
65
- img = color + white_mask
66
-
67
- # Write to response page
68
- response[ymin:ymax, xmin:xmax, :] = np.array(img)
141
+ # If lines are provided use these to get better rendering results
142
+ if len(block["lines"]) > 1:
143
+ for line in block["lines"]:
144
+ _warn_rotation(block) # pragma: no cover
145
+ response = _synthesize(
146
+ response=response,
147
+ entry=line,
148
+ w=w,
149
+ h=h,
150
+ draw_proba=draw_proba,
151
+ font_family=font_family,
152
+ smoothing_factor=smoothing_factor,
153
+ min_font_size=min_font_size,
154
+ max_font_size=max_font_size,
155
+ )
156
+ # Otherwise, draw each word
157
+ else:
158
+ for line in block["lines"]:
159
+ _warn_rotation(block) # pragma: no cover
160
+ for word in line["words"]:
161
+ response = _synthesize(
162
+ response=response,
163
+ entry=word,
164
+ w=w,
165
+ h=h,
166
+ draw_proba=draw_proba,
167
+ font_family=font_family,
168
+ smoothing_factor=smoothing_factor,
169
+ min_font_size=min_font_size,
170
+ max_font_size=max_font_size,
171
+ )
69
172
 
70
- return response
173
+ return np.array(response, dtype=np.uint8)
onnxtr/utils/vocabs.py CHANGED
@@ -25,6 +25,7 @@ VOCABS: Dict[str, str] = {
25
25
  "hindi_punctuation": "।,?!:्ॐ॰॥॰",
26
26
  "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
27
27
  "bangla_digits": "০১২৩৪৫৬৭৮৯",
28
+ "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
28
29
  }
29
30
 
30
31
  VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
@@ -59,6 +60,9 @@ VOCABS["vietnamese"] = (
59
60
  VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
60
61
  VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
61
62
  VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
63
+ VOCABS["ukrainian"] = (
64
+ VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"
65
+ )
62
66
  VOCABS["multilingual"] = "".join(
63
67
  dict.fromkeys(
64
68
  VOCABS["french"]
onnxtr/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = 'v0.4.1'
1
+ __version__ = 'v0.5.1'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: onnxtr
3
- Version: 0.4.1
3
+ Version: 0.5.1
4
4
  Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
5
5
  Author-email: Felix Dittrich <felixdittrich92@gmail.com>
6
6
  Maintainer: Felix Dittrich
@@ -227,7 +227,6 @@ Description-Content-Type: text/markdown
227
227
  License-File: LICENSE
228
228
  Requires-Dist: numpy<3.0.0,>=1.16.0
229
229
  Requires-Dist: scipy<2.0.0,>=1.4.0
230
- Requires-Dist: opencv-python<5.0.0,>=4.5.0
231
230
  Requires-Dist: pypdfium2<5.0.0,>=4.11.0
232
231
  Requires-Dist: pyclipper<2.0.0,>=1.2.0
233
232
  Requires-Dist: shapely<3.0.0,>=1.6.0
@@ -240,8 +239,13 @@ Requires-Dist: anyascii>=0.3.2
240
239
  Requires-Dist: tqdm>=4.30.0
241
240
  Provides-Extra: cpu
242
241
  Requires-Dist: onnxruntime>=1.11.0; extra == "cpu"
242
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "cpu"
243
+ Provides-Extra: cpu-headless
244
+ Requires-Dist: onnxruntime>=1.11.0; extra == "cpu-headless"
245
+ Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "cpu-headless"
243
246
  Provides-Extra: dev
244
247
  Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
248
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "dev"
245
249
  Requires-Dist: weasyprint>=55.0; extra == "dev"
246
250
  Requires-Dist: matplotlib>=3.1.0; extra == "dev"
247
251
  Requires-Dist: mplcursors>=0.3; extra == "dev"
@@ -253,6 +257,10 @@ Requires-Dist: mypy>=0.812; extra == "dev"
253
257
  Requires-Dist: pre-commit>=2.17.0; extra == "dev"
254
258
  Provides-Extra: gpu
255
259
  Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu"
260
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "gpu"
261
+ Provides-Extra: gpu-headless
262
+ Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu-headless"
263
+ Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "gpu-headless"
256
264
  Provides-Extra: html
257
265
  Requires-Dist: weasyprint>=55.0; extra == "html"
258
266
  Provides-Extra: quality
@@ -276,7 +284,8 @@ Requires-Dist: mplcursors>=0.3; extra == "viz"
276
284
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
277
285
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
278
286
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
279
- [![Pypi](https://img.shields.io/badge/pypi-v0.4.1-blue.svg)](https://pypi.org/project/OnnxTR/)
287
+ [![Pypi](https://img.shields.io/badge/pypi-v0.5.1-blue.svg)](https://pypi.org/project/OnnxTR/)
288
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
280
289
 
281
290
  > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
282
291
 
@@ -309,8 +318,10 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
309
318
 
310
319
  ```shell
311
320
  pip install "onnxtr[cpu]"
321
+ pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
312
322
  # with gpu support
313
323
  pip install "onnxtr[gpu]"
324
+ pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
314
325
  # with HTML support
315
326
  pip install "onnxtr[html]"
316
327
  # with support for visualization
@@ -356,6 +367,9 @@ model = ocr_predictor(
356
367
  # Additional parameters - meta information
357
368
  detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
358
369
  detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
370
+ # Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
371
+ disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
372
+ disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
359
373
  # DocumentBuilder specific parameters
360
374
  resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
361
375
  resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
@@ -1,27 +1,27 @@
1
1
  onnxtr/__init__.py,sha256=h7Wc2tuHLsaoCk5xNpEFEK-g11A6SJA7nAasA76TQ_Y,100
2
2
  onnxtr/file_utils.py,sha256=WjUKalEdR53aoeIY4e-ihy3r7J_C9qFxL40JHGPfutc,1107
3
3
  onnxtr/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- onnxtr/version.py,sha256=FMLDI1-41V4TL8AtTJYDQPs1_pJ9uac4WXnI9j16CjY,23
4
+ onnxtr/version.py,sha256=6swtMqpBHD3aLY2AA1x8huT5k183agnIJ6bjR3m048c,23
5
5
  onnxtr/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  onnxtr/contrib/artefacts.py,sha256=tdmfhvfXVRYEH7uj4_hqf2cuUGoTieyNK8bXsD3zHwo,5383
7
7
  onnxtr/contrib/base.py,sha256=KyJ8_zDSKEWSFBszgCbLjEeI7SKg4N_iH_ZQNf90SWQ,3288
8
8
  onnxtr/io/__init__.py,sha256=kS7tKGFvzxOCWBOun-Y8n9CsziwRKNynjwpZEUUI03M,106
9
- onnxtr/io/elements.py,sha256=h-IxpFqXrvg-fOhpnOqpGFLdG-lR-xYYIxk3chy_MN8,17769
9
+ onnxtr/io/elements.py,sha256=GX6rhwg_ByAlL8rAsuLgPAeJ7JsN3_V2o_ETkhh_U68,17977
10
10
  onnxtr/io/html.py,sha256=Em_7PjZ56SugJ9bjjcWLCMVe5ee6uUMKeZovNxJFAXw,737
11
11
  onnxtr/io/image.py,sha256=4tLTh2bGdA0ohh3a6mV6xD0KqNOtIVi5lJ06XSmeyMI,1759
12
12
  onnxtr/io/pdf.py,sha256=tD0klmxI-gkMXp56f_ZXWyPHLsUBKa_xlhNTtGV6tpU,1367
13
13
  onnxtr/io/reader.py,sha256=BA7DPhW-Gkmce_ZfzrOl4H3pSXVy2JBeQEuY3pWrBFg,2852
14
14
  onnxtr/models/__init__.py,sha256=QTfZlqUyv1d7NUCbGIUFM1DLOOXe-cqHZ7uaKkGdXvk,157
15
15
  onnxtr/models/_utils.py,sha256=KncsNcoWqbsxFwduce2STuGHLhv63nXEHv7CMuh6wYA,6606
16
- onnxtr/models/builder.py,sha256=Bzg-XHZc5k16Ti2XeV9hm4POTHofe581Azq1a3d1O6E,14296
16
+ onnxtr/models/builder.py,sha256=dEZPHkyq-qXo4ZPs9CrufkkWYRrbvxU38ujTx333Mmo,14294
17
17
  onnxtr/models/engine.py,sha256=w1vzEduzVDHuxOb0JEkhPp2whrK7ViP03KZiNUNbe4I,4837
18
18
  onnxtr/models/zoo.py,sha256=Zcx0mOfMwUR2YAMd7ug06RvXeG2T1PzR2twS6y9X19A,5352
19
19
  onnxtr/models/classification/__init__.py,sha256=h1bZs55iLJBMATtzS4ntTKwfD6OGXBiiqGv_hEnOFnE,41
20
- onnxtr/models/classification/zoo.py,sha256=jzZMf7hKqN9omGAPHJR83rVDaaWhPm-Rk55Xn4bGaIs,3436
20
+ onnxtr/models/classification/zoo.py,sha256=45l0gM3rMrGd_CTxA-OwULA7AOHjTx55_HMidorIjdc,3908
21
21
  onnxtr/models/classification/models/__init__.py,sha256=rohbM6ZQslfYchi7feZwwh-sX3XXRUhgtEJQeurAytQ,24
22
- onnxtr/models/classification/models/mobilenet.py,sha256=l6Ch7ZwL4tqoN94YhSmudY6XYl5fIILzzu4T9JUwZKs,4881
22
+ onnxtr/models/classification/models/mobilenet.py,sha256=rgxkTpRUk_QtU2fAA-Qg3u6y0iWA-zFsmMmYVZRAWiw,4900
23
23
  onnxtr/models/classification/predictor/__init__.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
24
- onnxtr/models/classification/predictor/base.py,sha256=Xfaj2XlaJuQ2R81XqF5RB0Wcvzd4wh7Z6j1ifn2niFc,2097
24
+ onnxtr/models/classification/predictor/base.py,sha256=J-EckJM88S4V7f50ukMVgK0tCQEbRZTQ7d1p77x8lUg,2357
25
25
  onnxtr/models/detection/__init__.py,sha256=h1bZs55iLJBMATtzS4ntTKwfD6OGXBiiqGv_hEnOFnE,41
26
26
  onnxtr/models/detection/core.py,sha256=ZmVDHLJ1l4LQ8rFSKc7enXDkGcOWrcQv4H0SJWyLsag,3584
27
27
  onnxtr/models/detection/zoo.py,sha256=5kz4l67Xkr4YTDoI2wDTiI6HSaB926zfua0SZU-Kaw8,2735
@@ -38,8 +38,8 @@ onnxtr/models/detection/predictor/base.py,sha256=bt8M6I14tWC9DYjrFrqg-AU5u670_uP
38
38
  onnxtr/models/factory/__init__.py,sha256=cKPoH2V2157lLMTR2zsljG3_IQHziodqR-XK_LG0D_I,19
39
39
  onnxtr/models/factory/hub.py,sha256=Fk6pX9VJD422rnVgLh37o136T_0YAsQFzY2dQplDfa4,7176
40
40
  onnxtr/models/predictor/__init__.py,sha256=XL25XkRkgyK7mldF-CWhg2MMakSdP5vLpDLwL59hphk,25
41
- onnxtr/models/predictor/base.py,sha256=ZutI4iNUWk5I5wFfzip89JEDl0SmN7W7hCWRmVec38w,8813
42
- onnxtr/models/predictor/predictor.py,sha256=pfyTu2qidlPOpXyNKkh20cZefWYUZlF3VEmzrsQr2K8,6368
41
+ onnxtr/models/predictor/base.py,sha256=JnaOMy6pQh0ht2bc0koGqA9rBi_F6mHNyPYGcg0T7C8,9471
42
+ onnxtr/models/predictor/predictor.py,sha256=-ZQfPT70HKTfAP4etywSTy_W-pq5uPDHU00toHuWVcI,6431
43
43
  onnxtr/models/preprocessor/__init__.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
44
44
  onnxtr/models/preprocessor/base.py,sha256=8ZCKsB-o9uRaUm0x4x9FYpYxLXpwHyq2nVv_TlRgaMw,3990
45
45
  onnxtr/models/recognition/__init__.py,sha256=h1bZs55iLJBMATtzS4ntTKwfD6OGXBiiqGv_hEnOFnE,41
@@ -61,15 +61,15 @@ onnxtr/utils/__init__.py,sha256=pESRJKtcQyjRxiMgZPhtPYeLbCj-YSGyMVRHTbcMONU,94
61
61
  onnxtr/utils/common_types.py,sha256=eC_NyIwbo9qVF33LiNPqHKfyabWq9mYEKD9gAloo5UU,601
62
62
  onnxtr/utils/data.py,sha256=Dh0mgeHJhyPwmm63J90uDVmIYbrp63hh1_SnYLnpgJI,4354
63
63
  onnxtr/utils/fonts.py,sha256=27v0cojgUrVxNF8Krb1FybSoykoxFy1XjG8lHRUuiEY,1353
64
- onnxtr/utils/geometry.py,sha256=u9ei6WW8Yd29rtwnrDYercAY-tWkOLkzBd5Oi6NNyDI,17774
64
+ onnxtr/utils/geometry.py,sha256=mYsxRYpMm-UtwmXTcbiSfe2j6-50ZSWAohTcfyi7aZU,20929
65
65
  onnxtr/utils/multithreading.py,sha256=30T7AylM3rb52ZEI3Pk1pfB0VYraTbc7yO2vNODVVFY,2011
66
- onnxtr/utils/reconstitution.py,sha256=Hx1_ddLevKLzuxXc19UelPdsGlAwqi4f6vRSYKHDUB4,2617
66
+ onnxtr/utils/reconstitution.py,sha256=DGb2Isxc2At2GTOO93rbzNvnLRG2vTc5cpdzdijnA8w,6162
67
67
  onnxtr/utils/repr.py,sha256=kfbjGL6KymGT8spo2UL4FJXZ0XRwa7CO7Y1dTVR8dIk,2129
68
68
  onnxtr/utils/visualization.py,sha256=CX09qvDnNIw3BFW5F3jM4R9OcpLWAeZyoDyTAOGRvls,9925
69
- onnxtr/utils/vocabs.py,sha256=9Ufmjf7OczWb0931NjWTL7owXLYOKn5x0ulaoVeJGn8,3855
70
- onnxtr-0.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
71
- onnxtr-0.4.1.dist-info/METADATA,sha256=ghYrvvMe49613vZHcXr8qw6e8o7aoVD4uocdcKDaRYU,31723
72
- onnxtr-0.4.1.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
73
- onnxtr-0.4.1.dist-info/top_level.txt,sha256=r_MSUTpspp4pWEEWvly-s7ZkfCg1KwrK6-kBlXkWKU8,7
74
- onnxtr-0.4.1.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
75
- onnxtr-0.4.1.dist-info/RECORD,,
69
+ onnxtr/utils/vocabs.py,sha256=KGGsSLjGl9YLbAYcVCloNR5OIwMKMUc4idpn08EqYYY,4160
70
+ onnxtr-0.5.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
71
+ onnxtr-0.5.1.dist-info/METADATA,sha256=VvjTLpK86BT_Psv02B0MbC9GbFflt33d93uRSf3q5wE,32873
72
+ onnxtr-0.5.1.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
73
+ onnxtr-0.5.1.dist-info/top_level.txt,sha256=r_MSUTpspp4pWEEWvly-s7ZkfCg1KwrK6-kBlXkWKU8,7
74
+ onnxtr-0.5.1.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
75
+ onnxtr-0.5.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (73.0.1)
2
+ Generator: setuptools (75.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5