onnxtr 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
onnxtr/io/elements.py CHANGED
@@ -163,7 +163,7 @@ class Line(Element):
163
163
  if geometry is None:
164
164
  # Check whether this is a rotated or straight box
165
165
  box_resolution_fn = resolve_enclosing_rbbox if len(words[0].geometry) == 4 else resolve_enclosing_bbox
166
- geometry = box_resolution_fn([w.geometry for w in words]) # type: ignore[operator]
166
+ geometry = box_resolution_fn([w.geometry for w in words]) # type: ignore[misc]
167
167
 
168
168
  super().__init__(words=words)
169
169
  self.geometry = geometry
@@ -216,7 +216,7 @@ class Block(Element):
216
216
  box_resolution_fn = (
217
217
  resolve_enclosing_rbbox if isinstance(lines[0].geometry, np.ndarray) else resolve_enclosing_bbox
218
218
  )
219
- geometry = box_resolution_fn(line_boxes + artefact_boxes) # type: ignore[operator]
219
+ geometry = box_resolution_fn(line_boxes + artefact_boxes) # type: ignore
220
220
 
221
221
  super().__init__(lines=lines, artefacts=artefacts)
222
222
  self.geometry = geometry
@@ -294,6 +294,10 @@ class Page(Element):
294
294
  def synthesize(self, **kwargs) -> np.ndarray:
295
295
  """Synthesize the page from the predictions
296
296
 
297
+ Args:
298
+ ----
299
+ **kwargs: keyword arguments passed to the `synthesize_page` method
300
+
297
301
  Returns
298
302
  -------
299
303
  synthesized page
@@ -442,11 +446,15 @@ class Document(Element):
442
446
  def synthesize(self, **kwargs) -> List[np.ndarray]:
443
447
  """Synthesize all pages from their predictions
444
448
 
449
+ Args:
450
+ ----
451
+ **kwargs: keyword arguments passed to the `Page.synthesize` method
452
+
445
453
  Returns
446
454
  -------
447
455
  list of synthesized pages
448
456
  """
449
- return [page.synthesize() for page in self.pages]
457
+ return [page.synthesize(**kwargs) for page in self.pages]
450
458
 
451
459
  def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
452
460
  """Export the document as XML (hOCR-format)
@@ -2,6 +2,7 @@
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+ import logging
5
6
  from typing import Any, Dict, Optional
6
7
 
7
8
  import numpy as np
@@ -13,10 +14,109 @@ from .fonts import get_font
13
14
  __all__ = ["synthesize_page"]
14
15
 
15
16
 
17
+ # Global variable to avoid multiple warnings
18
+ ROTATION_WARNING = False
19
+
20
+
21
+ def _warn_rotation(entry: Dict[str, Any]) -> None: # pragma: no cover
22
+ global ROTATION_WARNING
23
+ if not ROTATION_WARNING and len(entry["geometry"]) == 4:
24
+ logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
25
+ ROTATION_WARNING = True
26
+
27
+
28
+ def _synthesize(
29
+ response: Image.Image,
30
+ entry: Dict[str, Any],
31
+ w: int,
32
+ h: int,
33
+ draw_proba: bool = False,
34
+ font_family: Optional[str] = None,
35
+ smoothing_factor: float = 0.75,
36
+ min_font_size: int = 6,
37
+ max_font_size: int = 50,
38
+ ) -> Image.Image:
39
+ if len(entry["geometry"]) == 2:
40
+ (xmin, ymin), (xmax, ymax) = entry["geometry"]
41
+ polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
42
+ else:
43
+ polygon = entry["geometry"]
44
+
45
+ # Calculate the bounding box of the word
46
+ x_coords, y_coords = zip(*polygon)
47
+ xmin, ymin, xmax, ymax = (
48
+ int(round(w * min(x_coords))),
49
+ int(round(h * min(y_coords))),
50
+ int(round(w * max(x_coords))),
51
+ int(round(h * max(y_coords))),
52
+ )
53
+ word_width = xmax - xmin
54
+ word_height = ymax - ymin
55
+
56
+ # If lines are provided instead of words, concatenate the word entries
57
+ if "words" in entry:
58
+ word_text = " ".join(word["value"] for word in entry["words"])
59
+ else:
60
+ word_text = entry["value"]
61
+ # Find the optimal font size
62
+ try:
63
+ font_size = min(word_height, max_font_size)
64
+ font = get_font(font_family, font_size)
65
+ text_width, text_height = font.getbbox(word_text)[2:4]
66
+
67
+ while (text_width > word_width or text_height > word_height) and font_size > min_font_size:
68
+ font_size = max(int(font_size * smoothing_factor), min_font_size)
69
+ font = get_font(font_family, font_size)
70
+ text_width, text_height = font.getbbox(word_text)[2:4]
71
+ except ValueError: # pragma: no cover
72
+ font = get_font(font_family, min_font_size)
73
+
74
+ # Create a mask for the word
75
+ mask = Image.new("L", (w, h), 0)
76
+ ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255)
77
+
78
+ # Draw the word text
79
+ d = ImageDraw.Draw(response)
80
+ try:
81
+ try:
82
+ d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt")
83
+ except UnicodeEncodeError: # pragma: no cover
84
+ d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt")
85
+ # Catch generic exceptions to avoid crashing the whole rendering
86
+ except Exception: # pragma: no cover
87
+ logging.warning(f"Could not render word: {word_text}")
88
+
89
+ if draw_proba:
90
+ confidence = (
91
+ entry["confidence"]
92
+ if "confidence" in entry
93
+ else sum(w["confidence"] for w in entry["words"]) / len(entry["words"])
94
+ )
95
+ p = int(255 * confidence)
96
+ color = (255 - p, 0, p) # Red to blue gradient based on probability
97
+ d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2)
98
+
99
+ prob_font = get_font(font_family, 20)
100
+ prob_text = f"{confidence:.2f}"
101
+ prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4]
102
+
103
+ # Position the probability slightly above the bounding box
104
+ prob_x_offset = (word_width - prob_text_width) // 2
105
+ prob_y_offset = ymin - prob_text_height - 2
106
+ prob_y_offset = max(0, prob_y_offset)
107
+
108
+ d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt")
109
+
110
+ return response
111
+
112
+
16
113
  def synthesize_page(
17
114
  page: Dict[str, Any],
18
115
  draw_proba: bool = False,
19
116
  font_family: Optional[str] = None,
117
+ smoothing_factor: float = 0.95,
118
+ min_font_size: int = 8,
119
+ max_font_size: int = 50,
20
120
  ) -> np.ndarray:
21
121
  """Draw the content of the element page (OCR response) on a blank page.
22
122
 
@@ -24,8 +124,10 @@ def synthesize_page(
24
124
  ----
25
125
  page: exported Page object to represent
26
126
  draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
27
- font_size: size of the font, default font = 13
28
127
  font_family: family of the font
128
+ smoothing_factor: factor to smooth the font size
129
+ min_font_size: minimum font size
130
+ max_font_size: maximum font size
29
131
 
30
132
  Returns:
31
133
  -------
@@ -33,38 +135,39 @@ def synthesize_page(
33
135
  """
34
136
  # Draw template
35
137
  h, w = page["dimensions"]
36
- response = 255 * np.ones((h, w, 3), dtype=np.int32)
138
+ response = Image.new("RGB", (w, h), color=(255, 255, 255))
37
139
 
38
- # Draw each word
39
140
  for block in page["blocks"]:
40
- for line in block["lines"]:
41
- for word in line["words"]:
42
- # Get absolute word geometry
43
- (xmin, ymin), (xmax, ymax) = word["geometry"]
44
- xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
45
- ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
46
-
47
- # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
48
- font = get_font(font_family, int(0.75 * (ymax - ymin)))
49
- img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
50
- d = ImageDraw.Draw(img)
51
- # Draw in black the value of the word
52
- try:
53
- d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
54
- except UnicodeEncodeError:
55
- # When character cannot be encoded, use its anyascii version
56
- d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0))
57
-
58
- # Colorize if draw_proba
59
- if draw_proba:
60
- p = int(255 * word["confidence"])
61
- mask = np.where(np.array(img) == 0, 1, 0)
62
- proba: np.ndarray = np.array([255 - p, 0, p])
63
- color = mask * proba[np.newaxis, np.newaxis, :]
64
- white_mask = 255 * (1 - mask)
65
- img = color + white_mask
66
-
67
- # Write to response page
68
- response[ymin:ymax, xmin:xmax, :] = np.array(img)
141
+ # If lines are provided use these to get better rendering results
142
+ if len(block["lines"]) > 1:
143
+ for line in block["lines"]:
144
+ _warn_rotation(block) # pragma: no cover
145
+ response = _synthesize(
146
+ response=response,
147
+ entry=line,
148
+ w=w,
149
+ h=h,
150
+ draw_proba=draw_proba,
151
+ font_family=font_family,
152
+ smoothing_factor=smoothing_factor,
153
+ min_font_size=min_font_size,
154
+ max_font_size=max_font_size,
155
+ )
156
+ # Otherwise, draw each word
157
+ else:
158
+ for line in block["lines"]:
159
+ _warn_rotation(block) # pragma: no cover
160
+ for word in line["words"]:
161
+ response = _synthesize(
162
+ response=response,
163
+ entry=word,
164
+ w=w,
165
+ h=h,
166
+ draw_proba=draw_proba,
167
+ font_family=font_family,
168
+ smoothing_factor=smoothing_factor,
169
+ min_font_size=min_font_size,
170
+ max_font_size=max_font_size,
171
+ )
69
172
 
70
- return response
173
+ return np.array(response, dtype=np.uint8)
onnxtr/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = 'v0.5.0'
1
+ __version__ = 'v0.5.1'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: onnxtr
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
5
5
  Author-email: Felix Dittrich <felixdittrich92@gmail.com>
6
6
  Maintainer: Felix Dittrich
@@ -285,6 +285,7 @@ Requires-Dist: mplcursors>=0.3; extra == "viz"
285
285
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
286
286
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
287
287
  [![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
288
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
288
289
 
289
290
  > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide an Onnx pipeline for docTR. For feature requests that are not directly related to the Onnx pipeline, please refer to the base project.
290
291
 
@@ -1,12 +1,12 @@
1
1
  onnxtr/__init__.py,sha256=h7Wc2tuHLsaoCk5xNpEFEK-g11A6SJA7nAasA76TQ_Y,100
2
2
  onnxtr/file_utils.py,sha256=WjUKalEdR53aoeIY4e-ihy3r7J_C9qFxL40JHGPfutc,1107
3
3
  onnxtr/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- onnxtr/version.py,sha256=ay2eyvYrrrmoUF0UOrGyngPZ_bXQGVx5uePigx1AqEU,23
4
+ onnxtr/version.py,sha256=6swtMqpBHD3aLY2AA1x8huT5k183agnIJ6bjR3m048c,23
5
5
  onnxtr/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  onnxtr/contrib/artefacts.py,sha256=tdmfhvfXVRYEH7uj4_hqf2cuUGoTieyNK8bXsD3zHwo,5383
7
7
  onnxtr/contrib/base.py,sha256=KyJ8_zDSKEWSFBszgCbLjEeI7SKg4N_iH_ZQNf90SWQ,3288
8
8
  onnxtr/io/__init__.py,sha256=kS7tKGFvzxOCWBOun-Y8n9CsziwRKNynjwpZEUUI03M,106
9
- onnxtr/io/elements.py,sha256=h-IxpFqXrvg-fOhpnOqpGFLdG-lR-xYYIxk3chy_MN8,17769
9
+ onnxtr/io/elements.py,sha256=GX6rhwg_ByAlL8rAsuLgPAeJ7JsN3_V2o_ETkhh_U68,17977
10
10
  onnxtr/io/html.py,sha256=Em_7PjZ56SugJ9bjjcWLCMVe5ee6uUMKeZovNxJFAXw,737
11
11
  onnxtr/io/image.py,sha256=4tLTh2bGdA0ohh3a6mV6xD0KqNOtIVi5lJ06XSmeyMI,1759
12
12
  onnxtr/io/pdf.py,sha256=tD0klmxI-gkMXp56f_ZXWyPHLsUBKa_xlhNTtGV6tpU,1367
@@ -63,13 +63,13 @@ onnxtr/utils/data.py,sha256=Dh0mgeHJhyPwmm63J90uDVmIYbrp63hh1_SnYLnpgJI,4354
63
63
  onnxtr/utils/fonts.py,sha256=27v0cojgUrVxNF8Krb1FybSoykoxFy1XjG8lHRUuiEY,1353
64
64
  onnxtr/utils/geometry.py,sha256=mYsxRYpMm-UtwmXTcbiSfe2j6-50ZSWAohTcfyi7aZU,20929
65
65
  onnxtr/utils/multithreading.py,sha256=30T7AylM3rb52ZEI3Pk1pfB0VYraTbc7yO2vNODVVFY,2011
66
- onnxtr/utils/reconstitution.py,sha256=Hx1_ddLevKLzuxXc19UelPdsGlAwqi4f6vRSYKHDUB4,2617
66
+ onnxtr/utils/reconstitution.py,sha256=DGb2Isxc2At2GTOO93rbzNvnLRG2vTc5cpdzdijnA8w,6162
67
67
  onnxtr/utils/repr.py,sha256=kfbjGL6KymGT8spo2UL4FJXZ0XRwa7CO7Y1dTVR8dIk,2129
68
68
  onnxtr/utils/visualization.py,sha256=CX09qvDnNIw3BFW5F3jM4R9OcpLWAeZyoDyTAOGRvls,9925
69
69
  onnxtr/utils/vocabs.py,sha256=KGGsSLjGl9YLbAYcVCloNR5OIwMKMUc4idpn08EqYYY,4160
70
- onnxtr-0.5.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
71
- onnxtr-0.5.0.dist-info/METADATA,sha256=uWZiVV7JRZfJTI1mn9y4jxq84AT8yYQt3zaZ27J4NS4,32726
72
- onnxtr-0.5.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
73
- onnxtr-0.5.0.dist-info/top_level.txt,sha256=r_MSUTpspp4pWEEWvly-s7ZkfCg1KwrK6-kBlXkWKU8,7
74
- onnxtr-0.5.0.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
75
- onnxtr-0.5.0.dist-info/RECORD,,
70
+ onnxtr-0.5.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
71
+ onnxtr-0.5.1.dist-info/METADATA,sha256=VvjTLpK86BT_Psv02B0MbC9GbFflt33d93uRSf3q5wE,32873
72
+ onnxtr-0.5.1.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
73
+ onnxtr-0.5.1.dist-info/top_level.txt,sha256=r_MSUTpspp4pWEEWvly-s7ZkfCg1KwrK6-kBlXkWKU8,7
74
+ onnxtr-0.5.1.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
75
+ onnxtr-0.5.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5