PyPI - onnxtr - Versions diffs - 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

onnxtr 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

onnxtr/io/elements.py CHANGED Viewed

@@ -163,7 +163,7 @@ class Line(Element):
         if geometry is None:
             # Check whether this is a rotated or straight box
             box_resolution_fn = resolve_enclosing_rbbox if len(words[0].geometry) == 4 else resolve_enclosing_bbox
-            geometry = box_resolution_fn([w.geometry for w in words])  # type: ignore[operator]
+            geometry = box_resolution_fn([w.geometry for w in words])  # type: ignore[misc]
         super().__init__(words=words)
         self.geometry = geometry
@@ -216,7 +216,7 @@ class Block(Element):
             box_resolution_fn = (
                 resolve_enclosing_rbbox if isinstance(lines[0].geometry, np.ndarray) else resolve_enclosing_bbox
             )
-            geometry = box_resolution_fn(line_boxes + artefact_boxes)  # type: ignore[operator]
+            geometry = box_resolution_fn(line_boxes + artefact_boxes)  # type: ignore
         super().__init__(lines=lines, artefacts=artefacts)
         self.geometry = geometry
@@ -294,6 +294,10 @@ class Page(Element):
     def synthesize(self, **kwargs) -> np.ndarray:
         """Synthesize the page from the predictions
+        Args:
+        ----
+            **kwargs: keyword arguments passed to the `synthesize_page` method
         Returns
         -------
             synthesized page
@@ -442,11 +446,15 @@ class Document(Element):
     def synthesize(self, **kwargs) -> List[np.ndarray]:
         """Synthesize all pages from their predictions
+        Args:
+        ----
+            **kwargs: keyword arguments passed to the `Page.synthesize` method
         Returns
         -------
             list of synthesized pages
         """
-        return [page.synthesize() for page in self.pages]
+        return [page.synthesize(**kwargs) for page in self.pages]
     def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
         """Export the document as XML (hOCR-format)

onnxtr/utils/reconstitution.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+import logging
 from typing import Any, Dict, Optional
 import numpy as np
@@ -13,10 +14,109 @@ from .fonts import get_font
 __all__ = ["synthesize_page"]
+# Global variable to avoid multiple warnings
+ROTATION_WARNING = False
+def _warn_rotation(entry: Dict[str, Any]) -> None:  # pragma: no cover
+    global ROTATION_WARNING
+    if not ROTATION_WARNING and len(entry["geometry"]) == 4:
+        logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
+        ROTATION_WARNING = True
+def _synthesize(
+    response: Image.Image,
+    entry: Dict[str, Any],
+    w: int,
+    h: int,
+    draw_proba: bool = False,
+    font_family: Optional[str] = None,
+    smoothing_factor: float = 0.75,
+    min_font_size: int = 6,
+    max_font_size: int = 50,
+) -> Image.Image:
+    if len(entry["geometry"]) == 2:
+        (xmin, ymin), (xmax, ymax) = entry["geometry"]
+        polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
+    else:
+        polygon = entry["geometry"]
+    # Calculate the bounding box of the word
+    x_coords, y_coords = zip(*polygon)
+    xmin, ymin, xmax, ymax = (
+        int(round(w * min(x_coords))),
+        int(round(h * min(y_coords))),
+        int(round(w * max(x_coords))),
+        int(round(h * max(y_coords))),
+    )
+    word_width = xmax - xmin
+    word_height = ymax - ymin
+    # If lines are provided instead of words, concatenate the word entries
+    if "words" in entry:
+        word_text = " ".join(word["value"] for word in entry["words"])
+    else:
+        word_text = entry["value"]
+    # Find the optimal font size
+    try:
+        font_size = min(word_height, max_font_size)
+        font = get_font(font_family, font_size)
+        text_width, text_height = font.getbbox(word_text)[2:4]
+        while (text_width > word_width or text_height > word_height) and font_size > min_font_size:
+            font_size = max(int(font_size * smoothing_factor), min_font_size)
+            font = get_font(font_family, font_size)
+            text_width, text_height = font.getbbox(word_text)[2:4]
+    except ValueError:  # pragma: no cover
+        font = get_font(font_family, min_font_size)
+    # Create a mask for the word
+    mask = Image.new("L", (w, h), 0)
+    ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255)
+    # Draw the word text
+    d = ImageDraw.Draw(response)
+    try:
+        try:
+            d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt")
+        except UnicodeEncodeError:  # pragma: no cover
+            d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt")
+    # Catch generic exceptions to avoid crashing the whole rendering
+    except Exception:  # pragma: no cover
+        logging.warning(f"Could not render word: {word_text}")
+    if draw_proba:
+        confidence = (
+            entry["confidence"]
+            if "confidence" in entry
+            else sum(w["confidence"] for w in entry["words"]) / len(entry["words"])
+        )
+        p = int(255 * confidence)
+        color = (255 - p, 0, p)  # Red to blue gradient based on probability
+        d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2)
+        prob_font = get_font(font_family, 20)
+        prob_text = f"{confidence:.2f}"
+        prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4]
+        # Position the probability slightly above the bounding box
+        prob_x_offset = (word_width - prob_text_width) // 2
+        prob_y_offset = ymin - prob_text_height - 2
+        prob_y_offset = max(0, prob_y_offset)
+        d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt")
+    return response
 def synthesize_page(
     page: Dict[str, Any],
     draw_proba: bool = False,
     font_family: Optional[str] = None,
+    smoothing_factor: float = 0.95,
+    min_font_size: int = 8,
+    max_font_size: int = 50,
 ) -> np.ndarray:
     """Draw a the content of the element page (OCR response) on a blank page.
@@ -24,8 +124,10 @@ def synthesize_page(
     ----
         page: exported Page object to represent
         draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
-        font_size: size of the font, default font = 13
         font_family: family of the font
+        smoothing_factor: factor to smooth the font size
+        min_font_size: minimum font size
+        max_font_size: maximum font size
     Returns:
     -------
@@ -33,38 +135,39 @@ def synthesize_page(
     """
     # Draw template
     h, w = page["dimensions"]
-    response = 255 * np.ones((h, w, 3), dtype=np.int32)
+    response = Image.new("RGB", (w, h), color=(255, 255, 255))
-    # Draw each word
     for block in page["blocks"]:
-        for line in block["lines"]:
-            for word in line["words"]:
-                # Get absolute word geometry
-                (xmin, ymin), (xmax, ymax) = word["geometry"]
-                xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
-                ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
-                # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
-                font = get_font(font_family, int(0.75 * (ymax - ymin)))
-                img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
-                d = ImageDraw.Draw(img)
-                # Draw in black the value of the word
-                try:
-                    d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
-                except UnicodeEncodeError:
-                    # When character cannot be encoded, use its anyascii version
-                    d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0))
-                # Colorize if draw_proba
-                if draw_proba:
-                    p = int(255 * word["confidence"])
-                    mask = np.where(np.array(img) == 0, 1, 0)
-                    proba: np.ndarray = np.array([255 - p, 0, p])
-                    color = mask * proba[np.newaxis, np.newaxis, :]
-                    white_mask = 255 * (1 - mask)
-                    img = color + white_mask
-                # Write to response page
-                response[ymin:ymax, xmin:xmax, :] = np.array(img)
+        # If lines are provided use these to get better rendering results
+        if len(block["lines"]) > 1:
+            for line in block["lines"]:
+                _warn_rotation(block)  # pragma: no cover
+                response = _synthesize(
+                    response=response,
+                    entry=line,
+                    w=w,
+                    h=h,
+                    draw_proba=draw_proba,
+                    font_family=font_family,
+                    smoothing_factor=smoothing_factor,
+                    min_font_size=min_font_size,
+                    max_font_size=max_font_size,
+                )
+        # Otherwise, draw each word
+        else:
+            for line in block["lines"]:
+                _warn_rotation(block)  # pragma: no cover
+                for word in line["words"]:
+                    response = _synthesize(
+                        response=response,
+                        entry=word,
+                        w=w,
+                        h=h,
+                        draw_proba=draw_proba,
+                        font_family=font_family,
+                        smoothing_factor=smoothing_factor,
+                        min_font_size=min_font_size,
+                        max_font_size=max_font_size,
+                    )
-    return response
+    return np.array(response, dtype=np.uint8)

onnxtr/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = 'v0.5.0'
1	+ __version__ = 'v0.5.1'

{onnxtr-0.5.0.dist-info → onnxtr-0.5.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: onnxtr
-Version: 0.5.0
+Version: 0.5.1
 Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
 Author-email: Felix Dittrich <felixdittrich92@gmail.com>
 Maintainer: Felix Dittrich
@@ -285,6 +285,7 @@ Requires-Dist: mplcursors>=0.3; extra == "viz"
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
 [![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.

{onnxtr-0.5.0.dist-info → onnxtr-0.5.1.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
 onnxtr/__init__.py,sha256=h7Wc2tuHLsaoCk5xNpEFEK-g11A6SJA7nAasA76TQ_Y,100
 onnxtr/file_utils.py,sha256=WjUKalEdR53aoeIY4e-ihy3r7J_C9qFxL40JHGPfutc,1107
 onnxtr/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-onnxtr/version.py,sha256=ay2eyvYrrrmoUF0UOrGyngPZ_bXQGVx5uePigx1AqEU,23
+onnxtr/version.py,sha256=6swtMqpBHD3aLY2AA1x8huT5k183agnIJ6bjR3m048c,23
 onnxtr/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnxtr/contrib/artefacts.py,sha256=tdmfhvfXVRYEH7uj4_hqf2cuUGoTieyNK8bXsD3zHwo,5383
 onnxtr/contrib/base.py,sha256=KyJ8_zDSKEWSFBszgCbLjEeI7SKg4N_iH_ZQNf90SWQ,3288
 onnxtr/io/__init__.py,sha256=kS7tKGFvzxOCWBOun-Y8n9CsziwRKNynjwpZEUUI03M,106
-onnxtr/io/elements.py,sha256=h-IxpFqXrvg-fOhpnOqpGFLdG-lR-xYYIxk3chy_MN8,17769
+onnxtr/io/elements.py,sha256=GX6rhwg_ByAlL8rAsuLgPAeJ7JsN3_V2o_ETkhh_U68,17977
 onnxtr/io/html.py,sha256=Em_7PjZ56SugJ9bjjcWLCMVe5ee6uUMKeZovNxJFAXw,737
 onnxtr/io/image.py,sha256=4tLTh2bGdA0ohh3a6mV6xD0KqNOtIVi5lJ06XSmeyMI,1759
 onnxtr/io/pdf.py,sha256=tD0klmxI-gkMXp56f_ZXWyPHLsUBKa_xlhNTtGV6tpU,1367
@@ -63,13 +63,13 @@ onnxtr/utils/data.py,sha256=Dh0mgeHJhyPwmm63J90uDVmIYbrp63hh1_SnYLnpgJI,4354
 onnxtr/utils/fonts.py,sha256=27v0cojgUrVxNF8Krb1FybSoykoxFy1XjG8lHRUuiEY,1353
 onnxtr/utils/geometry.py,sha256=mYsxRYpMm-UtwmXTcbiSfe2j6-50ZSWAohTcfyi7aZU,20929
 onnxtr/utils/multithreading.py,sha256=30T7AylM3rb52ZEI3Pk1pfB0VYraTbc7yO2vNODVVFY,2011
-onnxtr/utils/reconstitution.py,sha256=Hx1_ddLevKLzuxXc19UelPdsGlAwqi4f6vRSYKHDUB4,2617
+onnxtr/utils/reconstitution.py,sha256=DGb2Isxc2At2GTOO93rbzNvnLRG2vTc5cpdzdijnA8w,6162
 onnxtr/utils/repr.py,sha256=kfbjGL6KymGT8spo2UL4FJXZ0XRwa7CO7Y1dTVR8dIk,2129
 onnxtr/utils/visualization.py,sha256=CX09qvDnNIw3BFW5F3jM4R9OcpLWAeZyoDyTAOGRvls,9925
 onnxtr/utils/vocabs.py,sha256=KGGsSLjGl9YLbAYcVCloNR5OIwMKMUc4idpn08EqYYY,4160
-onnxtr-0.5.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-onnxtr-0.5.0.dist-info/METADATA,sha256=uWZiVV7JRZfJTI1mn9y4jxq84AT8yYQt3zaZ27J4NS4,32726
-onnxtr-0.5.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-onnxtr-0.5.0.dist-info/top_level.txt,sha256=r_MSUTpspp4pWEEWvly-s7ZkfCg1KwrK6-kBlXkWKU8,7
-onnxtr-0.5.0.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-onnxtr-0.5.0.dist-info/RECORD,,
+onnxtr-0.5.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+onnxtr-0.5.1.dist-info/METADATA,sha256=VvjTLpK86BT_Psv02B0MbC9GbFflt33d93uRSf3q5wE,32873
+onnxtr-0.5.1.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+onnxtr-0.5.1.dist-info/top_level.txt,sha256=r_MSUTpspp4pWEEWvly-s7ZkfCg1KwrK6-kBlXkWKU8,7
+onnxtr-0.5.1.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+onnxtr-0.5.1.dist-info/RECORD,,

{onnxtr-0.5.0.dist-info → onnxtr-0.5.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.1.0)
+Generator: setuptools (75.2.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{onnxtr-0.5.0.dist-info → onnxtr-0.5.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{onnxtr-0.5.0.dist-info → onnxtr-0.5.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

{onnxtr-0.5.0.dist-info → onnxtr-0.5.1.dist-info}/zip-safe RENAMED Viewed

File without changes

onnxtr 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

onnxtr 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl