onnxtr 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {onnxtr-0.4.0 → onnxtr-0.5.0}/PKG-INFO +30 -7
  2. {onnxtr-0.4.0 → onnxtr-0.5.0}/README.md +19 -4
  3. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/builder.py +1 -1
  4. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/models/mobilenet.py +1 -0
  5. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/predictor/base.py +8 -4
  6. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/zoo.py +24 -8
  7. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/predictor/base.py +24 -12
  8. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/predictor/predictor.py +3 -0
  9. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/geometry.py +106 -19
  10. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/vocabs.py +4 -0
  11. onnxtr-0.5.0/onnxtr/version.py +1 -0
  12. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/PKG-INFO +30 -7
  13. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/requires.txt +12 -2
  14. {onnxtr-0.4.0 → onnxtr-0.5.0}/pyproject.toml +12 -2
  15. {onnxtr-0.4.0 → onnxtr-0.5.0}/setup.py +1 -1
  16. onnxtr-0.4.0/onnxtr/version.py +0 -1
  17. {onnxtr-0.4.0 → onnxtr-0.5.0}/LICENSE +0 -0
  18. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/__init__.py +0 -0
  19. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/contrib/__init__.py +0 -0
  20. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/contrib/artefacts.py +0 -0
  21. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/contrib/base.py +0 -0
  22. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/file_utils.py +0 -0
  23. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/__init__.py +0 -0
  24. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/elements.py +0 -0
  25. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/html.py +0 -0
  26. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/image.py +0 -0
  27. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/pdf.py +0 -0
  28. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/reader.py +0 -0
  29. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/__init__.py +0 -0
  30. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/_utils.py +0 -0
  31. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/__init__.py +0 -0
  32. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/models/__init__.py +0 -0
  33. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/predictor/__init__.py +0 -0
  34. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/__init__.py +0 -0
  35. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/_utils/__init__.py +0 -0
  36. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/_utils/base.py +0 -0
  37. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/core.py +0 -0
  38. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/__init__.py +0 -0
  39. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/differentiable_binarization.py +0 -0
  40. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/fast.py +0 -0
  41. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/linknet.py +0 -0
  42. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/postprocessor/__init__.py +0 -0
  43. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/postprocessor/base.py +0 -0
  44. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/predictor/__init__.py +0 -0
  45. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/predictor/base.py +0 -0
  46. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/zoo.py +0 -0
  47. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/engine.py +0 -0
  48. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/factory/__init__.py +0 -0
  49. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/factory/hub.py +0 -0
  50. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/predictor/__init__.py +0 -0
  51. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/preprocessor/__init__.py +0 -0
  52. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/preprocessor/base.py +0 -0
  53. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/__init__.py +0 -0
  54. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/core.py +0 -0
  55. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/__init__.py +0 -0
  56. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/crnn.py +0 -0
  57. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/master.py +0 -0
  58. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/parseq.py +0 -0
  59. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/sar.py +0 -0
  60. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/vitstr.py +0 -0
  61. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/predictor/__init__.py +0 -0
  62. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/predictor/_utils.py +0 -0
  63. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/predictor/base.py +0 -0
  64. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/utils.py +0 -0
  65. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/zoo.py +0 -0
  66. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/zoo.py +0 -0
  67. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/py.typed +0 -0
  68. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/transforms/__init__.py +0 -0
  69. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/transforms/base.py +0 -0
  70. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/__init__.py +0 -0
  71. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/common_types.py +0 -0
  72. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/data.py +0 -0
  73. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/fonts.py +0 -0
  74. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/multithreading.py +0 -0
  75. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/reconstitution.py +0 -0
  76. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/repr.py +0 -0
  77. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/visualization.py +0 -0
  78. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/SOURCES.txt +0 -0
  79. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/dependency_links.txt +0 -0
  80. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/top_level.txt +0 -0
  81. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/zip-safe +0 -0
  82. {onnxtr-0.4.0 → onnxtr-0.5.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: onnxtr
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
5
5
  Author-email: Felix Dittrich <felixdittrich92@gmail.com>
6
6
  Maintainer: Felix Dittrich
@@ -225,9 +225,8 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
225
225
  Requires-Python: <4,>=3.9.0
226
226
  Description-Content-Type: text/markdown
227
227
  License-File: LICENSE
228
- Requires-Dist: numpy<2.0.0,>=1.16.0
228
+ Requires-Dist: numpy<3.0.0,>=1.16.0
229
229
  Requires-Dist: scipy<2.0.0,>=1.4.0
230
- Requires-Dist: opencv-python<5.0.0,>=4.5.0
231
230
  Requires-Dist: pypdfium2<5.0.0,>=4.11.0
232
231
  Requires-Dist: pyclipper<2.0.0,>=1.2.0
233
232
  Requires-Dist: shapely<3.0.0,>=1.6.0
@@ -240,8 +239,16 @@ Requires-Dist: anyascii>=0.3.2
240
239
  Requires-Dist: tqdm>=4.30.0
241
240
  Provides-Extra: cpu
242
241
  Requires-Dist: onnxruntime>=1.11.0; extra == "cpu"
242
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "cpu"
243
243
  Provides-Extra: gpu
244
244
  Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu"
245
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "gpu"
246
+ Provides-Extra: cpu-headless
247
+ Requires-Dist: onnxruntime>=1.11.0; extra == "cpu-headless"
248
+ Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "cpu-headless"
249
+ Provides-Extra: gpu-headless
250
+ Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu-headless"
251
+ Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "gpu-headless"
245
252
  Provides-Extra: html
246
253
  Requires-Dist: weasyprint>=55.0; extra == "html"
247
254
  Provides-Extra: viz
@@ -257,6 +264,7 @@ Requires-Dist: mypy>=0.812; extra == "quality"
257
264
  Requires-Dist: pre-commit>=2.17.0; extra == "quality"
258
265
  Provides-Extra: dev
259
266
  Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
267
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "dev"
260
268
  Requires-Dist: weasyprint>=55.0; extra == "dev"
261
269
  Requires-Dist: matplotlib>=3.1.0; extra == "dev"
262
270
  Requires-Dist: mplcursors>=0.3; extra == "dev"
@@ -276,7 +284,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
276
284
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
277
285
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
278
286
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
279
- [![Pypi](https://img.shields.io/badge/pypi-v0.3.2-blue.svg)](https://pypi.org/project/OnnxTR/)
287
+ [![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
280
288
 
281
289
  > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
282
290
 
@@ -309,8 +317,10 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
309
317
 
310
318
  ```shell
311
319
  pip install "onnxtr[cpu]"
320
+ pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
312
321
  # with gpu support
313
322
  pip install "onnxtr[gpu]"
323
+ pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
314
324
  # with HTML support
315
325
  pip install "onnxtr[html]"
316
326
  # with support for visualization
@@ -356,6 +366,9 @@ model = ocr_predictor(
356
366
  # Additional parameters - meta information
357
367
  detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
358
368
  detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
369
+ # Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
370
+ disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
371
+ disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
359
372
  # DocumentBuilder specific parameters
360
373
  resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
361
374
  resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
@@ -589,8 +602,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
589
602
  |Library |FUNSD (199 pages) |CORD (900 pages) |
590
603
  |---------------------------------|-------------------------------|-------------------------------|
591
604
  |docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
592
- |**OnnxTR (CPU)** - v0.1.2 | ~0.57s / Page | **~0.25s / Page** |
593
- |**OnnxTR (CPU) 8-bit** - v0.1.2 | **~0.38s / Page** | **~0.14s / Page** |
605
+ |**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
606
+ |**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
594
607
  |EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
595
608
  |**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
596
609
  |Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
@@ -602,7 +615,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
602
615
  |-------------------------------------|-------------------------------|-------------------------------|
603
616
  |docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
604
617
  |**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
605
- |OnnxTR (GPU) - v0.1.2 | **~0.06s / Page** | ~0.04s / Page |
618
+ |OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
606
619
  |EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
607
620
  |Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
608
621
  |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
@@ -621,6 +634,16 @@ If you wish to cite please refer to the base project citation, feel free to use
621
634
  }
622
635
  ```
623
636
 
637
+ ```bibtex
638
+ @misc{onnxtr2024,
639
+ title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
640
+ author={Felix Dittrich},
641
+ year={2024},
642
+ publisher = {GitHub},
643
+ howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
644
+ }
645
+ ```
646
+
624
647
  ## License
625
648
 
626
649
  Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
@@ -7,7 +7,7 @@
7
7
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
8
8
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
9
9
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
10
- [![Pypi](https://img.shields.io/badge/pypi-v0.3.2-blue.svg)](https://pypi.org/project/OnnxTR/)
10
+ [![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
11
11
 
12
12
  > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
13
13
 
@@ -40,8 +40,10 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
40
40
 
41
41
  ```shell
42
42
  pip install "onnxtr[cpu]"
43
+ pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
43
44
  # with gpu support
44
45
  pip install "onnxtr[gpu]"
46
+ pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
45
47
  # with HTML support
46
48
  pip install "onnxtr[html]"
47
49
  # with support for visualization
@@ -87,6 +89,9 @@ model = ocr_predictor(
87
89
  # Additional parameters - meta information
88
90
  detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
89
91
  detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
92
+ # Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
93
+ disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
94
+ disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
90
95
  # DocumentBuilder specific parameters
91
96
  resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
92
97
  resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
@@ -320,8 +325,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
320
325
  |Library |FUNSD (199 pages) |CORD (900 pages) |
321
326
  |---------------------------------|-------------------------------|-------------------------------|
322
327
  |docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
323
- |**OnnxTR (CPU)** - v0.1.2 | ~0.57s / Page | **~0.25s / Page** |
324
- |**OnnxTR (CPU) 8-bit** - v0.1.2 | **~0.38s / Page** | **~0.14s / Page** |
328
+ |**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
329
+ |**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
325
330
  |EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
326
331
  |**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
327
332
  |Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
@@ -333,7 +338,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
333
338
  |-------------------------------------|-------------------------------|-------------------------------|
334
339
  |docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
335
340
  |**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
336
- |OnnxTR (GPU) - v0.1.2 | **~0.06s / Page** | ~0.04s / Page |
341
+ |OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
337
342
  |EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
338
343
  |Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
339
344
  |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
@@ -352,6 +357,16 @@ If you wish to cite please refer to the base project citation, feel free to use
352
357
  }
353
358
  ```
354
359
 
360
+ ```bibtex
361
+ @misc{onnxtr2024,
362
+ title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
363
+ author={Felix Dittrich},
364
+ year={2024},
365
+ publisher = {GitHub},
366
+ howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
367
+ }
368
+ ```
369
+
355
370
  ## License
356
371
 
357
372
  Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
@@ -266,7 +266,7 @@ class DocumentBuilder(NestedObject):
266
266
  Line([
267
267
  Word(
268
268
  *word_preds[idx],
269
- tuple([tuple(pt) for pt in boxes[idx].tolist()]), # type: ignore[arg-type]
269
+ tuple(tuple(pt) for pt in boxes[idx].tolist()), # type: ignore[arg-type]
270
270
  float(objectness_scores[idx]),
271
271
  crop_orientations[idx],
272
272
  )
@@ -13,6 +13,7 @@ import numpy as np
13
13
  from ...engine import Engine, EngineConfig
14
14
 
15
15
  __all__ = [
16
+ "MobileNetV3",
16
17
  "mobilenet_v3_small_crop_orientation",
17
18
  "mobilenet_v3_small_page_orientation",
18
19
  ]
@@ -3,7 +3,7 @@
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
- from typing import Any, List, Union
6
+ from typing import Any, List, Optional, Union
7
7
 
8
8
  import numpy as np
9
9
  from scipy.special import softmax
@@ -29,10 +29,10 @@ class OrientationPredictor(NestedObject):
29
29
 
30
30
  def __init__(
31
31
  self,
32
- pre_processor: PreProcessor,
33
- model: Any,
32
+ pre_processor: Optional[PreProcessor],
33
+ model: Optional[Any],
34
34
  ) -> None:
35
- self.pre_processor = pre_processor
35
+ self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None
36
36
  self.model = model
37
37
 
38
38
  def __call__(
@@ -43,6 +43,10 @@ class OrientationPredictor(NestedObject):
43
43
  if any(input.ndim != 3 for input in inputs):
44
44
  raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.")
45
45
 
46
+ if self.model is None or self.pre_processor is None:
47
+ # predictor is disabled
48
+ return [[0] * len(inputs), [0] * len(inputs), [1.0] * len(inputs)]
49
+
46
50
  processed_batches = self.pre_processor(inputs)
47
51
  predicted_batches = [self.model(batch) for batch in processed_batches]
48
52
 
@@ -17,16 +17,30 @@ ORIENTATION_ARCHS: List[str] = ["mobilenet_v3_small_crop_orientation", "mobilene
17
17
 
18
18
 
19
19
  def _orientation_predictor(
20
- arch: str, load_in_8_bit: bool = False, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any
20
+ arch: Any,
21
+ model_type: str,
22
+ load_in_8_bit: bool = False,
23
+ engine_cfg: Optional[EngineConfig] = None,
24
+ disabled: bool = False,
25
+ **kwargs: Any,
21
26
  ) -> OrientationPredictor:
22
- if arch not in ORIENTATION_ARCHS:
23
- raise ValueError(f"unknown architecture '{arch}'")
27
+ if disabled:
28
+ # Case where the orientation predictor is disabled
29
+ return OrientationPredictor(None, None)
30
+
31
+ if isinstance(arch, str):
32
+ if arch not in ORIENTATION_ARCHS:
33
+ raise ValueError(f"unknown architecture '{arch}'")
34
+ # Load directly classifier from backbone
35
+ _model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
36
+ else:
37
+ if not isinstance(arch, classification.MobileNetV3):
38
+ raise ValueError(f"unknown architecture: {type(arch)}")
39
+ _model = arch
24
40
 
25
- # Load directly classifier from backbone
26
- _model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
27
41
  kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
28
42
  kwargs["std"] = kwargs.get("std", _model.cfg["std"])
29
- kwargs["batch_size"] = kwargs.get("batch_size", 512 if "crop" in arch else 2)
43
+ kwargs["batch_size"] = kwargs.get("batch_size", 512 if model_type == "crop" else 2)
30
44
  input_shape = _model.cfg["input_shape"][1:]
31
45
  predictor = OrientationPredictor(
32
46
  PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs),
@@ -60,7 +74,8 @@ def crop_orientation_predictor(
60
74
  -------
61
75
  OrientationPredictor
62
76
  """
63
- return _orientation_predictor(arch, load_in_8_bit, engine_cfg, **kwargs)
77
+ model_type = "crop"
78
+ return _orientation_predictor(arch, model_type, load_in_8_bit, engine_cfg, **kwargs)
64
79
 
65
80
 
66
81
  def page_orientation_predictor(
@@ -88,4 +103,5 @@ def page_orientation_predictor(
88
103
  -------
89
104
  OrientationPredictor
90
105
  """
91
- return _orientation_predictor(arch, load_in_8_bit, engine_cfg, **kwargs)
106
+ model_type = "page"
107
+ return _orientation_predictor(arch, model_type, load_in_8_bit, engine_cfg, **kwargs)
@@ -9,7 +9,7 @@ import numpy as np
9
9
 
10
10
  from onnxtr.models.builder import DocumentBuilder
11
11
  from onnxtr.models.engine import EngineConfig
12
- from onnxtr.utils.geometry import extract_crops, extract_rcrops, rotate_image
12
+ from onnxtr.utils.geometry import extract_crops, extract_rcrops, remove_image_padding, rotate_image
13
13
 
14
14
  from .._utils import estimate_orientation, rectify_crops, rectify_loc_preds
15
15
  from ..classification import crop_orientation_predictor, page_orientation_predictor
@@ -55,13 +55,19 @@ class _OCRPredictor:
55
55
  ) -> None:
56
56
  self.assume_straight_pages = assume_straight_pages
57
57
  self.straighten_pages = straighten_pages
58
+ self._page_orientation_disabled = kwargs.pop("disable_page_orientation", False)
59
+ self._crop_orientation_disabled = kwargs.pop("disable_crop_orientation", False)
58
60
  self.crop_orientation_predictor = (
59
61
  None
60
62
  if assume_straight_pages
61
- else crop_orientation_predictor(load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg)
63
+ else crop_orientation_predictor(
64
+ load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._crop_orientation_disabled
65
+ )
62
66
  )
63
67
  self.page_orientation_predictor = (
64
- page_orientation_predictor(load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg)
68
+ page_orientation_predictor(
69
+ load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._crop_orientation_disabled
70
+ )
65
71
  if detect_orientation or straighten_pages or not assume_straight_pages
66
72
  else None
67
73
  )
@@ -112,8 +118,8 @@ class _OCRPredictor:
112
118
  ]
113
119
  )
114
120
  return [
115
- # We exapnd if the page is wider than tall and the angle is 90 or -90
116
- rotate_image(page, angle, expand=page.shape[1] > page.shape[0] and abs(angle) == 90)
121
+ # expand if height and width are not equal, afterwards remove padding
122
+ remove_image_padding(rotate_image(page, angle, expand=page.shape[0] != page.shape[1]))
117
123
  for page, angle in zip(pages, origin_pages_orientations)
118
124
  ]
119
125
 
@@ -123,13 +129,18 @@ class _OCRPredictor:
123
129
  loc_preds: List[np.ndarray],
124
130
  channels_last: bool,
125
131
  assume_straight_pages: bool = False,
132
+ assume_horizontal: bool = False,
126
133
  ) -> List[List[np.ndarray]]:
127
- extraction_fn = extract_crops if assume_straight_pages else extract_rcrops
128
-
129
- crops = [
130
- extraction_fn(page, _boxes[:, :4], channels_last=channels_last) # type: ignore[operator]
131
- for page, _boxes in zip(pages, loc_preds)
132
- ]
134
+ if assume_straight_pages:
135
+ crops = [
136
+ extract_crops(page, _boxes[:, :4], channels_last=channels_last)
137
+ for page, _boxes in zip(pages, loc_preds)
138
+ ]
139
+ else:
140
+ crops = [
141
+ extract_rcrops(page, _boxes[:, :4], channels_last=channels_last, assume_horizontal=assume_horizontal)
142
+ for page, _boxes in zip(pages, loc_preds)
143
+ ]
133
144
  return crops
134
145
 
135
146
  @staticmethod
@@ -138,8 +149,9 @@ class _OCRPredictor:
138
149
  loc_preds: List[np.ndarray],
139
150
  channels_last: bool,
140
151
  assume_straight_pages: bool = False,
152
+ assume_horizontal: bool = False,
141
153
  ) -> Tuple[List[List[np.ndarray]], List[np.ndarray]]:
142
- crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages)
154
+ crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages, assume_horizontal)
143
155
 
144
156
  # Avoid sending zero-sized crops
145
157
  is_kept = [[all(s > 0 for s in crop.shape) for crop in page_crops] for page_crops in crops]
@@ -100,6 +100,8 @@ class OCRPredictor(NestedObject, _OCRPredictor):
100
100
  origin_pages_orientations = None
101
101
  if self.straighten_pages:
102
102
  pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
103
+ # update page shapes after straightening
104
+ origin_page_shapes = [page.shape[:2] for page in pages]
103
105
 
104
106
  # forward again to get predictions on straight pages
105
107
  loc_preds = self.det_predictor(pages, **kwargs) # type: ignore[assignment]
@@ -117,6 +119,7 @@ class OCRPredictor(NestedObject, _OCRPredictor):
117
119
  loc_preds, # type: ignore[arg-type]
118
120
  channels_last=True,
119
121
  assume_straight_pages=self.assume_straight_pages,
122
+ assume_horizontal=self._page_orientation_disabled,
120
123
  )
121
124
  # Rectify crop orientation and get crop orientation predictions
122
125
  crop_orientations: Any = []
@@ -391,6 +391,26 @@ def rotate_image(
391
391
  return rot_img
392
392
 
393
393
 
394
+ def remove_image_padding(image: np.ndarray) -> np.ndarray:
395
+ """Remove black border padding from an image
396
+
397
+ Args:
398
+ ----
399
+ image: numpy tensor to remove padding from
400
+
401
+ Returns:
402
+ -------
403
+ Image with padding removed
404
+ """
405
+ # Find the bounding box of the non-black region
406
+ rows = np.any(image, axis=1)
407
+ cols = np.any(image, axis=0)
408
+ rmin, rmax = np.where(rows)[0][[0, -1]]
409
+ cmin, cmax = np.where(cols)[0][[0, -1]]
410
+
411
+ return image[rmin : rmax + 1, cmin : cmax + 1]
412
+
413
+
394
414
  def estimate_page_angle(polys: np.ndarray) -> float:
395
415
  """Takes a batch of rotated previously ORIENTED polys (N, 4, 2) (rectified by the classifier) and return the
396
416
  estimated angle ccw in degrees
@@ -471,7 +491,7 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True
471
491
 
472
492
 
473
493
  def extract_rcrops(
474
- img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True
494
+ img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True, assume_horizontal: bool = False
475
495
  ) -> List[np.ndarray]:
476
496
  """Created cropped images from list of rotated bounding boxes
477
497
 
@@ -481,6 +501,7 @@ def extract_rcrops(
481
501
  polys: bounding boxes of shape (N, 4, 2)
482
502
  dtype: target data type of bounding boxes
483
503
  channels_last: whether the channel dimensions is the last one instead of the last one
504
+ assume_horizontal: whether the boxes are assumed to be only horizontally oriented
484
505
 
485
506
  Returns:
486
507
  -------
@@ -498,22 +519,88 @@ def extract_rcrops(
498
519
  _boxes[:, :, 0] *= width
499
520
  _boxes[:, :, 1] *= height
500
521
 
501
- src_pts = _boxes[:, :3].astype(np.float32)
502
- # Preserve size
503
- d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
504
- d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
505
- # (N, 3, 2)
506
- dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
507
- dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
508
- dst_pts[:, 2, 1] = d2 - 1
509
- # Use a warp transformation to extract the crop
510
- crops = [
511
- cv2.warpAffine(
512
- img if channels_last else img.transpose(1, 2, 0),
513
- # Transformation matrix
514
- cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
515
- (int(d1[idx]), int(d2[idx])),
516
- )
517
- for idx in range(_boxes.shape[0])
518
- ]
522
+ src_img = img if channels_last else img.transpose(1, 2, 0)
523
+
524
+ # Handle only horizontal oriented boxes
525
+ if assume_horizontal:
526
+ crops = []
527
+
528
+ for box in _boxes:
529
+ # Calculate the centroid of the quadrilateral
530
+ centroid = np.mean(box, axis=0)
531
+
532
+ # Divide the points into left and right
533
+ left_points = box[box[:, 0] < centroid[0]]
534
+ right_points = box[box[:, 0] >= centroid[0]]
535
+
536
+ # Sort the left points according to the y-axis
537
+ left_points = left_points[np.argsort(left_points[:, 1])]
538
+ top_left_pt = left_points[0]
539
+ bottom_left_pt = left_points[-1]
540
+ # Sort the right points according to the y-axis
541
+ right_points = right_points[np.argsort(right_points[:, 1])]
542
+ top_right_pt = right_points[0]
543
+ bottom_right_pt = right_points[-1]
544
+ box_points = np.array(
545
+ [top_left_pt, bottom_left_pt, top_right_pt, bottom_right_pt],
546
+ dtype=dtype,
547
+ )
548
+
549
+ # Get the width and height of the rectangle that will contain the warped quadrilateral
550
+ width_upper = np.linalg.norm(top_right_pt - top_left_pt)
551
+ width_lower = np.linalg.norm(bottom_right_pt - bottom_left_pt)
552
+ height_left = np.linalg.norm(bottom_left_pt - top_left_pt)
553
+ height_right = np.linalg.norm(bottom_right_pt - top_right_pt)
554
+
555
+ # Get the maximum width and height
556
+ rect_width = max(int(width_upper), int(width_lower))
557
+ rect_height = max(int(height_left), int(height_right))
558
+
559
+ dst_pts = np.array(
560
+ [
561
+ [0, 0], # top-left
562
+ # bottom-left
563
+ [0, rect_height - 1],
564
+ # top-right
565
+ [rect_width - 1, 0],
566
+ # bottom-right
567
+ [rect_width - 1, rect_height - 1],
568
+ ],
569
+ dtype=dtype,
570
+ )
571
+
572
+ # Get the perspective transform matrix using the box points
573
+ affine_mat = cv2.getPerspectiveTransform(box_points, dst_pts)
574
+
575
+ # Perform the perspective warp to get the rectified crop
576
+ crop = cv2.warpPerspective(
577
+ src_img,
578
+ affine_mat,
579
+ (rect_width, rect_height),
580
+ )
581
+
582
+ # Add the crop to the list of crops
583
+ crops.append(crop)
584
+
585
+ # Handle any oriented boxes
586
+ else:
587
+ src_pts = _boxes[:, :3].astype(np.float32)
588
+ # Preserve size
589
+ d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
590
+ d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
591
+ # (N, 3, 2)
592
+ dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
593
+ dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
594
+ dst_pts[:, 2, 1] = d2 - 1
595
+ # Use a warp transformation to extract the crop
596
+ crops = [
597
+ cv2.warpAffine(
598
+ src_img,
599
+ # Transformation matrix
600
+ cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
601
+ (int(d1[idx]), int(d2[idx])),
602
+ )
603
+ for idx in range(_boxes.shape[0])
604
+ ]
605
+
519
606
  return crops # type: ignore[return-value]
@@ -25,6 +25,7 @@ VOCABS: Dict[str, str] = {
25
25
  "hindi_punctuation": "।,?!:्ॐ॰॥॰",
26
26
  "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
27
27
  "bangla_digits": "০১২৩৪৫৬৭৮৯",
28
+ "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
28
29
  }
29
30
 
30
31
  VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
@@ -59,6 +60,9 @@ VOCABS["vietnamese"] = (
59
60
  VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
60
61
  VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
61
62
  VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
63
+ VOCABS["ukrainian"] = (
64
+ VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"
65
+ )
62
66
  VOCABS["multilingual"] = "".join(
63
67
  dict.fromkeys(
64
68
  VOCABS["french"]
@@ -0,0 +1 @@
1
+ __version__ = 'v0.5.0'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: onnxtr
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
5
5
  Author-email: Felix Dittrich <felixdittrich92@gmail.com>
6
6
  Maintainer: Felix Dittrich
@@ -225,9 +225,8 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
225
225
  Requires-Python: <4,>=3.9.0
226
226
  Description-Content-Type: text/markdown
227
227
  License-File: LICENSE
228
- Requires-Dist: numpy<2.0.0,>=1.16.0
228
+ Requires-Dist: numpy<3.0.0,>=1.16.0
229
229
  Requires-Dist: scipy<2.0.0,>=1.4.0
230
- Requires-Dist: opencv-python<5.0.0,>=4.5.0
231
230
  Requires-Dist: pypdfium2<5.0.0,>=4.11.0
232
231
  Requires-Dist: pyclipper<2.0.0,>=1.2.0
233
232
  Requires-Dist: shapely<3.0.0,>=1.6.0
@@ -240,8 +239,16 @@ Requires-Dist: anyascii>=0.3.2
240
239
  Requires-Dist: tqdm>=4.30.0
241
240
  Provides-Extra: cpu
242
241
  Requires-Dist: onnxruntime>=1.11.0; extra == "cpu"
242
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "cpu"
243
243
  Provides-Extra: gpu
244
244
  Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu"
245
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "gpu"
246
+ Provides-Extra: cpu-headless
247
+ Requires-Dist: onnxruntime>=1.11.0; extra == "cpu-headless"
248
+ Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "cpu-headless"
249
+ Provides-Extra: gpu-headless
250
+ Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu-headless"
251
+ Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "gpu-headless"
245
252
  Provides-Extra: html
246
253
  Requires-Dist: weasyprint>=55.0; extra == "html"
247
254
  Provides-Extra: viz
@@ -257,6 +264,7 @@ Requires-Dist: mypy>=0.812; extra == "quality"
257
264
  Requires-Dist: pre-commit>=2.17.0; extra == "quality"
258
265
  Provides-Extra: dev
259
266
  Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
267
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "dev"
260
268
  Requires-Dist: weasyprint>=55.0; extra == "dev"
261
269
  Requires-Dist: matplotlib>=3.1.0; extra == "dev"
262
270
  Requires-Dist: mplcursors>=0.3; extra == "dev"
@@ -276,7 +284,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
276
284
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
277
285
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
278
286
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
279
- [![Pypi](https://img.shields.io/badge/pypi-v0.3.2-blue.svg)](https://pypi.org/project/OnnxTR/)
287
+ [![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
280
288
 
281
289
  > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
282
290
 
@@ -309,8 +317,10 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
309
317
 
310
318
  ```shell
311
319
  pip install "onnxtr[cpu]"
320
+ pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
312
321
  # with gpu support
313
322
  pip install "onnxtr[gpu]"
323
+ pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
314
324
  # with HTML support
315
325
  pip install "onnxtr[html]"
316
326
  # with support for visualization
@@ -356,6 +366,9 @@ model = ocr_predictor(
356
366
  # Additional parameters - meta information
357
367
  detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
358
368
  detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
369
+ # Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
370
+ disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
371
+ disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
359
372
  # DocumentBuilder specific parameters
360
373
  resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
361
374
  resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
@@ -589,8 +602,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
589
602
  |Library |FUNSD (199 pages) |CORD (900 pages) |
590
603
  |---------------------------------|-------------------------------|-------------------------------|
591
604
  |docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
592
- |**OnnxTR (CPU)** - v0.1.2 | ~0.57s / Page | **~0.25s / Page** |
593
- |**OnnxTR (CPU) 8-bit** - v0.1.2 | **~0.38s / Page** | **~0.14s / Page** |
605
+ |**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
606
+ |**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
594
607
  |EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
595
608
  |**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
596
609
  |Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
@@ -602,7 +615,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
602
615
  |-------------------------------------|-------------------------------|-------------------------------|
603
616
  |docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
604
617
  |**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
605
- |OnnxTR (GPU) - v0.1.2 | **~0.06s / Page** | ~0.04s / Page |
618
+ |OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
606
619
  |EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
607
620
  |Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
608
621
  |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
@@ -621,6 +634,16 @@ If you wish to cite please refer to the base project citation, feel free to use
621
634
  }
622
635
  ```
623
636
 
637
+ ```bibtex
638
+ @misc{onnxtr2024,
639
+ title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
640
+ author={Felix Dittrich},
641
+ year={2024},
642
+ publisher = {GitHub},
643
+ howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
644
+ }
645
+ ```
646
+
624
647
  ## License
625
648
 
626
649
  Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
@@ -1,6 +1,5 @@
1
- numpy<2.0.0,>=1.16.0
1
+ numpy<3.0.0,>=1.16.0
2
2
  scipy<2.0.0,>=1.4.0
3
- opencv-python<5.0.0,>=4.5.0
4
3
  pypdfium2<5.0.0,>=4.11.0
5
4
  pyclipper<2.0.0,>=1.2.0
6
5
  shapely<3.0.0,>=1.6.0
@@ -14,9 +13,15 @@ tqdm>=4.30.0
14
13
 
15
14
  [cpu]
16
15
  onnxruntime>=1.11.0
16
+ opencv-python<5.0.0,>=4.5.0
17
+
18
+ [cpu-headless]
19
+ onnxruntime>=1.11.0
20
+ opencv-python-headless<5.0.0,>=4.5.0
17
21
 
18
22
  [dev]
19
23
  onnxruntime>=1.11.0
24
+ opencv-python<5.0.0,>=4.5.0
20
25
  weasyprint>=55.0
21
26
  matplotlib>=3.1.0
22
27
  mplcursors>=0.3
@@ -29,6 +34,11 @@ pre-commit>=2.17.0
29
34
 
30
35
  [gpu]
31
36
  onnxruntime-gpu>=1.11.0
37
+ opencv-python<5.0.0,>=4.5.0
38
+
39
+ [gpu-headless]
40
+ onnxruntime-gpu>=1.11.0
41
+ opencv-python-headless<5.0.0,>=4.5.0
32
42
 
33
43
  [html]
34
44
  weasyprint>=55.0
@@ -31,9 +31,8 @@ dynamic = ["version"]
31
31
  dependencies = [
32
32
  # For proper typing, mypy needs numpy>=1.20.0 (cf. https://github.com/numpy/numpy/pull/16515)
33
33
  # Additional typing support is brought by numpy>=1.22.4, but core build sticks to >=1.16.0
34
- "numpy>=1.16.0,<2.0.0",
34
+ "numpy>=1.16.0,<3.0.0",
35
35
  "scipy>=1.4.0,<2.0.0",
36
- "opencv-python>=4.5.0,<5.0.0",
37
36
  "pypdfium2>=4.11.0,<5.0.0",
38
37
  "pyclipper>=1.2.0,<2.0.0",
39
38
  "shapely>=1.6.0,<3.0.0",
@@ -49,9 +48,19 @@ dependencies = [
49
48
  [project.optional-dependencies]
50
49
  cpu = [
51
50
  "onnxruntime>=1.11.0",
51
+ "opencv-python>=4.5.0,<5.0.0",
52
52
  ]
53
53
  gpu = [
54
54
  "onnxruntime-gpu>=1.11.0",
55
+ "opencv-python>=4.5.0,<5.0.0",
56
+ ]
57
+ cpu-headless = [
58
+ "onnxruntime>=1.11.0",
59
+ "opencv-python-headless>=4.5.0,<5.0.0",
60
+ ]
61
+ gpu-headless = [
62
+ "onnxruntime-gpu>=1.11.0",
63
+ "opencv-python-headless>=4.5.0,<5.0.0",
55
64
  ]
56
65
  html = [
57
66
  "weasyprint>=55.0",
@@ -73,6 +82,7 @@ quality = [
73
82
  dev = [
74
83
  # Runtime
75
84
  "onnxruntime>=1.11.0",
85
+ "opencv-python>=4.5.0,<5.0.0",
76
86
  # HTML
77
87
  "weasyprint>=55.0",
78
88
  # Visualization
@@ -9,7 +9,7 @@ from pathlib import Path
9
9
  from setuptools import setup
10
10
 
11
11
  PKG_NAME = "onnxtr"
12
- VERSION = os.getenv("BUILD_VERSION", "0.4.0a0")
12
+ VERSION = os.getenv("BUILD_VERSION", "0.5.0a0")
13
13
 
14
14
 
15
15
  if __name__ == "__main__":
@@ -1 +0,0 @@
1
- __version__ = 'v0.4.0'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes