PyPI - onnxtr - Versions diffs - 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

onnxtr 0.1.1 (py3-none-any.whl) → 0.2.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

onnxtr/models/classification/models/mobilenet.py +15 -4
onnxtr/models/classification/predictor/base.py +1 -0
onnxtr/models/classification/zoo.py +10 -7
onnxtr/models/detection/models/differentiable_binarization.py +21 -6
onnxtr/models/detection/models/fast.py +13 -6
onnxtr/models/detection/models/linknet.py +21 -6
onnxtr/models/detection/zoo.py +7 -3
onnxtr/models/engine.py +2 -2
onnxtr/models/predictor/base.py +5 -1
onnxtr/models/recognition/models/crnn.py +21 -6
onnxtr/models/recognition/models/master.py +7 -2
onnxtr/models/recognition/models/parseq.py +8 -2
onnxtr/models/recognition/models/sar.py +9 -2
onnxtr/models/recognition/models/vitstr.py +17 -6
onnxtr/models/recognition/zoo.py +7 -4
onnxtr/models/zoo.py +6 -0
onnxtr/version.py +1 -1
{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/METADATA +53 -13
{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/RECORD +23 -23
{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/LICENSE +0 -0
{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/WHEEL +0 -0
{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/top_level.txt +0 -0
{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/zip-safe +0 -0

onnxtr/models/classification/models/mobilenet.py CHANGED Viewed

@@ -24,6 +24,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "input_shape": (3, 256, 256),
         "classes": [0, -90, 180, 90],
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/mobilenet_v3_small_crop_orientation-5620cf7e.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/mobilenet_v3_small_crop_orientation_static_8_bit-4cfaa621.onnx",
     },
     "mobilenet_v3_small_page_orientation": {
         "mean": (0.694, 0.695, 0.693),
@@ -31,6 +32,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "input_shape": (3, 512, 512),
         "classes": [0, -90, 180, 90],
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/mobilenet_v3_small_page_orientation-d3f76d79.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/mobilenet_v3_small_page_orientation_static_8_bit-3e5ef3dc.onnx",
     },
 }
@@ -64,14 +66,19 @@ class MobileNetV3(Engine):
 def _mobilenet_v3(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
     **kwargs: Any,
 ) -> MobileNetV3:
+    # Patch the url
+    model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
     _cfg = deepcopy(default_cfgs[arch])
     return MobileNetV3(model_path, cfg=_cfg, **kwargs)
 def mobilenet_v3_small_crop_orientation(
-    model_path: str = default_cfgs["mobilenet_v3_small_crop_orientation"]["url"], **kwargs: Any
+    model_path: str = default_cfgs["mobilenet_v3_small_crop_orientation"]["url"],
+    load_in_8_bit: bool = False,
+    **kwargs: Any,
 ) -> MobileNetV3:
     """MobileNetV3-Small architecture as described in
     `"Searching for MobileNetV3",
@@ -86,17 +93,20 @@ def mobilenet_v3_small_crop_orientation(
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
     -------
         MobileNetV3
     """
-    return _mobilenet_v3("mobilenet_v3_small_crop_orientation", model_path, **kwargs)
+    return _mobilenet_v3("mobilenet_v3_small_crop_orientation", model_path, load_in_8_bit, **kwargs)
 def mobilenet_v3_small_page_orientation(
-    model_path: str = default_cfgs["mobilenet_v3_small_page_orientation"]["url"], **kwargs: Any
+    model_path: str = default_cfgs["mobilenet_v3_small_page_orientation"]["url"],
+    load_in_8_bit: bool = False,
+    **kwargs: Any,
 ) -> MobileNetV3:
     """MobileNetV3-Small architecture as described in
     `"Searching for MobileNetV3",
@@ -111,10 +121,11 @@ def mobilenet_v3_small_page_orientation(
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
     -------
         MobileNetV3
     """
-    return _mobilenet_v3("mobilenet_v3_small_page_orientation", model_path, **kwargs)
+    return _mobilenet_v3("mobilenet_v3_small_page_orientation", model_path, load_in_8_bit, **kwargs)

onnxtr/models/classification/predictor/base.py CHANGED Viewed

@@ -22,6 +22,7 @@ class OrientationPredictor(NestedObject):
     ----
         pre_processor: transform inputs for easier batched model inference
         model: core classification architecture (backbone + classification head)
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
     """
     _children_names: List[str] = ["pre_processor", "model"]

onnxtr/models/classification/zoo.py CHANGED Viewed

@@ -14,24 +14,25 @@ __all__ = ["crop_orientation_predictor", "page_orientation_predictor"]
 ORIENTATION_ARCHS: List[str] = ["mobilenet_v3_small_crop_orientation", "mobilenet_v3_small_page_orientation"]
-def _orientation_predictor(arch: str, **kwargs: Any) -> OrientationPredictor:
+def _orientation_predictor(arch: str, load_in_8_bit: bool = False, **kwargs: Any) -> OrientationPredictor:
     if arch not in ORIENTATION_ARCHS:
         raise ValueError(f"unknown architecture '{arch}'")
     # Load directly classifier from backbone
-    _model = classification.__dict__[arch]()
+    _model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit)
     kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
     kwargs["std"] = kwargs.get("std", _model.cfg["std"])
     kwargs["batch_size"] = kwargs.get("batch_size", 128 if "crop" in arch else 4)
     input_shape = _model.cfg["input_shape"][1:]
     predictor = OrientationPredictor(
-        PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs), _model
+        PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs),
+        _model,
     )
     return predictor
 def crop_orientation_predictor(
-    arch: Any = "mobilenet_v3_small_crop_orientation", **kwargs: Any
+    arch: Any = "mobilenet_v3_small_crop_orientation", load_in_8_bit: bool = False, **kwargs: Any
 ) -> OrientationPredictor:
     """Crop orientation classification architecture.
@@ -44,17 +45,18 @@ def crop_orientation_predictor(
     Args:
     ----
         arch: name of the architecture to use (e.g. 'mobilenet_v3_small_crop_orientation')
+        load_in_8_bit: load the 8-bit quantized version of the model
         **kwargs: keyword arguments to be passed to the OrientationPredictor
     Returns:
     -------
         OrientationPredictor
     """
-    return _orientation_predictor(arch, **kwargs)
+    return _orientation_predictor(arch, load_in_8_bit, **kwargs)
 def page_orientation_predictor(
-    arch: Any = "mobilenet_v3_small_page_orientation", **kwargs: Any
+    arch: Any = "mobilenet_v3_small_page_orientation", load_in_8_bit: bool = False, **kwargs: Any
 ) -> OrientationPredictor:
     """Page orientation classification architecture.
@@ -67,10 +69,11 @@ def page_orientation_predictor(
     Args:
     ----
         arch: name of the architecture to use (e.g. 'mobilenet_v3_small_page_orientation')
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments to be passed to the OrientationPredictor
     Returns:
     -------
         OrientationPredictor
     """
-    return _orientation_predictor(arch, **kwargs)
+    return _orientation_predictor(arch, load_in_8_bit, **kwargs)

onnxtr/models/detection/models/differentiable_binarization.py CHANGED Viewed

@@ -20,18 +20,21 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet50-69ba0015.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet50_static_8_bit-09a6104f.onnx",
     },
     "db_resnet34": {
         "input_shape": (3, 1024, 1024),
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet34-b4873198.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet34_static_8_bit-027e2c7f.onnx",
     },
     "db_mobilenet_v3_large": {
         "input_shape": (3, 1024, 1024),
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_mobilenet_v3_large-1866973f.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_mobilenet_v3_large_static_8_bit-51659bb9.onnx",
     },
 }
@@ -87,13 +90,18 @@ class DBNet(Engine):
 def _dbnet(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
     **kwargs: Any,
 ) -> DBNet:
+    # Patch the url
+    model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
     # Build the model
     return DBNet(model_path, cfg=default_cfgs[arch], **kwargs)
-def db_resnet34(model_path: str = default_cfgs["db_resnet34"]["url"], **kwargs: Any) -> DBNet:
+def db_resnet34(
+    model_path: str = default_cfgs["db_resnet34"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> DBNet:
     """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
     <https://arxiv.org/pdf/1911.08947.pdf>`_, using a ResNet-34 backbone.
@@ -106,16 +114,19 @@ def db_resnet34(model_path: str = default_cfgs["db_resnet34"]["url"], **kwargs:
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the DBNet architecture
     Returns:
     -------
         text detection architecture
     """
-    return _dbnet("db_resnet34", model_path, **kwargs)
+    return _dbnet("db_resnet34", model_path, load_in_8_bit, **kwargs)
-def db_resnet50(model_path: str = default_cfgs["db_resnet50"]["url"], **kwargs: Any) -> DBNet:
+def db_resnet50(
+    model_path: str = default_cfgs["db_resnet50"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> DBNet:
     """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
     <https://arxiv.org/pdf/1911.08947.pdf>`_, using a ResNet-50 backbone.
@@ -128,16 +139,19 @@ def db_resnet50(model_path: str = default_cfgs["db_resnet50"]["url"], **kwargs:
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the DBNet architecture
     Returns:
     -------
         text detection architecture
     """
-    return _dbnet("db_resnet50", model_path, **kwargs)
+    return _dbnet("db_resnet50", model_path, load_in_8_bit, **kwargs)
-def db_mobilenet_v3_large(model_path: str = default_cfgs["db_mobilenet_v3_large"]["url"], **kwargs: Any) -> DBNet:
+def db_mobilenet_v3_large(
+    model_path: str = default_cfgs["db_mobilenet_v3_large"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> DBNet:
     """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
     <https://arxiv.org/pdf/1911.08947.pdf>`_, using a MobileNet V3 Large backbone.
@@ -150,10 +164,11 @@ def db_mobilenet_v3_large(model_path: str = default_cfgs["db_mobilenet_v3_large"
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the DBNet architecture
     Returns:
     -------
         text detection architecture
     """
-    return _dbnet("db_mobilenet_v3_large", model_path, **kwargs)
+    return _dbnet("db_mobilenet_v3_large", model_path, load_in_8_bit, **kwargs)

onnxtr/models/detection/models/fast.py CHANGED Viewed

@@ -3,6 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+import logging
 from typing import Any, Dict, Optional
 import numpy as np
@@ -88,13 +89,16 @@ class FAST(Engine):
 def _fast(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
     **kwargs: Any,
 ) -> FAST:
+    if load_in_8_bit:
+        logging.warning("FAST models do not support 8-bit quantization yet. Loading full precision model...")
     # Build the model
     return FAST(model_path, cfg=default_cfgs[arch], **kwargs)
-def fast_tiny(model_path: str = default_cfgs["fast_tiny"]["url"], **kwargs: Any) -> FAST:
+def fast_tiny(model_path: str = default_cfgs["fast_tiny"]["url"], load_in_8_bit: bool = False, **kwargs: Any) -> FAST:
     """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
     <https://arxiv.org/pdf/2111.02394.pdf>`_, using a tiny TextNet backbone.
@@ -107,16 +111,17 @@ def fast_tiny(model_path: str = default_cfgs["fast_tiny"]["url"], **kwargs: Any)
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the FAST architecture
     Returns:
     -------
         text detection architecture
     """
-    return _fast("fast_tiny", model_path, **kwargs)
+    return _fast("fast_tiny", model_path, load_in_8_bit, **kwargs)
-def fast_small(model_path: str = default_cfgs["fast_small"]["url"], **kwargs: Any) -> FAST:
+def fast_small(model_path: str = default_cfgs["fast_small"]["url"], load_in_8_bit: bool = False, **kwargs: Any) -> FAST:
     """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
     <https://arxiv.org/pdf/2111.02394.pdf>`_, using a small TextNet backbone.
@@ -129,16 +134,17 @@ def fast_small(model_path: str = default_cfgs["fast_small"]["url"], **kwargs: An
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the FAST architecture
     Returns:
     -------
         text detection architecture
     """
-    return _fast("fast_small", model_path, **kwargs)
+    return _fast("fast_small", model_path, load_in_8_bit, **kwargs)
-def fast_base(model_path: str = default_cfgs["fast_base"]["url"], **kwargs: Any) -> FAST:
+def fast_base(model_path: str = default_cfgs["fast_base"]["url"], load_in_8_bit: bool = False, **kwargs: Any) -> FAST:
     """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
     <https://arxiv.org/pdf/2111.02394.pdf>`_, using a base TextNet backbone.
@@ -151,10 +157,11 @@ def fast_base(model_path: str = default_cfgs["fast_base"]["url"], **kwargs: Any)
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the FAST architecture
     Returns:
     -------
         text detection architecture
     """
-    return _fast("fast_base", model_path, **kwargs)
+    return _fast("fast_base", model_path, load_in_8_bit, **kwargs)

onnxtr/models/detection/models/linknet.py CHANGED Viewed

@@ -20,18 +20,21 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet18-e0e0b9dc.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet18_static_8_bit-3b3a37dd.onnx",
     },
     "linknet_resnet34": {
         "input_shape": (3, 1024, 1024),
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet34-93e39a39.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet34_static_8_bit-2824329d.onnx",
     },
     "linknet_resnet50": {
         "input_shape": (3, 1024, 1024),
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet50-15d8c4ec.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet50_static_8_bit-65d6b0b8.onnx",
     },
 }
@@ -88,13 +91,18 @@ class LinkNet(Engine):
 def _linknet(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
     **kwargs: Any,
 ) -> LinkNet:
+    # Patch the url
+    model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
     # Build the model
     return LinkNet(model_path, cfg=default_cfgs[arch], **kwargs)
-def linknet_resnet18(model_path: str = default_cfgs["linknet_resnet18"]["url"], **kwargs: Any) -> LinkNet:
+def linknet_resnet18(
+    model_path: str = default_cfgs["linknet_resnet18"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> LinkNet:
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
     <https://arxiv.org/pdf/1707.03718.pdf>`_.
@@ -107,16 +115,19 @@ def linknet_resnet18(model_path: str = default_cfgs["linknet_resnet18"]["url"],
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the LinkNet architecture
     Returns:
     -------
         text detection architecture
     """
-    return _linknet("linknet_resnet18", model_path, **kwargs)
+    return _linknet("linknet_resnet18", model_path, load_in_8_bit, **kwargs)
-def linknet_resnet34(model_path: str = default_cfgs["linknet_resnet34"]["url"], **kwargs: Any) -> LinkNet:
+def linknet_resnet34(
+    model_path: str = default_cfgs["linknet_resnet34"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> LinkNet:
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
     <https://arxiv.org/pdf/1707.03718.pdf>`_.
@@ -129,16 +140,19 @@ def linknet_resnet34(model_path: str = default_cfgs["linknet_resnet34"]["url"],
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the LinkNet architecture
     Returns:
     -------
         text detection architecture
     """
-    return _linknet("linknet_resnet34", model_path, **kwargs)
+    return _linknet("linknet_resnet34", model_path, load_in_8_bit, **kwargs)
-def linknet_resnet50(model_path: str = default_cfgs["linknet_resnet50"]["url"], **kwargs: Any) -> LinkNet:
+def linknet_resnet50(
+    model_path: str = default_cfgs["linknet_resnet50"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> LinkNet:
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
     <https://arxiv.org/pdf/1707.03718.pdf>`_.
@@ -151,10 +165,11 @@ def linknet_resnet50(model_path: str = default_cfgs["linknet_resnet50"]["url"],
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the LinkNet architecture
     Returns:
     -------
         text detection architecture
     """
-    return _linknet("linknet_resnet50", model_path, **kwargs)
+    return _linknet("linknet_resnet50", model_path, load_in_8_bit, **kwargs)

onnxtr/models/detection/zoo.py CHANGED Viewed

@@ -24,12 +24,14 @@ ARCHS = [
 ]
-def _predictor(arch: Any, assume_straight_pages: bool = True, **kwargs: Any) -> DetectionPredictor:
+def _predictor(
+    arch: Any, assume_straight_pages: bool = True, load_in_8_bit: bool = False, **kwargs: Any
+) -> DetectionPredictor:
     if isinstance(arch, str):
         if arch not in ARCHS:
             raise ValueError(f"unknown architecture '{arch}'")
-        _model = detection.__dict__[arch](assume_straight_pages=assume_straight_pages)
+        _model = detection.__dict__[arch](assume_straight_pages=assume_straight_pages, load_in_8_bit=load_in_8_bit)
     else:
         if not isinstance(arch, (detection.DBNet, detection.LinkNet, detection.FAST)):
             raise ValueError(f"unknown architecture: {type(arch)}")
@@ -50,6 +52,7 @@ def _predictor(arch: Any, assume_straight_pages: bool = True, **kwargs: Any) ->
 def detection_predictor(
     arch: Any = "fast_base",
     assume_straight_pages: bool = True,
+    load_in_8_bit: bool = False,
     **kwargs: Any,
 ) -> DetectionPredictor:
     """Text detection architecture.
@@ -64,10 +67,11 @@ def detection_predictor(
     ----
         arch: name of the architecture or model itself to use (e.g. 'db_resnet50')
         assume_straight_pages: If True, fit straight boxes to the page
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: optional keyword arguments passed to the architecture
     Returns:
     -------
         Detection predictor
     """
-    return _predictor(arch, assume_straight_pages, **kwargs)
+    return _predictor(arch, assume_straight_pages, load_in_8_bit, **kwargs)

onnxtr/models/engine.py CHANGED Viewed

@@ -43,8 +43,8 @@ class Engine:
             inputs = np.broadcast_to(inputs, (self.fixed_batch_size, *inputs.shape))
             # combine the results
             logits = np.concatenate(
-                [self.runtime.run(self.output_name, {"input": batch})[0] for batch in inputs], axis=0
+                [self.runtime.run(self.output_name, {self.runtime_inputs.name: batch})[0] for batch in inputs], axis=0
             )
         else:
-            logits = self.runtime.run(self.output_name, {"input": inputs})[0]
+            logits = self.runtime.run(self.output_name, {self.runtime_inputs.name: inputs})[0]
         return shape_translate(logits, format="BHWC")

onnxtr/models/predictor/base.py CHANGED Viewed

@@ -31,6 +31,7 @@ class _OCRPredictor:
             accordingly. Doing so will improve performances for documents with page-uniform rotations.
         preserve_aspect_ratio: if True, resize preserving the aspect ratio (with padding)
         symmetric_pad: if True and preserve_aspect_ratio is True, pad the image symmetrically.
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword args of `DocumentBuilder`
     """
@@ -42,11 +43,14 @@ class _OCRPredictor:
         straighten_pages: bool = False,
         preserve_aspect_ratio: bool = True,
         symmetric_pad: bool = True,
+        load_in_8_bit: bool = False,
         **kwargs: Any,
     ) -> None:
         self.assume_straight_pages = assume_straight_pages
         self.straighten_pages = straighten_pages
-        self.crop_orientation_predictor = None if assume_straight_pages else crop_orientation_predictor()
+        self.crop_orientation_predictor = (
+            None if assume_straight_pages else crop_orientation_predictor(load_in_8_bit=load_in_8_bit)
+        )
         self.doc_builder = DocumentBuilder(**kwargs)
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad

onnxtr/models/recognition/models/crnn.py CHANGED Viewed

@@ -24,6 +24,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "input_shape": (3, 32, 128),
         "vocab": VOCABS["legacy_french"],
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_vgg16_bn-662979cc.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_vgg16_bn_static_8_bit-bce050c7.onnx",
     },
     "crnn_mobilenet_v3_small": {
         "mean": (0.694, 0.695, 0.693),
@@ -31,6 +32,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "input_shape": (3, 32, 128),
         "vocab": VOCABS["french"],
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_small-bded4d49.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_mobilenet_v3_small_static_8_bit-4949006f.onnx",
     },
     "crnn_mobilenet_v3_large": {
         "mean": (0.694, 0.695, 0.693),
@@ -38,6 +40,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "input_shape": (3, 32, 128),
         "vocab": VOCABS["french"],
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_large-d42e8185.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_mobilenet_v3_large_static_8_bit-459e856d.onnx",
     },
 }
@@ -148,6 +151,7 @@ class CRNN(Engine):
 def _crnn(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
     **kwargs: Any,
 ) -> CRNN:
     kwargs["vocab"] = kwargs.get("vocab", default_cfgs[arch]["vocab"])
@@ -155,12 +159,16 @@ def _crnn(
     _cfg = deepcopy(default_cfgs[arch])
     _cfg["vocab"] = kwargs["vocab"]
     _cfg["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
+    # Patch the url
+    model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
     # Build the model
     return CRNN(model_path, cfg=_cfg, **kwargs)
-def crnn_vgg16_bn(model_path: str = default_cfgs["crnn_vgg16_bn"]["url"], **kwargs: Any) -> CRNN:
+def crnn_vgg16_bn(
+    model_path: str = default_cfgs["crnn_vgg16_bn"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> CRNN:
     """CRNN with a VGG-16 backbone as described in `"An End-to-End Trainable Neural Network for Image-based
     Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.
@@ -173,16 +181,19 @@ def crnn_vgg16_bn(model_path: str = default_cfgs["crnn_vgg16_bn"]["url"], **kwar
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the CRNN architecture
     Returns:
     -------
         text recognition architecture
     """
-    return _crnn("crnn_vgg16_bn", model_path, **kwargs)
+    return _crnn("crnn_vgg16_bn", model_path, load_in_8_bit, **kwargs)
-def crnn_mobilenet_v3_small(model_path: str = default_cfgs["crnn_mobilenet_v3_small"]["url"], **kwargs: Any) -> CRNN:
+def crnn_mobilenet_v3_small(
+    model_path: str = default_cfgs["crnn_mobilenet_v3_small"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> CRNN:
     """CRNN with a MobileNet V3 Small backbone as described in `"An End-to-End Trainable Neural Network for Image-based
     Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.
@@ -195,16 +206,19 @@ def crnn_mobilenet_v3_small(model_path: str = default_cfgs["crnn_mobilenet_v3_sm
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the CRNN architecture
     Returns:
     -------
         text recognition architecture
     """
-    return _crnn("crnn_mobilenet_v3_small", model_path, **kwargs)
+    return _crnn("crnn_mobilenet_v3_small", model_path, load_in_8_bit, **kwargs)
-def crnn_mobilenet_v3_large(model_path: str = default_cfgs["crnn_mobilenet_v3_large"]["url"], **kwargs: Any) -> CRNN:
+def crnn_mobilenet_v3_large(
+    model_path: str = default_cfgs["crnn_mobilenet_v3_large"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> CRNN:
     """CRNN with a MobileNet V3 Large backbone as described in `"An End-to-End Trainable Neural Network for Image-based
     Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.
@@ -217,10 +231,11 @@ def crnn_mobilenet_v3_large(model_path: str = default_cfgs["crnn_mobilenet_v3_la
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the CRNN architecture
     Returns:
     -------
         text recognition architecture
     """
-    return _crnn("crnn_mobilenet_v3_large", model_path, **kwargs)
+    return _crnn("crnn_mobilenet_v3_large", model_path, load_in_8_bit, **kwargs)

onnxtr/models/recognition/models/master.py CHANGED Viewed

@@ -24,6 +24,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "input_shape": (3, 32, 128),
         "vocab": VOCABS["french"],
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/master-b1287fcd.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/master_dynamic_8_bit-d8bd8206.onnx",
     },
 }
@@ -112,6 +113,7 @@ class MASTERPostProcessor(RecognitionPostProcessor):
 def _master(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
     **kwargs: Any,
 ) -> MASTER:
     # Patch the config
@@ -120,11 +122,13 @@ def _master(
     _cfg["vocab"] = kwargs.get("vocab", _cfg["vocab"])
     kwargs["vocab"] = _cfg["vocab"]
+    # Patch the url
+    model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
     return MASTER(model_path, cfg=_cfg, **kwargs)
-def master(model_path: str = default_cfgs["master"]["url"], **kwargs: Any) -> MASTER:
+def master(model_path: str = default_cfgs["master"]["url"], load_in_8_bit: bool = False, **kwargs: Any) -> MASTER:
     """MASTER as described in paper: <https://arxiv.org/pdf/1910.02562.pdf>`_.
     >>> import numpy as np
@@ -136,10 +140,11 @@ def master(model_path: str = default_cfgs["master"]["url"], **kwargs: Any) -> MA
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments passed to the MASTER architecture
     Returns:
     -------
         text recognition architecture
     """
-    return _master("master", model_path, **kwargs)
+    return _master("master", model_path, load_in_8_bit, **kwargs)

onnxtr/models/recognition/models/parseq.py CHANGED Viewed

@@ -23,6 +23,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "input_shape": (3, 32, 128),
         "vocab": VOCABS["french"],
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/parseq-00b40714.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/parseq_dynamic_8_bit-5b04d9f7.onnx",
     },
 }
@@ -32,6 +33,7 @@ class PARSeq(Engine):
     Args:
     ----
+        model_path: path to onnx model file
         vocab: vocabulary used for encoding
         cfg: dictionary containing information about the model
         **kwargs: additional arguments to be passed to `Engine`
@@ -99,6 +101,7 @@ class PARSeqPostProcessor(RecognitionPostProcessor):
 def _parseq(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
     **kwargs: Any,
 ) -> PARSeq:
     # Patch the config
@@ -107,12 +110,14 @@ def _parseq(
     _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"])
     kwargs["vocab"] = _cfg["vocab"]
+    # Patch the url
+    model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
     # Build the model
     return PARSeq(model_path, cfg=_cfg, **kwargs)
-def parseq(model_path: str = default_cfgs["parseq"]["url"], **kwargs: Any) -> PARSeq:
+def parseq(model_path: str = default_cfgs["parseq"]["url"], load_in_8_bit: bool = False, **kwargs: Any) -> PARSeq:
     """PARSeq architecture from
     `"Scene Text Recognition with Permuted Autoregressive Sequence Models" <https://arxiv.org/pdf/2207.06966>`_.
@@ -125,10 +130,11 @@ def parseq(model_path: str = default_cfgs["parseq"]["url"], **kwargs: Any) -> PA
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the PARSeq architecture
     Returns:
     -------
         text recognition architecture
     """
-    return _parseq("parseq", model_path, **kwargs)
+    return _parseq("parseq", model_path, load_in_8_bit, **kwargs)

onnxtr/models/recognition/models/sar.py CHANGED Viewed

@@ -23,6 +23,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "input_shape": (3, 32, 128),
         "vocab": VOCABS["french"],
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/sar_resnet31-395f8005.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/sar_resnet31_static_8_bit-c07316bc.onnx",
     },
 }
@@ -99,6 +100,7 @@ class SARPostProcessor(RecognitionPostProcessor):
 def _sar(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
     **kwargs: Any,
 ) -> SAR:
     # Patch the config
@@ -107,12 +109,16 @@ def _sar(
     _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"])
     kwargs["vocab"] = _cfg["vocab"]
+    # Patch the url
+    model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
     # Build the model
     return SAR(model_path, cfg=_cfg, **kwargs)
-def sar_resnet31(model_path: str = default_cfgs["sar_resnet31"]["url"], **kwargs: Any) -> SAR:
+def sar_resnet31(
+    model_path: str = default_cfgs["sar_resnet31"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> SAR:
     """SAR with a resnet-31 feature extractor as described in `"Show, Attend and Read:A Simple and Strong
     Baseline for Irregular Text Recognition" <https://arxiv.org/pdf/1811.00751.pdf>`_.
@@ -125,10 +131,11 @@ def sar_resnet31(model_path: str = default_cfgs["sar_resnet31"]["url"], **kwargs
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: keyword arguments of the SAR architecture
     Returns:
     -------
         text recognition architecture
     """
-    return _sar("sar_resnet31", model_path, **kwargs)
+    return _sar("sar_resnet31", model_path, load_in_8_bit, **kwargs)

onnxtr/models/recognition/models/vitstr.py CHANGED Viewed

@@ -23,6 +23,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "input_shape": (3, 32, 128),
         "vocab": VOCABS["french"],
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/vitstr_small-3ff9c500.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/vitstr_small_dynamic_8_bit-bec6c796.onnx",
     },
     "vitstr_base": {
         "mean": (0.694, 0.695, 0.693),
@@ -30,6 +31,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "input_shape": (3, 32, 128),
         "vocab": VOCABS["french"],
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/vitstr_base-ff62f5be.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/vitstr_base_dynamic_8_bit-976c7cd6.onnx",
     },
 }
@@ -109,6 +111,7 @@ class ViTSTRPostProcessor(RecognitionPostProcessor):
 def _vitstr(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
     **kwargs: Any,
 ) -> ViTSTR:
     # Patch the config
@@ -117,12 +120,16 @@ def _vitstr(
     _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"])
     kwargs["vocab"] = _cfg["vocab"]
+    # Patch the url
+    model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
     # Build the model
     return ViTSTR(model_path, cfg=_cfg, **kwargs)
-def vitstr_small(model_path: str = default_cfgs["vitstr_small"]["url"], **kwargs: Any) -> ViTSTR:
+def vitstr_small(
+    model_path: str = default_cfgs["vitstr_small"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> ViTSTR:
     """ViTSTR-Small as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
     <https://arxiv.org/pdf/2105.08582.pdf>`_.
@@ -135,16 +142,19 @@ def vitstr_small(model_path: str = default_cfgs["vitstr_small"]["url"], **kwargs
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
-        kwargs: keyword arguments of the ViTSTR architecture
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        **kwargs: keyword arguments of the ViTSTR architecture
     Returns:
     -------
         text recognition architecture
     """
-    return _vitstr("vitstr_small", model_path, **kwargs)
+    return _vitstr("vitstr_small", model_path, load_in_8_bit, **kwargs)
-def vitstr_base(model_path: str = default_cfgs["vitstr_base"]["url"], **kwargs: Any) -> ViTSTR:
+def vitstr_base(
+    model_path: str = default_cfgs["vitstr_base"]["url"], load_in_8_bit: bool = False, **kwargs: Any
+) -> ViTSTR:
     """ViTSTR-Base as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
     <https://arxiv.org/pdf/2105.08582.pdf>`_.
@@ -157,10 +167,11 @@ def vitstr_base(model_path: str = default_cfgs["vitstr_base"]["url"], **kwargs:
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
-        kwargs: keyword arguments of the ViTSTR architecture
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        **kwargs: keyword arguments of the ViTSTR architecture
     Returns:
     -------
         text recognition architecture
     """
-    return _vitstr("vitstr_base", model_path, **kwargs)
+    return _vitstr("vitstr_base", model_path, load_in_8_bit, **kwargs)

onnxtr/models/recognition/zoo.py CHANGED Viewed

@@ -25,12 +25,12 @@ ARCHS: List[str] = [
 ]
-def _predictor(arch: Any, **kwargs: Any) -> RecognitionPredictor:
+def _predictor(arch: Any, load_in_8_bit: bool = False, **kwargs: Any) -> RecognitionPredictor:
     if isinstance(arch, str):
         if arch not in ARCHS:
             raise ValueError(f"unknown architecture '{arch}'")
-        _model = recognition.__dict__[arch]()
+        _model = recognition.__dict__[arch](load_in_8_bit=load_in_8_bit)
     else:
         if not isinstance(
             arch, (recognition.CRNN, recognition.SAR, recognition.MASTER, recognition.ViTSTR, recognition.PARSeq)
@@ -47,7 +47,9 @@ def _predictor(arch: Any, **kwargs: Any) -> RecognitionPredictor:
     return predictor
-def recognition_predictor(arch: Any = "crnn_vgg16_bn", **kwargs: Any) -> RecognitionPredictor:
+def recognition_predictor(
+    arch: Any = "crnn_vgg16_bn", load_in_8_bit: bool = False, **kwargs: Any
+) -> RecognitionPredictor:
     """Text recognition architecture.
     Example::
@@ -60,10 +62,11 @@ def recognition_predictor(arch: Any = "crnn_vgg16_bn", **kwargs: Any) -> Recogni
     Args:
     ----
         arch: name of the architecture or model itself to use (e.g. 'crnn_vgg16_bn')
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         **kwargs: optional parameters to be passed to the architecture
     Returns:
     -------
         Recognition predictor
     """
-    return _predictor(arch, **kwargs)
+    return _predictor(arch, load_in_8_bit, **kwargs)

onnxtr/models/zoo.py CHANGED Viewed

@@ -23,6 +23,7 @@ def _predictor(
     detect_orientation: bool = False,
     straighten_pages: bool = False,
     detect_language: bool = False,
+    load_in_8_bit: bool = False,
     **kwargs,
 ) -> OCRPredictor:
     # Detection
@@ -32,12 +33,14 @@ def _predictor(
         assume_straight_pages=assume_straight_pages,
         preserve_aspect_ratio=preserve_aspect_ratio,
         symmetric_pad=symmetric_pad,
+        load_in_8_bit=load_in_8_bit,
     )
     # Recognition
     reco_predictor = recognition_predictor(
         reco_arch,
         batch_size=reco_bs,
+        load_in_8_bit=load_in_8_bit,
     )
     return OCRPredictor(
@@ -63,6 +66,7 @@ def ocr_predictor(
     detect_orientation: bool = False,
     straighten_pages: bool = False,
     detect_language: bool = False,
+    load_in_8_bit: bool = False,
     **kwargs: Any,
 ) -> OCRPredictor:
     """End-to-end OCR architecture using one model for localization, and another for text recognition.
@@ -94,6 +98,7 @@ def ocr_predictor(
             Doing so will improve performances for documents with page-uniform rotations.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
         kwargs: keyword args of `OCRPredictor`
     Returns:
@@ -110,5 +115,6 @@ def ocr_predictor(
         detect_orientation=detect_orientation,
         straighten_pages=straighten_pages,
         detect_language=detect_language,
+        load_in_8_bit=load_in_8_bit,
         **kwargs,
     )

onnxtr/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = 'v0.1.1'
1	+ __version__ = 'v0.2.0'

{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: onnxtr
-Version: 0.1.1
+Version: 0.2.0
 Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
 Author-email: Felix Dittrich <felixdittrich92@gmail.com>
 Maintainer: Felix Dittrich
@@ -227,7 +227,6 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy <2.0.0,>=1.16.0
 Requires-Dist: scipy <2.0.0,>=1.4.0
-Requires-Dist: onnx <2.0.0,>=1.12.0
 Requires-Dist: opencv-python <5.0.0,>=4.5.0
 Requires-Dist: pypdfium2 <5.0.0,>=4.0.0
 Requires-Dist: pyclipper <2.0.0,>=1.2.0
@@ -276,17 +275,18 @@ Requires-Dist: mplcursors >=0.3 ; extra == 'viz'
 [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[![Pypi](https://img.shields.io/badge/pypi-v0.0.1-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Pypi](https://img.shields.io/badge/pypi-v0.2.0-blue.svg)](https://pypi.org/project/OnnxTR/)
-> :warning: Please note that this is wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
+> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide an Onnx pipeline for docTR. For feature requests that are not directly related to the Onnx pipeline, please refer to the base project.
 **Optical Character Recognition made seamless & accessible to anyone, powered by Onnx**
 What you can expect from this repository:
 - efficient ways to parse textual information (localize and identify each word) from your documents
-- a Onnx pipeline for docTR, a wrapper around the [doctr](https://github.com/mindee/doctr) library
+- a Onnx pipeline for docTR, a wrapper around the [doctr](https://github.com/mindee/doctr) library - no PyTorch or TensorFlow dependencies
 - more lightweight package with faster inference latency and less required resources
+- 8-Bit quantized models for faster inference on CPU
 ![OCR_example](https://github.com/felixdittrich92/OnnxTR/raw/main/docs/images/ocr.png)
@@ -300,10 +300,14 @@ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to
 You can then install the latest release of the package using [pypi](https://pypi.org/project/OnnxTR/) as follows:
-**NOTE:** For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
+**NOTE:**
+For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
+- **Prerequisites:** CUDA & cuDNN need to be installed beforehand; see the [version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).
 ```shell
-pip install onnxtr[cpu]
+pip install "onnxtr[cpu]"
 # with gpu support
 pip install "onnxtr[gpu]"
 # with HTML support
@@ -340,17 +344,24 @@ from onnxtr.models import ocr_predictor
 model = ocr_predictor(
     det_arch='fast_base',  # detection architecture
-    rec_arch='vitstr_base',  # recognition architecture
+    reco_arch='vitstr_base',  # recognition architecture
     det_bs=4, # detection batch size
     reco_bs=1024, # recognition batch size
     assume_straight_pages=True,  # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
     straighten_pages=False,  # set to `True` if the pages should be straightened before final processing (default: False)
+    # Preprocessing related parameters
     preserve_aspect_ratio=True,  # set to `False` if the aspect ratio should not be preserved (default: True)
     symmetric_pad=True,  # set to `False` to disable symmetric padding (default: True)
+    # Additional parameters - meta information
+    detect_orientation=False,  # set to `True` if the orientation of the pages should be detected (default: False)
+    detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
     # DocumentBuilder specific parameters
     resolve_lines=True,  # whether words should be automatically grouped into lines (default: True)
     resolve_blocks=True,  # whether lines should be automatically grouped into blocks (default: True)
     paragraph_break=0.035,  # relative length of the minimum space separating paragraphs (default: 0.035)
+    # OnnxTR specific parameters
+    # NOTE: 8-Bit quantized models are not available for FAST detection models and can in general lead to poorer accuracy
+    load_in_8_bit=False,  # set to `True` to load 8-bit quantized models instead of the full precision ones (default: False)
 )
 # PDF
 doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
@@ -398,7 +409,7 @@ from onnxtr.models import ocr_predictor, linknet_resnet18, parseq
 reco_model = parseq("path_to_custom_model.onnx", vocab="ABC")
 det_model = linknet_resnet18("path_to_custom_model.onnx")
-model = ocr_predictor(det_model=det_model, reco_model=reco_model)
+model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
 ```
 ## Models architectures
@@ -431,9 +442,9 @@ predictor.list_archs()
             'linknet_resnet18',
             'linknet_resnet34',
             'linknet_resnet50',
-            'fast_tiny',
-            'fast_small',
-            'fast_base'
+            'fast_tiny',  # No 8-bit support
+            'fast_small',  # No 8-bit support
+            'fast_base'  # No 8-bit support
         ],
     'recognition archs':
         [
@@ -462,7 +473,36 @@ NOTE:
 ### Benchmarks
-COMING SOON
+The CPU benchmarks were measured on an `i7-14700K Intel CPU`.
+The GPU benchmarks were measured on an `RTX 4080 Nvidia GPU`.
+Benchmarking was performed on the FUNSD and CORD datasets.
+docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.
+The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes, for comparison, `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision**.
+- CPU benchmarks:
+|Library                         |FUNSD (199 pages)              |CORD  (900 pages)              |
+|--------------------------------|-------------------------------|-------------------------------|
+|docTR (CPU) - v0.8.1            | ~1.29s / Page                 | ~0.60s / Page                 |
+|**OnnxTR (CPU)** - v0.1.2       | ~0.57s / Page                 | **~0.25s / Page**             |
+|**OnnxTR (CPU) 8-bit** - v0.1.2 | **~0.38s / Page**             | **~0.14s / Page**             |
+|EasyOCR (CPU) - v1.7.1          | ~1.96s / Page                 | ~1.75s / Page                 |
+|**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page**             | ~0.52s / Page                 |
+|Surya (line) (CPU) - v0.4.4     | ~48.76s / Page                | ~35.49s / Page                |
+- GPU benchmarks:
+|Library                         |FUNSD (199 pages)              |CORD  (900 pages)              |
+|--------------------------------|-------------------------------|-------------------------------|
+|docTR (GPU) - v0.8.1            | ~0.07s / Page                 | ~0.05s / Page                 |
+|**docTR (GPU) float16** - v0.8.1| **~0.06s / Page**             | **~0.03s / Page**             |
+|OnnxTR (GPU) - v0.1.2           | **~0.06s / Page**             | ~0.04s / Page                 |
+|EasyOCR (GPU) - v1.7.1          | ~0.31s / Page                 | ~0.19s / Page                 |
+|Surya (GPU) float16 - v0.4.4    | ~3.70s / Page                 | ~2.81s / Page                 |
 ## Citation

{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 onnxtr/__init__.py,sha256=h7Wc2tuHLsaoCk5xNpEFEK-g11A6SJA7nAasA76TQ_Y,100
 onnxtr/file_utils.py,sha256=WjUKalEdR53aoeIY4e-ihy3r7J_C9qFxL40JHGPfutc,1107
-onnxtr/version.py,sha256=jQY6B0xUe16A1ACYL5noxMrvy3wwP2XMsFq2Su0alUA,23
+onnxtr/version.py,sha256=F-dLDdnsmwBrwqQCEb3nX0LORU_q1xg9pmFGVU_3j8o,23
 onnxtr/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnxtr/contrib/artefacts.py,sha256=tdmfhvfXVRYEH7uj4_hqf2cuUGoTieyNK8bXsD3zHwo,5383
 onnxtr/contrib/base.py,sha256=PoCKtOIgj7u4xl-V0eBVh-QmVeTyk_eEggFHQ8R34AI,3445
@@ -13,40 +13,40 @@ onnxtr/io/reader.py,sha256=BA7DPhW-Gkmce_ZfzrOl4H3pSXVy2JBeQEuY3pWrBFg,2852
 onnxtr/models/__init__.py,sha256=CzdiA34zjFq4a22XvgzG_Bojosi9aN5jeRCizRyh_7o,101
 onnxtr/models/_utils.py,sha256=RnSmj70gFU9CCJlsCYiVULEs7L8MhogkLf2Xu_7wBhM,4824
 onnxtr/models/builder.py,sha256=byUpCs9x5RS2lqNXLECR5GELqd1yF-MEg_09OGWj20Q,13642
-onnxtr/models/engine.py,sha256=2SwzcWCf2s4ti1mHqLF0lqLDw3wpM9FYjdEdtK5MYJ4,2140
-onnxtr/models/zoo.py,sha256=FTG6KCv-4G90N-m0fZhRDOFiyd_sFzLZuBw4bg2ecvI,4302
+onnxtr/models/engine.py,sha256=EtBIceuxIdJ1bqEl59n35D-AUrkIEelSOqD1IWEtXAE,2174
+onnxtr/models/zoo.py,sha256=oHk9BOt5U3fZBpeFRgKkQ_U6PthwfoKw0lBEi4Lu6ak,4567
 onnxtr/models/classification/__init__.py,sha256=h1bZs55iLJBMATtzS4ntTKwfD6OGXBiiqGv_hEnOFnE,41
-onnxtr/models/classification/zoo.py,sha256=yiDTEZCHLBu6WUwQVnQipKhjngPs6OgTeNt4MX8z9m4,2748
+onnxtr/models/classification/zoo.py,sha256=MW2UH3MOEJ1aDwHQ_SjrUfPb_U6MrWA3lOGlhwxogaQ,3058
 onnxtr/models/classification/models/__init__.py,sha256=rohbM6ZQslfYchi7feZwwh-sX3XXRUhgtEJQeurAytQ,24
-onnxtr/models/classification/models/mobilenet.py,sha256=9SzYYpn1XOUFHZHzKM0BtvSh08WXeAhMeebt3E1eAmQ,3677
+onnxtr/models/classification/models/mobilenet.py,sha256=z9zN0GnXcOCTH-Hu-xKsjA4W7r3D8F9n_hE-T_Rzqhc,4427
 onnxtr/models/classification/predictor/__init__.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
-onnxtr/models/classification/predictor/base.py,sha256=9-5i1Uj0EdNQBeSOhaQjGz78Gr0IAr3JZhoKVMA_il4,2009
+onnxtr/models/classification/predictor/base.py,sha256=Xfaj2XlaJuQ2R81XqF5RB0Wcvzd4wh7Z6j1ifn2niFc,2097
 onnxtr/models/detection/__init__.py,sha256=h1bZs55iLJBMATtzS4ntTKwfD6OGXBiiqGv_hEnOFnE,41
 onnxtr/models/detection/core.py,sha256=ZmVDHLJ1l4LQ8rFSKc7enXDkGcOWrcQv4H0SJWyLsag,3584
-onnxtr/models/detection/zoo.py,sha256=GfwvVAT3bbx-1zccqDO-jzOzzL5Sj3D06zbd_x6dUOo,2257
+onnxtr/models/detection/zoo.py,sha256=4DDHWQtWAQpEhl0SHcqEV12HgjXpTBh4LB4tdQc7etk,2457
 onnxtr/models/detection/models/__init__.py,sha256=6Ea6knYrVCR2jAmPlsVWmCdHe-c6lSRETSAuZGfhx8I,85
-onnxtr/models/detection/models/differentiable_binarization.py,sha256=fu1Mn8i38vIyW98YKdPnwXRcvk_-OlkOmcez7wef8l4,5157
-onnxtr/models/detection/models/fast.py,sha256=40P_q5TWUnY9iIyi-pAqnvauQ5KeqIAco2NGGF4plrg,5101
-onnxtr/models/detection/models/linknet.py,sha256=e9uZZB5S3GYImGroHyZ4oonb_ULxL0VAXd_yDuqG1K0,5194
+onnxtr/models/detection/models/differentiable_binarization.py,sha256=mn1kE7k6VLsiyERH9ghtVJBqalG_In2dv8SvtJNN_HM,6131
+onnxtr/models/detection/models/fast.py,sha256=jhw8M6vf9FM8Q5a5Lgu6mfpzkLrIEErX-1GxCR_DfYw,5677
+onnxtr/models/detection/models/linknet.py,sha256=PCwaYhTqFgt2BbixoVTeZWHN9fd4r2i1GTZ0CywZRvw,6173
 onnxtr/models/detection/postprocessor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnxtr/models/detection/postprocessor/base.py,sha256=U8FgqveZt2qPHI0aUnIEeX370zGUEWUxL6EPqhFZnRI,5714
 onnxtr/models/detection/predictor/__init__.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
 onnxtr/models/detection/predictor/base.py,sha256=K0fSTFnA8-fGL9F3HzW3nqnEqO5YKbhFf2TvpUOe_Tw,1744
 onnxtr/models/predictor/__init__.py,sha256=XL25XkRkgyK7mldF-CWhg2MMakSdP5vLpDLwL59hphk,25
-onnxtr/models/predictor/base.py,sha256=nrIE6rvxA3LbeUbQ-e3wkochO66OJT3BP-NW_OyHUyw,7555
+onnxtr/models/predictor/base.py,sha256=6rWf8FZrp6ku4yvP1jJl39t0Y8L7wqWb2GYYagTExtw,7731
 onnxtr/models/predictor/predictor.py,sha256=sfwqRzabqavlI6ty_Z6jGm64olmmQBLQGU39_nulQZs,6045
 onnxtr/models/preprocessor/__init__.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
 onnxtr/models/preprocessor/base.py,sha256=f0t0rMCzvuxwgq7jlKvcVWyjeDOx7yCLUw52quEaETM,3990
 onnxtr/models/recognition/__init__.py,sha256=h1bZs55iLJBMATtzS4ntTKwfD6OGXBiiqGv_hEnOFnE,41
 onnxtr/models/recognition/core.py,sha256=0Q1dVXqRcDUr_ycT5tpoSH9-zuDF58GtnmxWpUS8Ibo,739
 onnxtr/models/recognition/utils.py,sha256=04abbjx-_OuF5iEANWIAOK3tQQl1tExPmBQx4IG04Lc,3569
-onnxtr/models/recognition/zoo.py,sha256=FC8kHSr66s5D-Ltb8dnNI-x3HLdy5LZhZeiWMV69J9k,2115
+onnxtr/models/recognition/zoo.py,sha256=MR5O5UVM0tmWGKdGzXTE7HTBCYKZBuOVwvRBASz-zRo,2309
 onnxtr/models/recognition/models/__init__.py,sha256=IXfiuzzkft8O1CpBZWYTpFw19y49mt5rJ_iGSdaWiU0,105
-onnxtr/models/recognition/models/crnn.py,sha256=Y1lmvCAN_r-ciZNHRIZWHLhv60j2zcR1XoRC5r8_BD0,7375
-onnxtr/models/recognition/models/master.py,sha256=4FiWWBtezb7WjhDvnzauZ-iL6_rKashoqbs-9JfHlvM,4009
-onnxtr/models/recognition/models/parseq.py,sha256=qYtXWC8jXnIKViKTLp-WvcEOoU_DPkZ-_KHVhoKgUmw,3765
-onnxtr/models/recognition/models/sar.py,sha256=c0hRZ5dXl9sijkXZXtUnu9cB4DjUFc1MMzriDBhor78,3815
-onnxtr/models/recognition/models/vitstr.py,sha256=jID2RJs5O81b4jBGjkeSNPQ2PTbon-_U8ZDGfCoGcwY,4880
+onnxtr/models/recognition/models/crnn.py,sha256=jtrbe2WqPCvWCgaZkWVCX0LPE3poQXQpe6Aq6pJN6i4,8365
+onnxtr/models/recognition/models/master.py,sha256=iacF5zZNf3kWTetYbGemBMey8o0heHmgFi8sVoTMyik,4427
+onnxtr/models/recognition/models/parseq.py,sha256=omHQVfJ1BR03JxiPcr5wxR_cx9okiWddTCikpUOJc-E,4227
+onnxtr/models/recognition/models/sar.py,sha256=12U2RH8jQp4SE2axHvME3TjIa5WkSbZaJfzClFxVGUE,4244
+onnxtr/models/recognition/models/vitstr.py,sha256=uPyBBE-PDPJNxzUFqJbaVdm6MTCbE5dgvst2MSPqswk,5583
 onnxtr/models/recognition/predictor/__init__.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
 onnxtr/models/recognition/predictor/_utils.py,sha256=ZNm5I7ibiWfTlz302uiifCkUOu65YWa-oUBUMPrrUuQ,3406
 onnxtr/models/recognition/predictor/base.py,sha256=YvqSNEM3rCEttxl6hsC9zl1R97N9zO2WZfD5_-nfkR0,2483
@@ -62,9 +62,9 @@ onnxtr/utils/reconstitution.py,sha256=Hx1_ddLevKLzuxXc19UelPdsGlAwqi4f6vRSYKHDUB
 onnxtr/utils/repr.py,sha256=kfbjGL6KymGT8spo2UL4FJXZ0XRwa7CO7Y1dTVR8dIk,2129
 onnxtr/utils/visualization.py,sha256=CX09qvDnNIw3BFW5F3jM4R9OcpLWAeZyoDyTAOGRvls,9925
 onnxtr/utils/vocabs.py,sha256=SCQ4XQjbHSxunj1tg2iHRiPfE8OaTAMhcJbKq5BNvFs,3138
-onnxtr-0.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-onnxtr-0.1.1.dist-info/METADATA,sha256=Fo9T9dyuaLLYzE8cAxCdiIQfhQRnumUqw2X1OS_PqIc,24740
-onnxtr-0.1.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-onnxtr-0.1.1.dist-info/top_level.txt,sha256=48aR9EH095hmgye7CNuxAW3o_cj4TjhWmuw3jEMdTs0,12
-onnxtr-0.1.1.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-onnxtr-0.1.1.dist-info/RECORD,,
+onnxtr-0.2.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+onnxtr-0.2.0.dist-info/METADATA,sha256=Uc9NddF-vWik-X2Sohhx-3Jw8gZJxp86GFpt1Q7-QRg,27690
+onnxtr-0.2.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+onnxtr-0.2.0.dist-info/top_level.txt,sha256=48aR9EH095hmgye7CNuxAW3o_cj4TjhWmuw3jEMdTs0,12
+onnxtr-0.2.0.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+onnxtr-0.2.0.dist-info/RECORD,,

{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

{onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/zip-safe RENAMED Viewed

File without changes

onnxtr 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

onnxtr 0.1.1py3-none-any.whl → 0.2.0py3-none-any.whl