onnxtr 0.1.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. onnxtr/io/elements.py +17 -4
  2. onnxtr/io/pdf.py +6 -3
  3. onnxtr/models/__init__.py +1 -0
  4. onnxtr/models/_utils.py +57 -20
  5. onnxtr/models/builder.py +24 -9
  6. onnxtr/models/classification/models/mobilenet.py +25 -7
  7. onnxtr/models/classification/predictor/base.py +1 -0
  8. onnxtr/models/classification/zoo.py +22 -7
  9. onnxtr/models/detection/_utils/__init__.py +1 -0
  10. onnxtr/models/detection/_utils/base.py +66 -0
  11. onnxtr/models/detection/models/differentiable_binarization.py +41 -11
  12. onnxtr/models/detection/models/fast.py +37 -9
  13. onnxtr/models/detection/models/linknet.py +39 -9
  14. onnxtr/models/detection/postprocessor/base.py +4 -3
  15. onnxtr/models/detection/predictor/base.py +15 -1
  16. onnxtr/models/detection/zoo.py +16 -3
  17. onnxtr/models/engine.py +75 -9
  18. onnxtr/models/predictor/base.py +69 -42
  19. onnxtr/models/predictor/predictor.py +22 -15
  20. onnxtr/models/recognition/models/crnn.py +39 -9
  21. onnxtr/models/recognition/models/master.py +19 -5
  22. onnxtr/models/recognition/models/parseq.py +20 -5
  23. onnxtr/models/recognition/models/sar.py +19 -5
  24. onnxtr/models/recognition/models/vitstr.py +31 -9
  25. onnxtr/models/recognition/zoo.py +12 -6
  26. onnxtr/models/zoo.py +22 -0
  27. onnxtr/py.typed +0 -0
  28. onnxtr/utils/geometry.py +33 -12
  29. onnxtr/version.py +1 -1
  30. {onnxtr-0.1.2.dist-info → onnxtr-0.3.0.dist-info}/METADATA +81 -16
  31. {onnxtr-0.1.2.dist-info → onnxtr-0.3.0.dist-info}/RECORD +35 -32
  32. {onnxtr-0.1.2.dist-info → onnxtr-0.3.0.dist-info}/WHEEL +1 -1
  33. {onnxtr-0.1.2.dist-info → onnxtr-0.3.0.dist-info}/top_level.txt +0 -1
  34. {onnxtr-0.1.2.dist-info → onnxtr-0.3.0.dist-info}/LICENSE +0 -0
  35. {onnxtr-0.1.2.dist-info → onnxtr-0.3.0.dist-info}/zip-safe +0 -0
onnxtr/models/detection/models/differentiable_binarization.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Dict, Optional
 import numpy as np
 from scipy.special import expit
 
-from ...engine import Engine
+from ...engine import Engine, EngineConfig
 from ..postprocessor.base import GeneralDetectionPostProcessor
 
 __all__ = ["DBNet", "db_resnet50", "db_resnet34", "db_mobilenet_v3_large"]
@@ -20,18 +20,21 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet50-69ba0015.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet50_static_8_bit-09a6104f.onnx",
     },
     "db_resnet34": {
         "input_shape": (3, 1024, 1024),
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet34-b4873198.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet34_static_8_bit-027e2c7f.onnx",
     },
     "db_mobilenet_v3_large": {
         "input_shape": (3, 1024, 1024),
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
-        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_mobilenet_v3_large-1866973f.onnx",
+        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.2.0/db_mobilenet_v3_large-4987e7bd.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.2.0/db_mobilenet_v3_large_static_8_bit-535a6f25.onnx",
     },
 }
 
@@ -42,6 +45,7 @@ class DBNet(Engine):
     Args:
     ----
         model_path: path or url to onnx model file
+        engine_cfg: configuration for the inference engine
        bin_thresh: threshold for binarization of the output feature map
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: if True, fit straight bounding boxes only
@@ -51,14 +55,15 @@ class DBNet(Engine):
 
    def __init__(
        self,
-        model_path,
+        model_path: str,
+        engine_cfg: EngineConfig = EngineConfig(),
        bin_thresh: float = 0.3,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
        cfg: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
-        super().__init__(url=model_path, **kwargs)
+        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.cfg = cfg
        self.assume_straight_pages = assume_straight_pages
        self.postprocessor = GeneralDetectionPostProcessor(
@@ -87,13 +92,22 @@ class DBNet(Engine):
 def _dbnet(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
     **kwargs: Any,
 ) -> DBNet:
+    # Patch the url
+    model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
     # Build the model
-    return DBNet(model_path, cfg=default_cfgs[arch], **kwargs)
+    return DBNet(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs)
 
 
-def db_resnet34(model_path: str = default_cfgs["db_resnet34"]["url"], **kwargs: Any) -> DBNet:
+def db_resnet34(
+    model_path: str = default_cfgs["db_resnet34"]["url"],
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
+    **kwargs: Any,
+) -> DBNet:
     """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
     <https://arxiv.org/pdf/1911.08947.pdf>`_, using a ResNet-34 backbone.
 
@@ -106,16 +120,23 @@ def db_resnet34(model_path: str = default_cfgs["db_resnet34"]["url"], **kwargs:
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        engine_cfg: configuration for the inference engine
         **kwargs: keyword arguments of the DBNet architecture
 
     Returns:
     -------
         text detection architecture
     """
-    return _dbnet("db_resnet34", model_path, **kwargs)
+    return _dbnet("db_resnet34", model_path, load_in_8_bit, engine_cfg, **kwargs)
 
 
-def db_resnet50(model_path: str = default_cfgs["db_resnet50"]["url"], **kwargs: Any) -> DBNet:
+def db_resnet50(
+    model_path: str = default_cfgs["db_resnet50"]["url"],
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
+    **kwargs: Any,
+) -> DBNet:
     """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
     <https://arxiv.org/pdf/1911.08947.pdf>`_, using a ResNet-50 backbone.
 
@@ -128,16 +149,23 @@ def db_resnet50(model_path: str = default_cfgs["db_resnet50"]["url"], **kwargs:
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        engine_cfg: configuration for the inference engine
         **kwargs: keyword arguments of the DBNet architecture
 
     Returns:
     -------
         text detection architecture
     """
-    return _dbnet("db_resnet50", model_path, **kwargs)
+    return _dbnet("db_resnet50", model_path, load_in_8_bit, engine_cfg, **kwargs)
 
 
-def db_mobilenet_v3_large(model_path: str = default_cfgs["db_mobilenet_v3_large"]["url"], **kwargs: Any) -> DBNet:
+def db_mobilenet_v3_large(
+    model_path: str = default_cfgs["db_mobilenet_v3_large"]["url"],
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
+    **kwargs: Any,
+) -> DBNet:
     """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
     <https://arxiv.org/pdf/1911.08947.pdf>`_, using a MobileNet V3 Large backbone.
 
@@ -150,10 +178,12 @@ def db_mobilenet_v3_large(model_path: str = default_cfgs["db_mobilenet_v3_large"
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        engine_cfg: configuration for the inference engine
         **kwargs: keyword arguments of the DBNet architecture
 
     Returns:
     -------
         text detection architecture
     """
-    return _dbnet("db_mobilenet_v3_large", model_path, **kwargs)
+    return _dbnet("db_mobilenet_v3_large", model_path, load_in_8_bit, engine_cfg, **kwargs)
onnxtr/models/detection/models/fast.py CHANGED
@@ -3,12 +3,13 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
+import logging
 from typing import Any, Dict, Optional
 
 import numpy as np
 from scipy.special import expit
 
-from ...engine import Engine
+from ...engine import Engine, EngineConfig
 from ..postprocessor.base import GeneralDetectionPostProcessor
 
 __all__ = ["FAST", "fast_tiny", "fast_small", "fast_base"]
@@ -42,6 +43,7 @@ class FAST(Engine):
     Args:
     ----
         model_path: path or url to onnx model file
+        engine_cfg: configuration for the inference engine
        bin_thresh: threshold for binarization of the output feature map
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: if True, fit straight bounding boxes only
@@ -52,13 +54,14 @@ class FAST(Engine):
    def __init__(
        self,
        model_path: str,
+        engine_cfg: EngineConfig = EngineConfig(),
        bin_thresh: float = 0.1,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
        cfg: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
-        super().__init__(url=model_path, **kwargs)
+        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.cfg = cfg
        self.assume_straight_pages = assume_straight_pages
 
@@ -88,13 +91,22 @@ class FAST(Engine):
 def _fast(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
     **kwargs: Any,
 ) -> FAST:
+    if load_in_8_bit:
+        logging.warning("FAST models do not support 8-bit quantization yet. Loading full precision model...")
     # Build the model
-    return FAST(model_path, cfg=default_cfgs[arch], **kwargs)
+    return FAST(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs)
 
 
-def fast_tiny(model_path: str = default_cfgs["fast_tiny"]["url"], **kwargs: Any) -> FAST:
+def fast_tiny(
+    model_path: str = default_cfgs["fast_tiny"]["url"],
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
+    **kwargs: Any,
+) -> FAST:
     """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
     <https://arxiv.org/pdf/2111.02394.pdf>`_, using a tiny TextNet backbone.
 
@@ -107,16 +119,23 @@ def fast_tiny(model_path: str = default_cfgs["fast_tiny"]["url"], **kwargs: Any)
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        engine_cfg: configuration for the inference engine
         **kwargs: keyword arguments of the FAST architecture
 
     Returns:
     -------
         text detection architecture
     """
-    return _fast("fast_tiny", model_path, **kwargs)
+    return _fast("fast_tiny", model_path, load_in_8_bit, engine_cfg, **kwargs)
 
 
-def fast_small(model_path: str = default_cfgs["fast_small"]["url"], **kwargs: Any) -> FAST:
+def fast_small(
+    model_path: str = default_cfgs["fast_small"]["url"],
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
+    **kwargs: Any,
+) -> FAST:
     """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
     <https://arxiv.org/pdf/2111.02394.pdf>`_, using a small TextNet backbone.
 
@@ -129,16 +148,23 @@ def fast_small(model_path: str = default_cfgs["fast_small"]["url"], **kwargs: An
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        engine_cfg: configuration for the inference engine
         **kwargs: keyword arguments of the FAST architecture
 
     Returns:
     -------
         text detection architecture
     """
-    return _fast("fast_small", model_path, **kwargs)
+    return _fast("fast_small", model_path, load_in_8_bit, engine_cfg, **kwargs)
 
 
-def fast_base(model_path: str = default_cfgs["fast_base"]["url"], **kwargs: Any) -> FAST:
+def fast_base(
+    model_path: str = default_cfgs["fast_base"]["url"],
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
+    **kwargs: Any,
+) -> FAST:
     """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
     <https://arxiv.org/pdf/2111.02394.pdf>`_, using a base TextNet backbone.
 
@@ -151,10 +177,12 @@ def fast_base(model_path: str = default_cfgs["fast_base"]["url"], **kwargs: Any)
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        engine_cfg: configuration for the inference engine
         **kwargs: keyword arguments of the FAST architecture
 
     Returns:
     -------
         text detection architecture
     """
-    return _fast("fast_base", model_path, **kwargs)
+    return _fast("fast_base", model_path, load_in_8_bit, engine_cfg, **kwargs)
onnxtr/models/detection/models/linknet.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Dict, Optional
 import numpy as np
 from scipy.special import expit
 
-from ...engine import Engine
+from ...engine import Engine, EngineConfig
 from ..postprocessor.base import GeneralDetectionPostProcessor
 
 __all__ = ["LinkNet", "linknet_resnet18", "linknet_resnet34", "linknet_resnet50"]
@@ -20,18 +20,21 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet18-e0e0b9dc.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet18_static_8_bit-3b3a37dd.onnx",
     },
     "linknet_resnet34": {
         "input_shape": (3, 1024, 1024),
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet34-93e39a39.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet34_static_8_bit-2824329d.onnx",
     },
     "linknet_resnet50": {
         "input_shape": (3, 1024, 1024),
         "mean": (0.798, 0.785, 0.772),
         "std": (0.264, 0.2749, 0.287),
         "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet50-15d8c4ec.onnx",
+        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet50_static_8_bit-65d6b0b8.onnx",
     },
 }
 
@@ -42,6 +45,7 @@ class LinkNet(Engine):
     Args:
     ----
         model_path: path or url to onnx model file
+        engine_cfg: configuration for the inference engine
        bin_thresh: threshold for binarization of the output feature map
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: if True, fit straight bounding boxes only
@@ -52,13 +56,14 @@ class LinkNet(Engine):
    def __init__(
        self,
        model_path: str,
+        engine_cfg: EngineConfig = EngineConfig(),
        bin_thresh: float = 0.1,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
        cfg: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
-        super().__init__(url=model_path, **kwargs)
+        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.cfg = cfg
        self.assume_straight_pages = assume_straight_pages
 
@@ -88,13 +93,22 @@ class LinkNet(Engine):
 def _linknet(
     arch: str,
     model_path: str,
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
     **kwargs: Any,
 ) -> LinkNet:
+    # Patch the url
+    model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
     # Build the model
-    return LinkNet(model_path, cfg=default_cfgs[arch], **kwargs)
+    return LinkNet(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs)
 
 
-def linknet_resnet18(model_path: str = default_cfgs["linknet_resnet18"]["url"], **kwargs: Any) -> LinkNet:
+def linknet_resnet18(
+    model_path: str = default_cfgs["linknet_resnet18"]["url"],
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
+    **kwargs: Any,
+) -> LinkNet:
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
     <https://arxiv.org/pdf/1707.03718.pdf>`_.
 
@@ -107,16 +121,23 @@ def linknet_resnet18(model_path: str = default_cfgs["linknet_resnet18"]["url"],
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        engine_cfg: configuration for the inference engine
         **kwargs: keyword arguments of the LinkNet architecture
 
     Returns:
     -------
         text detection architecture
     """
-    return _linknet("linknet_resnet18", model_path, **kwargs)
+    return _linknet("linknet_resnet18", model_path, load_in_8_bit, engine_cfg, **kwargs)
 
 
-def linknet_resnet34(model_path: str = default_cfgs["linknet_resnet34"]["url"], **kwargs: Any) -> LinkNet:
+def linknet_resnet34(
+    model_path: str = default_cfgs["linknet_resnet34"]["url"],
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
+    **kwargs: Any,
+) -> LinkNet:
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
     <https://arxiv.org/pdf/1707.03718.pdf>`_.
 
@@ -129,16 +150,23 @@ def linknet_resnet34(model_path: str = default_cfgs["linknet_resnet34"]["url"],
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        engine_cfg: configuration for the inference engine
         **kwargs: keyword arguments of the LinkNet architecture
 
     Returns:
     -------
         text detection architecture
     """
-    return _linknet("linknet_resnet34", model_path, **kwargs)
+    return _linknet("linknet_resnet34", model_path, load_in_8_bit, engine_cfg, **kwargs)
 
 
-def linknet_resnet50(model_path: str = default_cfgs["linknet_resnet50"]["url"], **kwargs: Any) -> LinkNet:
+def linknet_resnet50(
+    model_path: str = default_cfgs["linknet_resnet50"]["url"],
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
+    **kwargs: Any,
+) -> LinkNet:
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
     <https://arxiv.org/pdf/1707.03718.pdf>`_.
 
@@ -151,10 +179,12 @@ def linknet_resnet50(model_path: str = default_cfgs["linknet_resnet50"]["url"],
     Args:
     ----
         model_path: path to onnx model file, defaults to url in default_cfgs
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        engine_cfg: configuration for the inference engine
         **kwargs: keyword arguments of the LinkNet architecture
 
     Returns:
     -------
         text detection architecture
     """
-    return _linknet("linknet_resnet50", model_path, **kwargs)
+    return _linknet("linknet_resnet50", model_path, load_in_8_bit, engine_cfg, **kwargs)
onnxtr/models/detection/postprocessor/base.py CHANGED
@@ -109,7 +109,7 @@ class GeneralDetectionPostProcessor(DetectionPostProcessor):
         contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         for contour in contours:
             # Check whether smallest enclosing bounding box is not too small
-            if np.any(contour[:, 0].max(axis=0) - contour[:, 0].min(axis=0) < 2):
+            if np.any(contour[:, 0].max(axis=0) - contour[:, 0].min(axis=0) < 2):  # type: ignore[index]
                 continue
             # Compute objectness
             if self.assume_straight_pages:
@@ -136,9 +136,10 @@ class GeneralDetectionPostProcessor(DetectionPostProcessor):
             # compute relative box to get rid of img shape
             _box[:, 0] /= width
             _box[:, 1] /= height
-            boxes.append(_box)
+            # Add score to box as (0, score)
+            boxes.append(np.vstack([_box, np.array([0.0, score])]))
 
         if not self.assume_straight_pages:
-            return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 4, 2), dtype=pred.dtype)
+            return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 5, 2), dtype=pred.dtype)
         else:
             return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 5), dtype=pred.dtype)
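For rotated pages, each predicted polygon therefore gains a fifth row carrying the confidence, changing the per-page array shape from (N, 4, 2) to (N, 5, 2). A minimal numpy sketch of how a consumer might split the two apart again (variable names are illustrative):

    import numpy as np

    # One fake prediction in the new format: 4 corner points plus a (0, score) row
    polys = np.array([[[0.1, 0.1], [0.4, 0.1], [0.4, 0.2], [0.1, 0.2], [0.0, 0.87]]])

    corners = polys[:, :4]         # (N, 4, 2) polygon coordinates
    scores = polys[:, 4, 1]        # (N,) objectness scores
    print(corners.shape, scores)   # (1, 4, 2) [0.87]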
onnxtr/models/detection/predictor/base.py CHANGED
@@ -7,6 +7,7 @@ from typing import Any, List, Tuple, Union
 
 import numpy as np
 
+from onnxtr.models.detection._utils import _remove_padding
 from onnxtr.models.preprocessor import PreProcessor
 from onnxtr.utils.repr import NestedObject
 
@@ -38,6 +39,11 @@ class DetectionPredictor(NestedObject):
         return_maps: bool = False,
         **kwargs: Any,
     ) -> Union[List[np.ndarray], Tuple[List[np.ndarray], List[np.ndarray]]]:
+        # Extract parameters from the preprocessor
+        preserve_aspect_ratio = self.pre_processor.resize.preserve_aspect_ratio
+        symmetric_pad = self.pre_processor.resize.symmetric_pad
+        assume_straight_pages = self.model.assume_straight_pages
+
         # Dimension check
         if any(page.ndim != 3 for page in pages):
             raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")
@@ -47,7 +53,15 @@
             self.model(batch, return_preds=True, return_model_output=True, **kwargs) for batch in processed_batches
         ]
 
-        preds = [pred for batch in predicted_batches for pred in batch["preds"]]
+        # Remove padding from loc predictions
+        preds = _remove_padding(
+            pages,
+            [pred[0] for batch in predicted_batches for pred in batch["preds"]],
+            preserve_aspect_ratio=preserve_aspect_ratio,
+            symmetric_pad=symmetric_pad,
+            assume_straight_pages=assume_straight_pages,
+        )
+
         if return_maps:
             seg_maps = [pred for batch in predicted_batches for pred in batch["out_map"]]
             return preds, seg_maps
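`_remove_padding` lives in the new `onnxtr/models/detection/_utils/base.py` (+66 lines), whose body is not shown in this diff. Conceptually it maps box coordinates expressed relative to the padded square model input back onto the unpadded page. A rough, purely illustrative sketch under those assumptions, for straight boxes with symmetric padding only (not the library's implementation):

    import numpy as np

    def _remove_padding_sketch(page: np.ndarray, boxes: np.ndarray) -> np.ndarray:
        # Undo aspect-ratio padding for one page of (xmin, ymin, xmax, ymax, score)
        # boxes, assuming the preprocessor padded the short side symmetrically
        # to reach a square model input.
        h, w = page.shape[:2]
        if h > w:  # width was padded
            scale, coord_axes = h / w, (0, 2)
        else:      # height was padded
            scale, coord_axes = w / h, (1, 3)
        out = boxes.copy()
        for ax in coord_axes:
            # shift out the symmetric pad, then rescale back to [0, 1] of the true side
            out[:, ax] = (out[:, ax] - (1 - 1 / scale) / 2) * scale
        return np.clip(out, 0, 1)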
onnxtr/models/detection/zoo.py CHANGED
@@ -6,6 +6,7 @@
 from typing import Any
 
 from .. import detection
+from ..engine import EngineConfig
 from ..preprocessor import PreProcessor
 from .predictor import DetectionPredictor
 
@@ -24,12 +25,20 @@ ARCHS = [
 ]
 
 
-def _predictor(arch: Any, assume_straight_pages: bool = True, **kwargs: Any) -> DetectionPredictor:
+def _predictor(
+    arch: Any,
+    assume_straight_pages: bool = True,
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
+    **kwargs: Any,
+) -> DetectionPredictor:
     if isinstance(arch, str):
         if arch not in ARCHS:
             raise ValueError(f"unknown architecture '{arch}'")
 
-        _model = detection.__dict__[arch](assume_straight_pages=assume_straight_pages)
+        _model = detection.__dict__[arch](
+            assume_straight_pages=assume_straight_pages, load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg
+        )
     else:
         if not isinstance(arch, (detection.DBNet, detection.LinkNet, detection.FAST)):
             raise ValueError(f"unknown architecture: {type(arch)}")
@@ -50,6 +59,8 @@ def _predictor(arch: Any, assume_straight_pages: bool = True, **kwargs: Any) ->
 def detection_predictor(
     arch: Any = "fast_base",
     assume_straight_pages: bool = True,
+    load_in_8_bit: bool = False,
+    engine_cfg: EngineConfig = EngineConfig(),
     **kwargs: Any,
 ) -> DetectionPredictor:
     """Text detection architecture.
@@ -64,10 +75,12 @@ def detection_predictor(
     Args:
     ----
         arch: name of the architecture or model itself to use (e.g. 'db_resnet50')
         assume_straight_pages: If True, fit straight boxes to the page
+        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
+        engine_cfg: configuration for the inference engine
         **kwargs: optional keyword arguments passed to the architecture
 
     Returns:
     -------
         Detection predictor
     """
-    return _predictor(arch, assume_straight_pages, **kwargs)
+    return _predictor(arch, assume_straight_pages, load_in_8_bit, engine_cfg=engine_cfg, **kwargs)
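Both new options flow straight through the zoo into the factories above. A minimal sketch, assuming `detection_predictor` is re-exported at `onnxtr.models` as in upstream docTR:

    import numpy as np
    from onnxtr.models import detection_predictor

    # 8-bit quantized DBNet on the default EngineConfig (CPU unless CUDA is available)
    det_predictor = detection_predictor("db_resnet50", assume_straight_pages=True, load_in_8_bit=True)

    page = np.zeros((1024, 768, 3), dtype=np.uint8)  # one dummy 3-channel page
    loc_preds = det_predictor([page])  # per-page arrays of relative box coordinates and scores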
onnxtr/models/engine.py CHANGED
@@ -3,14 +3,79 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, List, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import numpy as np
-import onnxruntime
+from onnxruntime import (
+    ExecutionMode,
+    GraphOptimizationLevel,
+    InferenceSession,
+    SessionOptions,
+    get_available_providers,
+    get_device,
+)
 
 from onnxtr.utils.data import download_from_url
 from onnxtr.utils.geometry import shape_translate
 
+__all__ = ["EngineConfig"]
+
+
+class EngineConfig:
+    """Implements a configuration class for the engine of a model
+
+    Args:
+    ----
+        providers: list of providers to use for inference ref.: https://onnxruntime.ai/docs/execution-providers/
+        session_options: configuration for the inference session ref.: https://onnxruntime.ai/docs/api/python/api_summary.html#sessionoptions
+    """
+
+    def __init__(
+        self,
+        providers: Optional[Union[List[Tuple[str, Dict[str, Any]]], List[str]]] = None,
+        session_options: Optional[SessionOptions] = None,
+    ):
+        self._providers = providers or self._init_providers()
+        self._session_options = session_options or self._init_sess_opts()
+
+    def _init_providers(self) -> List[Tuple[str, Dict[str, Any]]]:
+        providers: Any = [("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
+        available_providers = get_available_providers()
+        if "CUDAExecutionProvider" in available_providers and get_device() == "GPU":  # pragma: no cover
+            providers.insert(
+                0,
+                (
+                    "CUDAExecutionProvider",
+                    {
+                        "device_id": 0,
+                        "arena_extend_strategy": "kNextPowerOfTwo",
+                        "cudnn_conv_algo_search": "EXHAUSTIVE",
+                        "do_copy_in_default_stream": True,
+                    },
+                ),
+            )
+        return providers
+
+    def _init_sess_opts(self) -> SessionOptions:
+        session_options = SessionOptions()
+        session_options.enable_cpu_mem_arena = True
+        session_options.execution_mode = ExecutionMode.ORT_SEQUENTIAL
+        session_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
+        session_options.intra_op_num_threads = -1
+        session_options.inter_op_num_threads = -1
+        return session_options
+
+    @property
+    def providers(self) -> Union[List[Tuple[str, Dict[str, Any]]], List[str]]:
+        return self._providers
+
+    @property
+    def session_options(self) -> SessionOptions:
+        return self._session_options
+
+    def __repr__(self) -> str:
+        return f"EngineConfig(providers={self.providers})"
+
 
 class Engine:
     """Implements an abstract class for the engine of a model
@@ -18,15 +83,16 @@ class Engine:
     Args:
     ----
         url: the url to use to download a model if needed
-        providers: list of providers to use for inference
+        engine_cfg: the configuration of the engine
        **kwargs: additional arguments to be passed to `download_from_url`
     """
 
-    def __init__(
-        self, url: str, providers: List[str] = ["CPUExecutionProvider", "CUDAExecutionProvider"], **kwargs: Any
-    ) -> None:
+    def __init__(self, url: str, engine_cfg: EngineConfig = EngineConfig(), **kwargs: Any) -> None:
+        engine_cfg = engine_cfg or EngineConfig()
        archive_path = download_from_url(url, cache_subdir="models", **kwargs) if "http" in url else url
-        self.runtime = onnxruntime.InferenceSession(archive_path, providers=providers)
+        self.session_options = engine_cfg.session_options
+        self.providers = engine_cfg.providers
+        self.runtime = InferenceSession(archive_path, providers=self.providers, sess_options=self.session_options)
        self.runtime_inputs = self.runtime.get_inputs()[0]
        self.tf_exported = int(self.runtime_inputs.shape[-1]) == 3
        self.fixed_batch_size: Union[int, str] = self.runtime_inputs.shape[
@@ -43,8 +109,8 @@ class Engine:
            inputs = np.broadcast_to(inputs, (self.fixed_batch_size, *inputs.shape))
            # combine the results
            logits = np.concatenate(
-                [self.runtime.run(self.output_name, {"input": batch})[0] for batch in inputs], axis=0
+                [self.runtime.run(self.output_name, {self.runtime_inputs.name: batch})[0] for batch in inputs], axis=0
            )
        else:
-            logits = self.runtime.run(self.output_name, {"input": inputs})[0]
+            logits = self.runtime.run(self.output_name, {self.runtime_inputs.name: inputs})[0]
        return shape_translate(logits, format="BHWC")
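To wire a custom runtime configuration through any model, construct an `EngineConfig` and hand it to a factory. A minimal sketch (the provider and session settings shown are standard onnxruntime options, not OnnxTR-specific; the thread cap is an arbitrary example value):

    from onnxruntime import SessionOptions
    from onnxtr.models.engine import EngineConfig

    opts = SessionOptions()
    opts.intra_op_num_threads = 4  # cap CPU threads instead of the -1 default above

    cfg = EngineConfig(
        providers=[("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})],
        session_options=opts,
    )
    print(cfg)  # EngineConfig(providers=[('CPUExecutionProvider', {...})])

Note that the `Engine.run` change in the last hunk also stops hardcoding the feed name: inputs are now keyed on `self.runtime_inputs.name`, so models exported with an input name other than "input" run without patching.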