PyPI - onnxtr - Versions diffs - 0.3.0__tar.gz → 0.3.1__tar.gz - Mend

onnxtr 0.3.0tar.gz → 0.3.1tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80) hide show

{onnxtr-0.3.0 → onnxtr-0.3.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: onnxtr
-Version: 0.3.0
+Version: 0.3.1
 Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
 Author-email: Felix Dittrich <felixdittrich92@gmail.com>
 Maintainer: Felix Dittrich
@@ -275,7 +275,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
 [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[![Pypi](https://img.shields.io/badge/pypi-v0.3.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Pypi](https://img.shields.io/badge/pypi-v0.3.1-blue.svg)](https://pypi.org/project/OnnxTR/)
 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
@@ -345,8 +345,8 @@ from onnxtr.models import ocr_predictor, EngineConfig
 model = ocr_predictor(
     det_arch='fast_base',  # detection architecture
     reco_arch='vitstr_base',  # recognition architecture
-    det_bs=4, # detection batch size
-    reco_bs=1024, # recognition batch size
+    det_bs=2, # detection batch size
+    reco_bs=512, # recognition batch size
     assume_straight_pages=True,  # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
     straighten_pages=False,  # set to `True` if the pages should be straightened before final processing (default: False)
     # Preprocessing related parameters
@@ -419,7 +419,7 @@ general_options.enable_cpu_mem_arena = False
 # NOTE: The following would force to run only on the GPU if no GPU is available it will raise an error
 # List of strings e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"] or a list of tuples with the provider and its options e.g.
 # [("CUDAExecutionProvider", {"device_id": 0}), ("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
-providers = [("CUDAExecutionProvider", {"device_id": 0})]  # For available providers see: https://onnxruntime.ai/docs/execution-providers/
+providers = [("CUDAExecutionProvider", {"device_id": 0, "cudnn_conv_algo_search": "DEFAULT"})]  # For available providers see: https://onnxruntime.ai/docs/execution-providers/
 engine_config = EngineConfig(
     session_options=general_options,
@@ -451,7 +451,7 @@ model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
 ## Models architectures
-Credits where it's due: this repository is implementing, among others, architectures from published research papers.
+Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:
 ### Text Detection

{onnxtr-0.3.0 → onnxtr-0.3.1}/README.md RENAMED Viewed

@@ -7,7 +7,7 @@
 [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[![Pypi](https://img.shields.io/badge/pypi-v0.3.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Pypi](https://img.shields.io/badge/pypi-v0.3.1-blue.svg)](https://pypi.org/project/OnnxTR/)
 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
@@ -77,8 +77,8 @@ from onnxtr.models import ocr_predictor, EngineConfig
 model = ocr_predictor(
     det_arch='fast_base',  # detection architecture
     reco_arch='vitstr_base',  # recognition architecture
-    det_bs=4, # detection batch size
-    reco_bs=1024, # recognition batch size
+    det_bs=2, # detection batch size
+    reco_bs=512, # recognition batch size
     assume_straight_pages=True,  # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
     straighten_pages=False,  # set to `True` if the pages should be straightened before final processing (default: False)
     # Preprocessing related parameters
@@ -151,7 +151,7 @@ general_options.enable_cpu_mem_arena = False
 # NOTE: The following would force to run only on the GPU if no GPU is available it will raise an error
 # List of strings e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"] or a list of tuples with the provider and its options e.g.
 # [("CUDAExecutionProvider", {"device_id": 0}), ("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
-providers = [("CUDAExecutionProvider", {"device_id": 0})]  # For available providers see: https://onnxruntime.ai/docs/execution-providers/
+providers = [("CUDAExecutionProvider", {"device_id": 0, "cudnn_conv_algo_search": "DEFAULT"})]  # For available providers see: https://onnxruntime.ai/docs/execution-providers/
 engine_config = EngineConfig(
     session_options=general_options,
@@ -183,7 +183,7 @@ model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
 ## Models architectures
-Credits where it's due: this repository is implementing, among others, architectures from published research papers.
+Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:
 ### Text Detection

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/contrib/base.py RENAMED Viewed

@@ -6,8 +6,8 @@
 from typing import Any, List, Optional
 import numpy as np
+import onnxruntime as ort
-from onnxtr.file_utils import requires_package
 from onnxtr.utils.data import download_from_url
@@ -44,9 +44,6 @@ class _BasePredictor:
         -------
             Any: the ONNX loaded model
         """
-        requires_package("onnxruntime", "`.contrib` module requires `onnxruntime` to be installed.")
-        import onnxruntime as ort
         if not url and not model_path:
             raise ValueError("You must provide either a url or a model_path")
         onnx_model_path = model_path if model_path else str(download_from_url(url, cache_subdir="models", **kwargs))  # type: ignore[arg-type]

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/classification/models/mobilenet.py RENAMED Viewed

@@ -51,7 +51,7 @@ class MobileNetV3(Engine):
     def __init__(
         self,
         model_path: str,
-        engine_cfg: EngineConfig = EngineConfig(),
+        engine_cfg: Optional[EngineConfig] = None,
         cfg: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> None:
@@ -69,7 +69,7 @@ def _mobilenet_v3(
     arch: str,
     model_path: str,
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> MobileNetV3:
     # Patch the url
@@ -81,7 +81,7 @@ def _mobilenet_v3(
 def mobilenet_v3_small_crop_orientation(
     model_path: str = default_cfgs["mobilenet_v3_small_crop_orientation"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> MobileNetV3:
     """MobileNetV3-Small architecture as described in
@@ -111,7 +111,7 @@ def mobilenet_v3_small_crop_orientation(
 def mobilenet_v3_small_page_orientation(
     model_path: str = default_cfgs["mobilenet_v3_small_page_orientation"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> MobileNetV3:
     """MobileNetV3-Small architecture as described in

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/classification/zoo.py RENAMED Viewed

@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any, List
+from typing import Any, List, Optional
 from onnxtr.models.engine import EngineConfig
@@ -17,7 +17,7 @@ ORIENTATION_ARCHS: List[str] = ["mobilenet_v3_small_crop_orientation", "mobilene
 def _orientation_predictor(
-    arch: str, load_in_8_bit: bool = False, engine_cfg: EngineConfig = EngineConfig(), **kwargs: Any
+    arch: str, load_in_8_bit: bool = False, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any
 ) -> OrientationPredictor:
     if arch not in ORIENTATION_ARCHS:
         raise ValueError(f"unknown architecture '{arch}'")
@@ -26,7 +26,7 @@ def _orientation_predictor(
     _model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
     kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
     kwargs["std"] = kwargs.get("std", _model.cfg["std"])
-    kwargs["batch_size"] = kwargs.get("batch_size", 128 if "crop" in arch else 4)
+    kwargs["batch_size"] = kwargs.get("batch_size", 512 if "crop" in arch else 2)
     input_shape = _model.cfg["input_shape"][1:]
     predictor = OrientationPredictor(
         PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs),
@@ -38,7 +38,7 @@ def _orientation_predictor(
 def crop_orientation_predictor(
     arch: Any = "mobilenet_v3_small_crop_orientation",
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> OrientationPredictor:
     """Crop orientation classification architecture.
@@ -66,7 +66,7 @@ def crop_orientation_predictor(
 def page_orientation_predictor(
     arch: Any = "mobilenet_v3_small_page_orientation",
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> OrientationPredictor:
     """Page orientation classification architecture.

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/models/differentiable_binarization.py RENAMED Viewed

@@ -56,7 +56,7 @@ class DBNet(Engine):
     def __init__(
         self,
         model_path: str,
-        engine_cfg: EngineConfig = EngineConfig(),
+        engine_cfg: Optional[EngineConfig] = None,
         bin_thresh: float = 0.3,
         box_thresh: float = 0.1,
         assume_straight_pages: bool = True,
@@ -93,7 +93,7 @@ def _dbnet(
     arch: str,
     model_path: str,
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> DBNet:
     # Patch the url
@@ -105,7 +105,7 @@ def _dbnet(
 def db_resnet34(
     model_path: str = default_cfgs["db_resnet34"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> DBNet:
     """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
@@ -134,7 +134,7 @@ def db_resnet34(
 def db_resnet50(
     model_path: str = default_cfgs["db_resnet50"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> DBNet:
     """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
@@ -163,7 +163,7 @@ def db_resnet50(
 def db_mobilenet_v3_large(
     model_path: str = default_cfgs["db_mobilenet_v3_large"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> DBNet:
     """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/models/fast.py RENAMED Viewed

@@ -54,7 +54,7 @@ class FAST(Engine):
     def __init__(
         self,
         model_path: str,
-        engine_cfg: EngineConfig = EngineConfig(),
+        engine_cfg: Optional[EngineConfig] = None,
         bin_thresh: float = 0.1,
         box_thresh: float = 0.1,
         assume_straight_pages: bool = True,
@@ -92,7 +92,7 @@ def _fast(
     arch: str,
     model_path: str,
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> FAST:
     if load_in_8_bit:
@@ -104,7 +104,7 @@ def _fast(
 def fast_tiny(
     model_path: str = default_cfgs["fast_tiny"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> FAST:
     """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
@@ -133,7 +133,7 @@ def fast_tiny(
 def fast_small(
     model_path: str = default_cfgs["fast_small"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> FAST:
     """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
@@ -162,7 +162,7 @@ def fast_small(
 def fast_base(
     model_path: str = default_cfgs["fast_base"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> FAST:
     """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/models/linknet.py RENAMED Viewed

@@ -56,7 +56,7 @@ class LinkNet(Engine):
     def __init__(
         self,
         model_path: str,
-        engine_cfg: EngineConfig = EngineConfig(),
+        engine_cfg: Optional[EngineConfig] = None,
         bin_thresh: float = 0.1,
         box_thresh: float = 0.1,
         assume_straight_pages: bool = True,
@@ -94,7 +94,7 @@ def _linknet(
     arch: str,
     model_path: str,
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> LinkNet:
     # Patch the url
@@ -106,7 +106,7 @@ def _linknet(
 def linknet_resnet18(
     model_path: str = default_cfgs["linknet_resnet18"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> LinkNet:
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
@@ -135,7 +135,7 @@ def linknet_resnet18(
 def linknet_resnet34(
     model_path: str = default_cfgs["linknet_resnet34"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> LinkNet:
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
@@ -164,7 +164,7 @@ def linknet_resnet34(
 def linknet_resnet50(
     model_path: str = default_cfgs["linknet_resnet50"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> LinkNet:
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/zoo.py RENAMED Viewed

@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any
+from typing import Any, Optional
 from .. import detection
 from ..engine import EngineConfig
@@ -29,7 +29,7 @@ def _predictor(
     arch: Any,
     assume_straight_pages: bool = True,
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> DetectionPredictor:
     if isinstance(arch, str):
@@ -48,7 +48,7 @@ def _predictor(
     kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
     kwargs["std"] = kwargs.get("std", _model.cfg["std"])
-    kwargs["batch_size"] = kwargs.get("batch_size", 4)
+    kwargs["batch_size"] = kwargs.get("batch_size", 2)
     predictor = DetectionPredictor(
         PreProcessor(_model.cfg["input_shape"][1:], **kwargs),
         _model,
@@ -60,7 +60,7 @@ def detection_predictor(
     arch: Any = "fast_base",
     assume_straight_pages: bool = True,
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> DetectionPredictor:
     """Text detection architecture.

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/engine.py RENAMED Viewed

@@ -49,7 +49,7 @@ class EngineConfig:
                     {
                         "device_id": 0,
                         "arena_extend_strategy": "kNextPowerOfTwo",
-                        "cudnn_conv_algo_search": "EXHAUSTIVE",
+                        "cudnn_conv_algo_search": "DEFAULT",
                         "do_copy_in_default_stream": True,
                     },
                 ),
@@ -87,8 +87,8 @@ class Engine:
         **kwargs: additional arguments to be passed to `download_from_url`
     """
-    def __init__(self, url: str, engine_cfg: EngineConfig = EngineConfig(), **kwargs: Any) -> None:
-        engine_cfg = engine_cfg or EngineConfig()
+    def __init__(self, url: str, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any) -> None:
+        engine_cfg = engine_cfg if isinstance(engine_cfg, EngineConfig) else EngineConfig()
         archive_path = download_from_url(url, cache_subdir="models", **kwargs) if "http" in url else url
         self.session_options = engine_cfg.session_options
         self.providers = engine_cfg.providers

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/predictor/base.py RENAMED Viewed

@@ -50,7 +50,7 @@ class _OCRPredictor:
         symmetric_pad: bool = True,
         detect_orientation: bool = False,
         load_in_8_bit: bool = False,
-        clf_engine_cfg: EngineConfig = EngineConfig(),
+        clf_engine_cfg: Optional[EngineConfig] = None,
         **kwargs: Any,
     ) -> None:
         self.assume_straight_pages = assume_straight_pages

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/predictor/predictor.py RENAMED Viewed

@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any, List
+from typing import Any, List, Optional
 import numpy as np
@@ -52,7 +52,7 @@ class OCRPredictor(NestedObject, _OCRPredictor):
         symmetric_pad: bool = True,
         detect_orientation: bool = False,
         detect_language: bool = False,
-        clf_engine_cfg: EngineConfig = EngineConfig(),
+        clf_engine_cfg: Optional[EngineConfig] = None,
         **kwargs: Any,
     ) -> None:
         self.det_predictor = det_predictor

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/models/crnn.py RENAMED Viewed

@@ -124,7 +124,7 @@ class CRNN(Engine):
         self,
         model_path: str,
         vocab: str,
-        engine_cfg: EngineConfig = EngineConfig(),
+        engine_cfg: Optional[EngineConfig] = None,
         cfg: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> None:
@@ -154,7 +154,7 @@ def _crnn(
     arch: str,
     model_path: str,
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> CRNN:
     kwargs["vocab"] = kwargs.get("vocab", default_cfgs[arch]["vocab"])
@@ -172,7 +172,7 @@ def _crnn(
 def crnn_vgg16_bn(
     model_path: str = default_cfgs["crnn_vgg16_bn"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> CRNN:
     """CRNN with a VGG-16 backbone as described in `"An End-to-End Trainable Neural Network for Image-based
@@ -201,7 +201,7 @@ def crnn_vgg16_bn(
 def crnn_mobilenet_v3_small(
     model_path: str = default_cfgs["crnn_mobilenet_v3_small"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> CRNN:
     """CRNN with a MobileNet V3 Small backbone as described in `"An End-to-End Trainable Neural Network for Image-based
@@ -230,7 +230,7 @@ def crnn_mobilenet_v3_small(
 def crnn_mobilenet_v3_large(
     model_path: str = default_cfgs["crnn_mobilenet_v3_large"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> CRNN:
     """CRNN with a MobileNet V3 Large backbone as described in `"An End-to-End Trainable Neural Network for Image-based

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/models/master.py RENAMED Viewed

@@ -45,7 +45,7 @@ class MASTER(Engine):
         self,
         model_path: str,
         vocab: str,
-        engine_cfg: EngineConfig = EngineConfig(),
+        engine_cfg: Optional[EngineConfig] = None,
         cfg: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> None:
@@ -116,7 +116,7 @@ def _master(
     arch: str,
     model_path: str,
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> MASTER:
     # Patch the config
@@ -134,7 +134,7 @@ def _master(
 def master(
     model_path: str = default_cfgs["master"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> MASTER:
     """MASTER as described in paper: <https://arxiv.org/pdf/1910.02562.pdf>`_.

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/models/parseq.py RENAMED Viewed

@@ -44,7 +44,7 @@ class PARSeq(Engine):
         self,
         model_path: str,
         vocab: str,
-        engine_cfg: EngineConfig = EngineConfig(),
+        engine_cfg: Optional[EngineConfig] = None,
         cfg: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> None:
@@ -104,7 +104,7 @@ def _parseq(
     arch: str,
     model_path: str,
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> PARSeq:
     # Patch the config
@@ -123,7 +123,7 @@ def _parseq(
 def parseq(
     model_path: str = default_cfgs["parseq"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> PARSeq:
     """PARSeq architecture from

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/models/sar.py RENAMED Viewed

@@ -44,7 +44,7 @@ class SAR(Engine):
         self,
         model_path: str,
         vocab: str,
-        engine_cfg: EngineConfig = EngineConfig(),
+        engine_cfg: Optional[EngineConfig] = None,
         cfg: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> None:
@@ -103,7 +103,7 @@ def _sar(
     arch: str,
     model_path: str,
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> SAR:
     # Patch the config
@@ -122,7 +122,7 @@ def _sar(
 def sar_resnet31(
     model_path: str = default_cfgs["sar_resnet31"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> SAR:
     """SAR with a resnet-31 feature extractor as described in `"Show, Attend and Read:A Simple and Strong

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/models/vitstr.py RENAMED Viewed

@@ -52,7 +52,7 @@ class ViTSTR(Engine):
         self,
         model_path: str,
         vocab: str,
-        engine_cfg: EngineConfig = EngineConfig(),
+        engine_cfg: Optional[EngineConfig] = None,
         cfg: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> None:
@@ -114,7 +114,7 @@ def _vitstr(
     arch: str,
     model_path: str,
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> ViTSTR:
     # Patch the config
@@ -133,7 +133,7 @@ def _vitstr(
 def vitstr_small(
     model_path: str = default_cfgs["vitstr_small"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> ViTSTR:
     """ViTSTR-Small as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
@@ -162,7 +162,7 @@ def vitstr_small(
 def vitstr_base(
     model_path: str = default_cfgs["vitstr_base"]["url"],
     load_in_8_bit: bool = False,
-    engine_cfg: EngineConfig = EngineConfig(),
+    engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> ViTSTR:
     """ViTSTR-Base as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/zoo.py RENAMED Viewed

@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any, List
+from typing import Any, List, Optional
 from .. import recognition
 from ..engine import EngineConfig
@@ -26,7 +26,7 @@ ARCHS: List[str] = [
 def _predictor(
-    arch: Any, load_in_8_bit: bool = False, engine_cfg: EngineConfig = EngineConfig(), **kwargs: Any
+    arch: Any, load_in_8_bit: bool = False, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any
 ) -> RecognitionPredictor:
     if isinstance(arch, str):
         if arch not in ARCHS:
@@ -50,7 +50,7 @@ def _predictor(
 def recognition_predictor(
-    arch: Any = "crnn_vgg16_bn", load_in_8_bit: bool = False, engine_cfg: EngineConfig = EngineConfig(), **kwargs: Any
+    arch: Any = "crnn_vgg16_bn", load_in_8_bit: bool = False, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any
 ) -> RecognitionPredictor:
     """Text recognition architecture.

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/zoo.py RENAMED Viewed

@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any
+from typing import Any, Optional
 from .detection.zoo import detection_predictor
 from .engine import EngineConfig
@@ -19,15 +19,15 @@ def _predictor(
     assume_straight_pages: bool = True,
     preserve_aspect_ratio: bool = True,
     symmetric_pad: bool = True,
-    det_bs: int = 4,
-    reco_bs: int = 1024,
+    det_bs: int = 2,
+    reco_bs: int = 512,
     detect_orientation: bool = False,
     straighten_pages: bool = False,
     detect_language: bool = False,
     load_in_8_bit: bool = False,
-    det_engine_cfg: EngineConfig = EngineConfig(),
-    reco_engine_cfg: EngineConfig = EngineConfig(),
-    clf_engine_cfg: EngineConfig = EngineConfig(),
+    det_engine_cfg: Optional[EngineConfig] = None,
+    reco_engine_cfg: Optional[EngineConfig] = None,
+    clf_engine_cfg: Optional[EngineConfig] = None,
     **kwargs,
 ) -> OCRPredictor:
     # Detection
@@ -74,9 +74,9 @@ def ocr_predictor(
     straighten_pages: bool = False,
     detect_language: bool = False,
     load_in_8_bit: bool = False,
-    det_engine_cfg: EngineConfig = EngineConfig(),
-    reco_engine_cfg: EngineConfig = EngineConfig(),
-    clf_engine_cfg: EngineConfig = EngineConfig(),
+    det_engine_cfg: Optional[EngineConfig] = None,
+    reco_engine_cfg: Optional[EngineConfig] = None,
+    clf_engine_cfg: Optional[EngineConfig] = None,
     **kwargs: Any,
 ) -> OCRPredictor:
     """End-to-end OCR architecture using one model for localization, and another for text recognition.

onnxtr-0.3.1/onnxtr/version.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = 'v0.3.1'

{onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: onnxtr
-Version: 0.3.0
+Version: 0.3.1
 Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
 Author-email: Felix Dittrich <felixdittrich92@gmail.com>
 Maintainer: Felix Dittrich
@@ -275,7 +275,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
 [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[![Pypi](https://img.shields.io/badge/pypi-v0.3.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Pypi](https://img.shields.io/badge/pypi-v0.3.1-blue.svg)](https://pypi.org/project/OnnxTR/)
 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
@@ -345,8 +345,8 @@ from onnxtr.models import ocr_predictor, EngineConfig
 model = ocr_predictor(
     det_arch='fast_base',  # detection architecture
     reco_arch='vitstr_base',  # recognition architecture
-    det_bs=4, # detection batch size
-    reco_bs=1024, # recognition batch size
+    det_bs=2, # detection batch size
+    reco_bs=512, # recognition batch size
     assume_straight_pages=True,  # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
     straighten_pages=False,  # set to `True` if the pages should be straightened before final processing (default: False)
     # Preprocessing related parameters
@@ -419,7 +419,7 @@ general_options.enable_cpu_mem_arena = False
 # NOTE: The following would force to run only on the GPU if no GPU is available it will raise an error
 # List of strings e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"] or a list of tuples with the provider and its options e.g.
 # [("CUDAExecutionProvider", {"device_id": 0}), ("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
-providers = [("CUDAExecutionProvider", {"device_id": 0})]  # For available providers see: https://onnxruntime.ai/docs/execution-providers/
+providers = [("CUDAExecutionProvider", {"device_id": 0, "cudnn_conv_algo_search": "DEFAULT"})]  # For available providers see: https://onnxruntime.ai/docs/execution-providers/
 engine_config = EngineConfig(
     session_options=general_options,
@@ -451,7 +451,7 @@ model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
 ## Models architectures
-Credits where it's due: this repository is implementing, among others, architectures from published research papers.
+Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:
 ### Text Detection

{onnxtr-0.3.0 → onnxtr-0.3.1}/setup.py RENAMED Viewed

@@ -9,7 +9,7 @@ from pathlib import Path
 from setuptools import setup
 PKG_NAME = "onnxtr"
-VERSION = os.getenv("BUILD_VERSION", "0.3.0a0")
+VERSION = os.getenv("BUILD_VERSION", "0.3.1a0")
 if __name__ == "__main__":