onnxtr 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {onnxtr-0.3.0 → onnxtr-0.3.1}/PKG-INFO +6 -6
- {onnxtr-0.3.0 → onnxtr-0.3.1}/README.md +5 -5
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/contrib/base.py +1 -4
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/classification/models/mobilenet.py +4 -4
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/classification/zoo.py +5 -5
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/models/differentiable_binarization.py +5 -5
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/models/fast.py +5 -5
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/models/linknet.py +5 -5
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/zoo.py +4 -4
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/engine.py +3 -3
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/predictor/base.py +1 -1
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/predictor/predictor.py +2 -2
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/models/crnn.py +5 -5
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/models/master.py +3 -3
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/models/parseq.py +3 -3
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/models/sar.py +3 -3
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/models/vitstr.py +4 -4
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/zoo.py +3 -3
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/zoo.py +9 -9
- onnxtr-0.3.1/onnxtr/version.py +1 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr.egg-info/PKG-INFO +6 -6
- {onnxtr-0.3.0 → onnxtr-0.3.1}/setup.py +1 -1
- onnxtr-0.3.0/onnxtr/version.py +0 -1
- {onnxtr-0.3.0 → onnxtr-0.3.1}/LICENSE +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/contrib/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/contrib/artefacts.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/file_utils.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/io/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/io/elements.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/io/html.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/io/image.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/io/pdf.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/io/reader.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/_utils.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/builder.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/classification/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/classification/models/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/classification/predictor/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/classification/predictor/base.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/_utils/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/_utils/base.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/core.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/models/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/postprocessor/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/postprocessor/base.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/predictor/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/detection/predictor/base.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/predictor/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/preprocessor/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/preprocessor/base.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/core.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/models/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/predictor/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/predictor/_utils.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/predictor/base.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/models/recognition/utils.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/py.typed +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/transforms/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/transforms/base.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/utils/__init__.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/utils/common_types.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/utils/data.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/utils/fonts.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/utils/geometry.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/utils/multithreading.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/utils/reconstitution.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/utils/repr.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/utils/visualization.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr/utils/vocabs.py +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr.egg-info/SOURCES.txt +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr.egg-info/dependency_links.txt +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr.egg-info/requires.txt +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr.egg-info/top_level.txt +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/onnxtr.egg-info/zip-safe +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/pyproject.toml +0 -0
- {onnxtr-0.3.0 → onnxtr-0.3.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: onnxtr
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -275,7 +275,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
|
275
275
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
276
276
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
277
277
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
278
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
279
279
|
|
|
280
280
|
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
281
281
|
|
|
@@ -345,8 +345,8 @@ from onnxtr.models import ocr_predictor, EngineConfig
|
|
|
345
345
|
model = ocr_predictor(
|
|
346
346
|
det_arch='fast_base', # detection architecture
|
|
347
347
|
reco_arch='vitstr_base', # recognition architecture
|
|
348
|
-
det_bs=
|
|
349
|
-
reco_bs=
|
|
348
|
+
det_bs=2, # detection batch size
|
|
349
|
+
reco_bs=512, # recognition batch size
|
|
350
350
|
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
|
|
351
351
|
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
|
|
352
352
|
# Preprocessing related parameters
|
|
@@ -419,7 +419,7 @@ general_options.enable_cpu_mem_arena = False
|
|
|
419
419
|
# NOTE: The following would force to run only on the GPU if no GPU is available it will raise an error
|
|
420
420
|
# List of strings e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"] or a list of tuples with the provider and its options e.g.
|
|
421
421
|
# [("CUDAExecutionProvider", {"device_id": 0}), ("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
|
|
422
|
-
providers = [("CUDAExecutionProvider", {"device_id": 0})] # For available providers see: https://onnxruntime.ai/docs/execution-providers/
|
|
422
|
+
providers = [("CUDAExecutionProvider", {"device_id": 0, "cudnn_conv_algo_search": "DEFAULT"})] # For available providers see: https://onnxruntime.ai/docs/execution-providers/
|
|
423
423
|
|
|
424
424
|
engine_config = EngineConfig(
|
|
425
425
|
session_options=general_options,
|
|
@@ -451,7 +451,7 @@ model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
|
|
|
451
451
|
|
|
452
452
|
## Models architectures
|
|
453
453
|
|
|
454
|
-
Credits where it's due: this repository
|
|
454
|
+
Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:
|
|
455
455
|
|
|
456
456
|
### Text Detection
|
|
457
457
|
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
8
8
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
9
9
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
10
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
11
11
|
|
|
12
12
|
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
13
13
|
|
|
@@ -77,8 +77,8 @@ from onnxtr.models import ocr_predictor, EngineConfig
|
|
|
77
77
|
model = ocr_predictor(
|
|
78
78
|
det_arch='fast_base', # detection architecture
|
|
79
79
|
reco_arch='vitstr_base', # recognition architecture
|
|
80
|
-
det_bs=
|
|
81
|
-
reco_bs=
|
|
80
|
+
det_bs=2, # detection batch size
|
|
81
|
+
reco_bs=512, # recognition batch size
|
|
82
82
|
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
|
|
83
83
|
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
|
|
84
84
|
# Preprocessing related parameters
|
|
@@ -151,7 +151,7 @@ general_options.enable_cpu_mem_arena = False
|
|
|
151
151
|
# NOTE: The following would force to run only on the GPU if no GPU is available it will raise an error
|
|
152
152
|
# List of strings e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"] or a list of tuples with the provider and its options e.g.
|
|
153
153
|
# [("CUDAExecutionProvider", {"device_id": 0}), ("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
|
|
154
|
-
providers = [("CUDAExecutionProvider", {"device_id": 0})] # For available providers see: https://onnxruntime.ai/docs/execution-providers/
|
|
154
|
+
providers = [("CUDAExecutionProvider", {"device_id": 0, "cudnn_conv_algo_search": "DEFAULT"})] # For available providers see: https://onnxruntime.ai/docs/execution-providers/
|
|
155
155
|
|
|
156
156
|
engine_config = EngineConfig(
|
|
157
157
|
session_options=general_options,
|
|
@@ -183,7 +183,7 @@ model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
|
|
|
183
183
|
|
|
184
184
|
## Models architectures
|
|
185
185
|
|
|
186
|
-
Credits where it's due: this repository
|
|
186
|
+
Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:
|
|
187
187
|
|
|
188
188
|
### Text Detection
|
|
189
189
|
|
|
@@ -6,8 +6,8 @@
|
|
|
6
6
|
from typing import Any, List, Optional
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
|
+
import onnxruntime as ort
|
|
9
10
|
|
|
10
|
-
from onnxtr.file_utils import requires_package
|
|
11
11
|
from onnxtr.utils.data import download_from_url
|
|
12
12
|
|
|
13
13
|
|
|
@@ -44,9 +44,6 @@ class _BasePredictor:
|
|
|
44
44
|
-------
|
|
45
45
|
Any: the ONNX loaded model
|
|
46
46
|
"""
|
|
47
|
-
requires_package("onnxruntime", "`.contrib` module requires `onnxruntime` to be installed.")
|
|
48
|
-
import onnxruntime as ort
|
|
49
|
-
|
|
50
47
|
if not url and not model_path:
|
|
51
48
|
raise ValueError("You must provide either a url or a model_path")
|
|
52
49
|
onnx_model_path = model_path if model_path else str(download_from_url(url, cache_subdir="models", **kwargs)) # type: ignore[arg-type]
|
|
@@ -51,7 +51,7 @@ class MobileNetV3(Engine):
|
|
|
51
51
|
def __init__(
|
|
52
52
|
self,
|
|
53
53
|
model_path: str,
|
|
54
|
-
engine_cfg: EngineConfig =
|
|
54
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
55
55
|
cfg: Optional[Dict[str, Any]] = None,
|
|
56
56
|
**kwargs: Any,
|
|
57
57
|
) -> None:
|
|
@@ -69,7 +69,7 @@ def _mobilenet_v3(
|
|
|
69
69
|
arch: str,
|
|
70
70
|
model_path: str,
|
|
71
71
|
load_in_8_bit: bool = False,
|
|
72
|
-
engine_cfg: EngineConfig =
|
|
72
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
73
73
|
**kwargs: Any,
|
|
74
74
|
) -> MobileNetV3:
|
|
75
75
|
# Patch the url
|
|
@@ -81,7 +81,7 @@ def _mobilenet_v3(
|
|
|
81
81
|
def mobilenet_v3_small_crop_orientation(
|
|
82
82
|
model_path: str = default_cfgs["mobilenet_v3_small_crop_orientation"]["url"],
|
|
83
83
|
load_in_8_bit: bool = False,
|
|
84
|
-
engine_cfg: EngineConfig =
|
|
84
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
85
85
|
**kwargs: Any,
|
|
86
86
|
) -> MobileNetV3:
|
|
87
87
|
"""MobileNetV3-Small architecture as described in
|
|
@@ -111,7 +111,7 @@ def mobilenet_v3_small_crop_orientation(
|
|
|
111
111
|
def mobilenet_v3_small_page_orientation(
|
|
112
112
|
model_path: str = default_cfgs["mobilenet_v3_small_page_orientation"]["url"],
|
|
113
113
|
load_in_8_bit: bool = False,
|
|
114
|
-
engine_cfg: EngineConfig =
|
|
114
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
115
115
|
**kwargs: Any,
|
|
116
116
|
) -> MobileNetV3:
|
|
117
117
|
"""MobileNetV3-Small architecture as described in
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
-
from typing import Any, List
|
|
6
|
+
from typing import Any, List, Optional
|
|
7
7
|
|
|
8
8
|
from onnxtr.models.engine import EngineConfig
|
|
9
9
|
|
|
@@ -17,7 +17,7 @@ ORIENTATION_ARCHS: List[str] = ["mobilenet_v3_small_crop_orientation", "mobilene
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def _orientation_predictor(
|
|
20
|
-
arch: str, load_in_8_bit: bool = False, engine_cfg: EngineConfig =
|
|
20
|
+
arch: str, load_in_8_bit: bool = False, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any
|
|
21
21
|
) -> OrientationPredictor:
|
|
22
22
|
if arch not in ORIENTATION_ARCHS:
|
|
23
23
|
raise ValueError(f"unknown architecture '{arch}'")
|
|
@@ -26,7 +26,7 @@ def _orientation_predictor(
|
|
|
26
26
|
_model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
|
|
27
27
|
kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
|
|
28
28
|
kwargs["std"] = kwargs.get("std", _model.cfg["std"])
|
|
29
|
-
kwargs["batch_size"] = kwargs.get("batch_size",
|
|
29
|
+
kwargs["batch_size"] = kwargs.get("batch_size", 512 if "crop" in arch else 2)
|
|
30
30
|
input_shape = _model.cfg["input_shape"][1:]
|
|
31
31
|
predictor = OrientationPredictor(
|
|
32
32
|
PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs),
|
|
@@ -38,7 +38,7 @@ def _orientation_predictor(
|
|
|
38
38
|
def crop_orientation_predictor(
|
|
39
39
|
arch: Any = "mobilenet_v3_small_crop_orientation",
|
|
40
40
|
load_in_8_bit: bool = False,
|
|
41
|
-
engine_cfg: EngineConfig =
|
|
41
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
42
42
|
**kwargs: Any,
|
|
43
43
|
) -> OrientationPredictor:
|
|
44
44
|
"""Crop orientation classification architecture.
|
|
@@ -66,7 +66,7 @@ def crop_orientation_predictor(
|
|
|
66
66
|
def page_orientation_predictor(
|
|
67
67
|
arch: Any = "mobilenet_v3_small_page_orientation",
|
|
68
68
|
load_in_8_bit: bool = False,
|
|
69
|
-
engine_cfg: EngineConfig =
|
|
69
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
70
70
|
**kwargs: Any,
|
|
71
71
|
) -> OrientationPredictor:
|
|
72
72
|
"""Page orientation classification architecture.
|
|
@@ -56,7 +56,7 @@ class DBNet(Engine):
|
|
|
56
56
|
def __init__(
|
|
57
57
|
self,
|
|
58
58
|
model_path: str,
|
|
59
|
-
engine_cfg: EngineConfig =
|
|
59
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
60
60
|
bin_thresh: float = 0.3,
|
|
61
61
|
box_thresh: float = 0.1,
|
|
62
62
|
assume_straight_pages: bool = True,
|
|
@@ -93,7 +93,7 @@ def _dbnet(
|
|
|
93
93
|
arch: str,
|
|
94
94
|
model_path: str,
|
|
95
95
|
load_in_8_bit: bool = False,
|
|
96
|
-
engine_cfg: EngineConfig =
|
|
96
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
97
97
|
**kwargs: Any,
|
|
98
98
|
) -> DBNet:
|
|
99
99
|
# Patch the url
|
|
@@ -105,7 +105,7 @@ def _dbnet(
|
|
|
105
105
|
def db_resnet34(
|
|
106
106
|
model_path: str = default_cfgs["db_resnet34"]["url"],
|
|
107
107
|
load_in_8_bit: bool = False,
|
|
108
|
-
engine_cfg: EngineConfig =
|
|
108
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
109
109
|
**kwargs: Any,
|
|
110
110
|
) -> DBNet:
|
|
111
111
|
"""DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
|
|
@@ -134,7 +134,7 @@ def db_resnet34(
|
|
|
134
134
|
def db_resnet50(
|
|
135
135
|
model_path: str = default_cfgs["db_resnet50"]["url"],
|
|
136
136
|
load_in_8_bit: bool = False,
|
|
137
|
-
engine_cfg: EngineConfig =
|
|
137
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
138
138
|
**kwargs: Any,
|
|
139
139
|
) -> DBNet:
|
|
140
140
|
"""DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
|
|
@@ -163,7 +163,7 @@ def db_resnet50(
|
|
|
163
163
|
def db_mobilenet_v3_large(
|
|
164
164
|
model_path: str = default_cfgs["db_mobilenet_v3_large"]["url"],
|
|
165
165
|
load_in_8_bit: bool = False,
|
|
166
|
-
engine_cfg: EngineConfig =
|
|
166
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
167
167
|
**kwargs: Any,
|
|
168
168
|
) -> DBNet:
|
|
169
169
|
"""DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
|
|
@@ -54,7 +54,7 @@ class FAST(Engine):
|
|
|
54
54
|
def __init__(
|
|
55
55
|
self,
|
|
56
56
|
model_path: str,
|
|
57
|
-
engine_cfg: EngineConfig =
|
|
57
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
58
58
|
bin_thresh: float = 0.1,
|
|
59
59
|
box_thresh: float = 0.1,
|
|
60
60
|
assume_straight_pages: bool = True,
|
|
@@ -92,7 +92,7 @@ def _fast(
|
|
|
92
92
|
arch: str,
|
|
93
93
|
model_path: str,
|
|
94
94
|
load_in_8_bit: bool = False,
|
|
95
|
-
engine_cfg: EngineConfig =
|
|
95
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
96
96
|
**kwargs: Any,
|
|
97
97
|
) -> FAST:
|
|
98
98
|
if load_in_8_bit:
|
|
@@ -104,7 +104,7 @@ def _fast(
|
|
|
104
104
|
def fast_tiny(
|
|
105
105
|
model_path: str = default_cfgs["fast_tiny"]["url"],
|
|
106
106
|
load_in_8_bit: bool = False,
|
|
107
|
-
engine_cfg: EngineConfig =
|
|
107
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
108
108
|
**kwargs: Any,
|
|
109
109
|
) -> FAST:
|
|
110
110
|
"""FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
|
|
@@ -133,7 +133,7 @@ def fast_tiny(
|
|
|
133
133
|
def fast_small(
|
|
134
134
|
model_path: str = default_cfgs["fast_small"]["url"],
|
|
135
135
|
load_in_8_bit: bool = False,
|
|
136
|
-
engine_cfg: EngineConfig =
|
|
136
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
137
137
|
**kwargs: Any,
|
|
138
138
|
) -> FAST:
|
|
139
139
|
"""FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
|
|
@@ -162,7 +162,7 @@ def fast_small(
|
|
|
162
162
|
def fast_base(
|
|
163
163
|
model_path: str = default_cfgs["fast_base"]["url"],
|
|
164
164
|
load_in_8_bit: bool = False,
|
|
165
|
-
engine_cfg: EngineConfig =
|
|
165
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
166
166
|
**kwargs: Any,
|
|
167
167
|
) -> FAST:
|
|
168
168
|
"""FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
|
|
@@ -56,7 +56,7 @@ class LinkNet(Engine):
|
|
|
56
56
|
def __init__(
|
|
57
57
|
self,
|
|
58
58
|
model_path: str,
|
|
59
|
-
engine_cfg: EngineConfig =
|
|
59
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
60
60
|
bin_thresh: float = 0.1,
|
|
61
61
|
box_thresh: float = 0.1,
|
|
62
62
|
assume_straight_pages: bool = True,
|
|
@@ -94,7 +94,7 @@ def _linknet(
|
|
|
94
94
|
arch: str,
|
|
95
95
|
model_path: str,
|
|
96
96
|
load_in_8_bit: bool = False,
|
|
97
|
-
engine_cfg: EngineConfig =
|
|
97
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
98
98
|
**kwargs: Any,
|
|
99
99
|
) -> LinkNet:
|
|
100
100
|
# Patch the url
|
|
@@ -106,7 +106,7 @@ def _linknet(
|
|
|
106
106
|
def linknet_resnet18(
|
|
107
107
|
model_path: str = default_cfgs["linknet_resnet18"]["url"],
|
|
108
108
|
load_in_8_bit: bool = False,
|
|
109
|
-
engine_cfg: EngineConfig =
|
|
109
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
110
110
|
**kwargs: Any,
|
|
111
111
|
) -> LinkNet:
|
|
112
112
|
"""LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
|
|
@@ -135,7 +135,7 @@ def linknet_resnet18(
|
|
|
135
135
|
def linknet_resnet34(
|
|
136
136
|
model_path: str = default_cfgs["linknet_resnet34"]["url"],
|
|
137
137
|
load_in_8_bit: bool = False,
|
|
138
|
-
engine_cfg: EngineConfig =
|
|
138
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
139
139
|
**kwargs: Any,
|
|
140
140
|
) -> LinkNet:
|
|
141
141
|
"""LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
|
|
@@ -164,7 +164,7 @@ def linknet_resnet34(
|
|
|
164
164
|
def linknet_resnet50(
|
|
165
165
|
model_path: str = default_cfgs["linknet_resnet50"]["url"],
|
|
166
166
|
load_in_8_bit: bool = False,
|
|
167
|
-
engine_cfg: EngineConfig =
|
|
167
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
168
168
|
**kwargs: Any,
|
|
169
169
|
) -> LinkNet:
|
|
170
170
|
"""LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any, Optional
|
|
7
7
|
|
|
8
8
|
from .. import detection
|
|
9
9
|
from ..engine import EngineConfig
|
|
@@ -29,7 +29,7 @@ def _predictor(
|
|
|
29
29
|
arch: Any,
|
|
30
30
|
assume_straight_pages: bool = True,
|
|
31
31
|
load_in_8_bit: bool = False,
|
|
32
|
-
engine_cfg: EngineConfig =
|
|
32
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
33
33
|
**kwargs: Any,
|
|
34
34
|
) -> DetectionPredictor:
|
|
35
35
|
if isinstance(arch, str):
|
|
@@ -48,7 +48,7 @@ def _predictor(
|
|
|
48
48
|
|
|
49
49
|
kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
|
|
50
50
|
kwargs["std"] = kwargs.get("std", _model.cfg["std"])
|
|
51
|
-
kwargs["batch_size"] = kwargs.get("batch_size",
|
|
51
|
+
kwargs["batch_size"] = kwargs.get("batch_size", 2)
|
|
52
52
|
predictor = DetectionPredictor(
|
|
53
53
|
PreProcessor(_model.cfg["input_shape"][1:], **kwargs),
|
|
54
54
|
_model,
|
|
@@ -60,7 +60,7 @@ def detection_predictor(
|
|
|
60
60
|
arch: Any = "fast_base",
|
|
61
61
|
assume_straight_pages: bool = True,
|
|
62
62
|
load_in_8_bit: bool = False,
|
|
63
|
-
engine_cfg: EngineConfig =
|
|
63
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
64
64
|
**kwargs: Any,
|
|
65
65
|
) -> DetectionPredictor:
|
|
66
66
|
"""Text detection architecture.
|
|
@@ -49,7 +49,7 @@ class EngineConfig:
|
|
|
49
49
|
{
|
|
50
50
|
"device_id": 0,
|
|
51
51
|
"arena_extend_strategy": "kNextPowerOfTwo",
|
|
52
|
-
"cudnn_conv_algo_search": "
|
|
52
|
+
"cudnn_conv_algo_search": "DEFAULT",
|
|
53
53
|
"do_copy_in_default_stream": True,
|
|
54
54
|
},
|
|
55
55
|
),
|
|
@@ -87,8 +87,8 @@ class Engine:
|
|
|
87
87
|
**kwargs: additional arguments to be passed to `download_from_url`
|
|
88
88
|
"""
|
|
89
89
|
|
|
90
|
-
def __init__(self, url: str, engine_cfg: EngineConfig =
|
|
91
|
-
engine_cfg = engine_cfg
|
|
90
|
+
def __init__(self, url: str, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any) -> None:
|
|
91
|
+
engine_cfg = engine_cfg if isinstance(engine_cfg, EngineConfig) else EngineConfig()
|
|
92
92
|
archive_path = download_from_url(url, cache_subdir="models", **kwargs) if "http" in url else url
|
|
93
93
|
self.session_options = engine_cfg.session_options
|
|
94
94
|
self.providers = engine_cfg.providers
|
|
@@ -50,7 +50,7 @@ class _OCRPredictor:
|
|
|
50
50
|
symmetric_pad: bool = True,
|
|
51
51
|
detect_orientation: bool = False,
|
|
52
52
|
load_in_8_bit: bool = False,
|
|
53
|
-
clf_engine_cfg: EngineConfig =
|
|
53
|
+
clf_engine_cfg: Optional[EngineConfig] = None,
|
|
54
54
|
**kwargs: Any,
|
|
55
55
|
) -> None:
|
|
56
56
|
self.assume_straight_pages = assume_straight_pages
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
-
from typing import Any, List
|
|
6
|
+
from typing import Any, List, Optional
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
|
|
@@ -52,7 +52,7 @@ class OCRPredictor(NestedObject, _OCRPredictor):
|
|
|
52
52
|
symmetric_pad: bool = True,
|
|
53
53
|
detect_orientation: bool = False,
|
|
54
54
|
detect_language: bool = False,
|
|
55
|
-
clf_engine_cfg: EngineConfig =
|
|
55
|
+
clf_engine_cfg: Optional[EngineConfig] = None,
|
|
56
56
|
**kwargs: Any,
|
|
57
57
|
) -> None:
|
|
58
58
|
self.det_predictor = det_predictor
|
|
@@ -124,7 +124,7 @@ class CRNN(Engine):
|
|
|
124
124
|
self,
|
|
125
125
|
model_path: str,
|
|
126
126
|
vocab: str,
|
|
127
|
-
engine_cfg: EngineConfig =
|
|
127
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
128
128
|
cfg: Optional[Dict[str, Any]] = None,
|
|
129
129
|
**kwargs: Any,
|
|
130
130
|
) -> None:
|
|
@@ -154,7 +154,7 @@ def _crnn(
|
|
|
154
154
|
arch: str,
|
|
155
155
|
model_path: str,
|
|
156
156
|
load_in_8_bit: bool = False,
|
|
157
|
-
engine_cfg: EngineConfig =
|
|
157
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
158
158
|
**kwargs: Any,
|
|
159
159
|
) -> CRNN:
|
|
160
160
|
kwargs["vocab"] = kwargs.get("vocab", default_cfgs[arch]["vocab"])
|
|
@@ -172,7 +172,7 @@ def _crnn(
|
|
|
172
172
|
def crnn_vgg16_bn(
|
|
173
173
|
model_path: str = default_cfgs["crnn_vgg16_bn"]["url"],
|
|
174
174
|
load_in_8_bit: bool = False,
|
|
175
|
-
engine_cfg: EngineConfig =
|
|
175
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
176
176
|
**kwargs: Any,
|
|
177
177
|
) -> CRNN:
|
|
178
178
|
"""CRNN with a VGG-16 backbone as described in `"An End-to-End Trainable Neural Network for Image-based
|
|
@@ -201,7 +201,7 @@ def crnn_vgg16_bn(
|
|
|
201
201
|
def crnn_mobilenet_v3_small(
|
|
202
202
|
model_path: str = default_cfgs["crnn_mobilenet_v3_small"]["url"],
|
|
203
203
|
load_in_8_bit: bool = False,
|
|
204
|
-
engine_cfg: EngineConfig =
|
|
204
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
205
205
|
**kwargs: Any,
|
|
206
206
|
) -> CRNN:
|
|
207
207
|
"""CRNN with a MobileNet V3 Small backbone as described in `"An End-to-End Trainable Neural Network for Image-based
|
|
@@ -230,7 +230,7 @@ def crnn_mobilenet_v3_small(
|
|
|
230
230
|
def crnn_mobilenet_v3_large(
|
|
231
231
|
model_path: str = default_cfgs["crnn_mobilenet_v3_large"]["url"],
|
|
232
232
|
load_in_8_bit: bool = False,
|
|
233
|
-
engine_cfg: EngineConfig =
|
|
233
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
234
234
|
**kwargs: Any,
|
|
235
235
|
) -> CRNN:
|
|
236
236
|
"""CRNN with a MobileNet V3 Large backbone as described in `"An End-to-End Trainable Neural Network for Image-based
|
|
@@ -45,7 +45,7 @@ class MASTER(Engine):
|
|
|
45
45
|
self,
|
|
46
46
|
model_path: str,
|
|
47
47
|
vocab: str,
|
|
48
|
-
engine_cfg: EngineConfig =
|
|
48
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
49
49
|
cfg: Optional[Dict[str, Any]] = None,
|
|
50
50
|
**kwargs: Any,
|
|
51
51
|
) -> None:
|
|
@@ -116,7 +116,7 @@ def _master(
|
|
|
116
116
|
arch: str,
|
|
117
117
|
model_path: str,
|
|
118
118
|
load_in_8_bit: bool = False,
|
|
119
|
-
engine_cfg: EngineConfig =
|
|
119
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
120
120
|
**kwargs: Any,
|
|
121
121
|
) -> MASTER:
|
|
122
122
|
# Patch the config
|
|
@@ -134,7 +134,7 @@ def _master(
|
|
|
134
134
|
def master(
|
|
135
135
|
model_path: str = default_cfgs["master"]["url"],
|
|
136
136
|
load_in_8_bit: bool = False,
|
|
137
|
-
engine_cfg: EngineConfig =
|
|
137
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
138
138
|
**kwargs: Any,
|
|
139
139
|
) -> MASTER:
|
|
140
140
|
"""MASTER as described in paper: <https://arxiv.org/pdf/1910.02562.pdf>`_.
|
|
@@ -44,7 +44,7 @@ class PARSeq(Engine):
|
|
|
44
44
|
self,
|
|
45
45
|
model_path: str,
|
|
46
46
|
vocab: str,
|
|
47
|
-
engine_cfg: EngineConfig =
|
|
47
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
48
48
|
cfg: Optional[Dict[str, Any]] = None,
|
|
49
49
|
**kwargs: Any,
|
|
50
50
|
) -> None:
|
|
@@ -104,7 +104,7 @@ def _parseq(
|
|
|
104
104
|
arch: str,
|
|
105
105
|
model_path: str,
|
|
106
106
|
load_in_8_bit: bool = False,
|
|
107
|
-
engine_cfg: EngineConfig =
|
|
107
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
108
108
|
**kwargs: Any,
|
|
109
109
|
) -> PARSeq:
|
|
110
110
|
# Patch the config
|
|
@@ -123,7 +123,7 @@ def _parseq(
|
|
|
123
123
|
def parseq(
|
|
124
124
|
model_path: str = default_cfgs["parseq"]["url"],
|
|
125
125
|
load_in_8_bit: bool = False,
|
|
126
|
-
engine_cfg: EngineConfig =
|
|
126
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
127
127
|
**kwargs: Any,
|
|
128
128
|
) -> PARSeq:
|
|
129
129
|
"""PARSeq architecture from
|
|
@@ -44,7 +44,7 @@ class SAR(Engine):
|
|
|
44
44
|
self,
|
|
45
45
|
model_path: str,
|
|
46
46
|
vocab: str,
|
|
47
|
-
engine_cfg: EngineConfig =
|
|
47
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
48
48
|
cfg: Optional[Dict[str, Any]] = None,
|
|
49
49
|
**kwargs: Any,
|
|
50
50
|
) -> None:
|
|
@@ -103,7 +103,7 @@ def _sar(
|
|
|
103
103
|
arch: str,
|
|
104
104
|
model_path: str,
|
|
105
105
|
load_in_8_bit: bool = False,
|
|
106
|
-
engine_cfg: EngineConfig =
|
|
106
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
107
107
|
**kwargs: Any,
|
|
108
108
|
) -> SAR:
|
|
109
109
|
# Patch the config
|
|
@@ -122,7 +122,7 @@ def _sar(
|
|
|
122
122
|
def sar_resnet31(
|
|
123
123
|
model_path: str = default_cfgs["sar_resnet31"]["url"],
|
|
124
124
|
load_in_8_bit: bool = False,
|
|
125
|
-
engine_cfg: EngineConfig =
|
|
125
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
126
126
|
**kwargs: Any,
|
|
127
127
|
) -> SAR:
|
|
128
128
|
"""SAR with a resnet-31 feature extractor as described in `"Show, Attend and Read:A Simple and Strong
|
|
@@ -52,7 +52,7 @@ class ViTSTR(Engine):
|
|
|
52
52
|
self,
|
|
53
53
|
model_path: str,
|
|
54
54
|
vocab: str,
|
|
55
|
-
engine_cfg: EngineConfig =
|
|
55
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
56
56
|
cfg: Optional[Dict[str, Any]] = None,
|
|
57
57
|
**kwargs: Any,
|
|
58
58
|
) -> None:
|
|
@@ -114,7 +114,7 @@ def _vitstr(
|
|
|
114
114
|
arch: str,
|
|
115
115
|
model_path: str,
|
|
116
116
|
load_in_8_bit: bool = False,
|
|
117
|
-
engine_cfg: EngineConfig =
|
|
117
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
118
118
|
**kwargs: Any,
|
|
119
119
|
) -> ViTSTR:
|
|
120
120
|
# Patch the config
|
|
@@ -133,7 +133,7 @@ def _vitstr(
|
|
|
133
133
|
def vitstr_small(
|
|
134
134
|
model_path: str = default_cfgs["vitstr_small"]["url"],
|
|
135
135
|
load_in_8_bit: bool = False,
|
|
136
|
-
engine_cfg: EngineConfig = EngineConfig(),
|
|
136
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
137
137
|
**kwargs: Any,
|
|
138
138
|
) -> ViTSTR:
|
|
139
139
|
"""ViTSTR-Small as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
|
|
@@ -162,7 +162,7 @@ def vitstr_small(
|
|
|
162
162
|
def vitstr_base(
|
|
163
163
|
model_path: str = default_cfgs["vitstr_base"]["url"],
|
|
164
164
|
load_in_8_bit: bool = False,
|
|
165
|
-
engine_cfg: EngineConfig = EngineConfig(),
|
|
165
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
166
166
|
**kwargs: Any,
|
|
167
167
|
) -> ViTSTR:
|
|
168
168
|
"""ViTSTR-Base as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
-
from typing import Any, List
|
|
6
|
+
from typing import Any, List, Optional
|
|
7
7
|
|
|
8
8
|
from .. import recognition
|
|
9
9
|
from ..engine import EngineConfig
|
|
@@ -26,7 +26,7 @@ ARCHS: List[str] = [
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def _predictor(
|
|
29
|
-
arch: Any, load_in_8_bit: bool = False, engine_cfg: EngineConfig = EngineConfig(), **kwargs: Any
|
|
29
|
+
arch: Any, load_in_8_bit: bool = False, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any
|
|
30
30
|
) -> RecognitionPredictor:
|
|
31
31
|
if isinstance(arch, str):
|
|
32
32
|
if arch not in ARCHS:
|
|
@@ -50,7 +50,7 @@ def _predictor(
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
def recognition_predictor(
|
|
53
|
-
arch: Any = "crnn_vgg16_bn", load_in_8_bit: bool = False, engine_cfg: EngineConfig = EngineConfig(), **kwargs: Any
|
|
53
|
+
arch: Any = "crnn_vgg16_bn", load_in_8_bit: bool = False, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any
|
|
54
54
|
) -> RecognitionPredictor:
|
|
55
55
|
"""Text recognition architecture.
|
|
56
56
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any, Optional
|
|
7
7
|
|
|
8
8
|
from .detection.zoo import detection_predictor
|
|
9
9
|
from .engine import EngineConfig
|
|
@@ -19,15 +19,15 @@ def _predictor(
|
|
|
19
19
|
assume_straight_pages: bool = True,
|
|
20
20
|
preserve_aspect_ratio: bool = True,
|
|
21
21
|
symmetric_pad: bool = True,
|
|
22
|
-
det_bs: int =
|
|
23
|
-
reco_bs: int =
|
|
22
|
+
det_bs: int = 2,
|
|
23
|
+
reco_bs: int = 512,
|
|
24
24
|
detect_orientation: bool = False,
|
|
25
25
|
straighten_pages: bool = False,
|
|
26
26
|
detect_language: bool = False,
|
|
27
27
|
load_in_8_bit: bool = False,
|
|
28
|
-
det_engine_cfg: EngineConfig = EngineConfig(),
|
|
29
|
-
reco_engine_cfg: EngineConfig = EngineConfig(),
|
|
30
|
-
clf_engine_cfg: EngineConfig = EngineConfig(),
|
|
28
|
+
det_engine_cfg: Optional[EngineConfig] = None,
|
|
29
|
+
reco_engine_cfg: Optional[EngineConfig] = None,
|
|
30
|
+
clf_engine_cfg: Optional[EngineConfig] = None,
|
|
31
31
|
**kwargs,
|
|
32
32
|
) -> OCRPredictor:
|
|
33
33
|
# Detection
|
|
@@ -74,9 +74,9 @@ def ocr_predictor(
|
|
|
74
74
|
straighten_pages: bool = False,
|
|
75
75
|
detect_language: bool = False,
|
|
76
76
|
load_in_8_bit: bool = False,
|
|
77
|
-
det_engine_cfg: EngineConfig = EngineConfig(),
|
|
78
|
-
reco_engine_cfg: EngineConfig = EngineConfig(),
|
|
79
|
-
clf_engine_cfg: EngineConfig = EngineConfig(),
|
|
77
|
+
det_engine_cfg: Optional[EngineConfig] = None,
|
|
78
|
+
reco_engine_cfg: Optional[EngineConfig] = None,
|
|
79
|
+
clf_engine_cfg: Optional[EngineConfig] = None,
|
|
80
80
|
**kwargs: Any,
|
|
81
81
|
) -> OCRPredictor:
|
|
82
82
|
"""End-to-end OCR architecture using one model for localization, and another for text recognition.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = 'v0.3.1'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: onnxtr
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -275,7 +275,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
|
275
275
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
276
276
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
277
277
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
278
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
279
279
|
|
|
280
280
|
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
281
281
|
|
|
@@ -345,8 +345,8 @@ from onnxtr.models import ocr_predictor, EngineConfig
|
|
|
345
345
|
model = ocr_predictor(
|
|
346
346
|
det_arch='fast_base', # detection architecture
|
|
347
347
|
reco_arch='vitstr_base', # recognition architecture
|
|
348
|
-
det_bs=
|
|
349
|
-
reco_bs=
|
|
348
|
+
det_bs=2, # detection batch size
|
|
349
|
+
reco_bs=512, # recognition batch size
|
|
350
350
|
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
|
|
351
351
|
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
|
|
352
352
|
# Preprocessing related parameters
|
|
@@ -419,7 +419,7 @@ general_options.enable_cpu_mem_arena = False
|
|
|
419
419
|
# NOTE: The following would force to run only on the GPU if no GPU is available it will raise an error
|
|
420
420
|
# List of strings e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"] or a list of tuples with the provider and its options e.g.
|
|
421
421
|
# [("CUDAExecutionProvider", {"device_id": 0}), ("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
|
|
422
|
-
providers = [("CUDAExecutionProvider", {"device_id": 0})] # For available providers see: https://onnxruntime.ai/docs/execution-providers/
|
|
422
|
+
providers = [("CUDAExecutionProvider", {"device_id": 0, "cudnn_conv_algo_search": "DEFAULT"})] # For available providers see: https://onnxruntime.ai/docs/execution-providers/
|
|
423
423
|
|
|
424
424
|
engine_config = EngineConfig(
|
|
425
425
|
session_options=general_options,
|
|
@@ -451,7 +451,7 @@ model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
|
|
|
451
451
|
|
|
452
452
|
## Models architectures
|
|
453
453
|
|
|
454
|
-
Credits where it's due: this repository
|
|
454
|
+
Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:
|
|
455
455
|
|
|
456
456
|
### Text Detection
|
|
457
457
|
|
onnxtr-0.3.0/onnxtr/version.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = 'v0.3.0'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|