onnxtr 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnxtr/models/classification/models/mobilenet.py +15 -4
- onnxtr/models/classification/predictor/base.py +1 -0
- onnxtr/models/classification/zoo.py +10 -7
- onnxtr/models/detection/models/differentiable_binarization.py +21 -6
- onnxtr/models/detection/models/fast.py +13 -6
- onnxtr/models/detection/models/linknet.py +21 -6
- onnxtr/models/detection/zoo.py +7 -3
- onnxtr/models/engine.py +2 -2
- onnxtr/models/predictor/base.py +5 -1
- onnxtr/models/recognition/models/crnn.py +21 -6
- onnxtr/models/recognition/models/master.py +7 -2
- onnxtr/models/recognition/models/parseq.py +8 -2
- onnxtr/models/recognition/models/sar.py +9 -2
- onnxtr/models/recognition/models/vitstr.py +17 -6
- onnxtr/models/recognition/zoo.py +7 -4
- onnxtr/models/zoo.py +6 -0
- onnxtr/version.py +1 -1
- {onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/METADATA +53 -13
- {onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/RECORD +23 -23
- {onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/LICENSE +0 -0
- {onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/WHEEL +0 -0
- {onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/top_level.txt +0 -0
- {onnxtr-0.1.1.dist-info → onnxtr-0.2.0.dist-info}/zip-safe +0 -0
|
@@ -24,6 +24,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
24
24
|
"input_shape": (3, 256, 256),
|
|
25
25
|
"classes": [0, -90, 180, 90],
|
|
26
26
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/mobilenet_v3_small_crop_orientation-5620cf7e.onnx",
|
|
27
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/mobilenet_v3_small_crop_orientation_static_8_bit-4cfaa621.onnx",
|
|
27
28
|
},
|
|
28
29
|
"mobilenet_v3_small_page_orientation": {
|
|
29
30
|
"mean": (0.694, 0.695, 0.693),
|
|
@@ -31,6 +32,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
31
32
|
"input_shape": (3, 512, 512),
|
|
32
33
|
"classes": [0, -90, 180, 90],
|
|
33
34
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/mobilenet_v3_small_page_orientation-d3f76d79.onnx",
|
|
35
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/mobilenet_v3_small_page_orientation_static_8_bit-3e5ef3dc.onnx",
|
|
34
36
|
},
|
|
35
37
|
}
|
|
36
38
|
|
|
@@ -64,14 +66,19 @@ class MobileNetV3(Engine):
|
|
|
64
66
|
def _mobilenet_v3(
|
|
65
67
|
arch: str,
|
|
66
68
|
model_path: str,
|
|
69
|
+
load_in_8_bit: bool = False,
|
|
67
70
|
**kwargs: Any,
|
|
68
71
|
) -> MobileNetV3:
|
|
72
|
+
# Patch the url
|
|
73
|
+
model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
|
|
69
74
|
_cfg = deepcopy(default_cfgs[arch])
|
|
70
75
|
return MobileNetV3(model_path, cfg=_cfg, **kwargs)
|
|
71
76
|
|
|
72
77
|
|
|
73
78
|
def mobilenet_v3_small_crop_orientation(
|
|
74
|
-
model_path: str = default_cfgs["mobilenet_v3_small_crop_orientation"]["url"],
|
|
79
|
+
model_path: str = default_cfgs["mobilenet_v3_small_crop_orientation"]["url"],
|
|
80
|
+
load_in_8_bit: bool = False,
|
|
81
|
+
**kwargs: Any,
|
|
75
82
|
) -> MobileNetV3:
|
|
76
83
|
"""MobileNetV3-Small architecture as described in
|
|
77
84
|
`"Searching for MobileNetV3",
|
|
@@ -86,17 +93,20 @@ def mobilenet_v3_small_crop_orientation(
|
|
|
86
93
|
Args:
|
|
87
94
|
----
|
|
88
95
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
96
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
89
97
|
**kwargs: keyword arguments of the MobileNetV3 architecture
|
|
90
98
|
|
|
91
99
|
Returns:
|
|
92
100
|
-------
|
|
93
101
|
MobileNetV3
|
|
94
102
|
"""
|
|
95
|
-
return _mobilenet_v3("mobilenet_v3_small_crop_orientation", model_path, **kwargs)
|
|
103
|
+
return _mobilenet_v3("mobilenet_v3_small_crop_orientation", model_path, load_in_8_bit, **kwargs)
|
|
96
104
|
|
|
97
105
|
|
|
98
106
|
def mobilenet_v3_small_page_orientation(
|
|
99
|
-
model_path: str = default_cfgs["mobilenet_v3_small_page_orientation"]["url"],
|
|
107
|
+
model_path: str = default_cfgs["mobilenet_v3_small_page_orientation"]["url"],
|
|
108
|
+
load_in_8_bit: bool = False,
|
|
109
|
+
**kwargs: Any,
|
|
100
110
|
) -> MobileNetV3:
|
|
101
111
|
"""MobileNetV3-Small architecture as described in
|
|
102
112
|
`"Searching for MobileNetV3",
|
|
@@ -111,10 +121,11 @@ def mobilenet_v3_small_page_orientation(
|
|
|
111
121
|
Args:
|
|
112
122
|
----
|
|
113
123
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
124
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
114
125
|
**kwargs: keyword arguments of the MobileNetV3 architecture
|
|
115
126
|
|
|
116
127
|
Returns:
|
|
117
128
|
-------
|
|
118
129
|
MobileNetV3
|
|
119
130
|
"""
|
|
120
|
-
return _mobilenet_v3("mobilenet_v3_small_page_orientation", model_path, **kwargs)
|
|
131
|
+
return _mobilenet_v3("mobilenet_v3_small_page_orientation", model_path, load_in_8_bit, **kwargs)
|
|
@@ -22,6 +22,7 @@ class OrientationPredictor(NestedObject):
|
|
|
22
22
|
----
|
|
23
23
|
pre_processor: transform inputs for easier batched model inference
|
|
24
24
|
model: core classification architecture (backbone + classification head)
|
|
25
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
25
26
|
"""
|
|
26
27
|
|
|
27
28
|
_children_names: List[str] = ["pre_processor", "model"]
|
|
@@ -14,24 +14,25 @@ __all__ = ["crop_orientation_predictor", "page_orientation_predictor"]
|
|
|
14
14
|
ORIENTATION_ARCHS: List[str] = ["mobilenet_v3_small_crop_orientation", "mobilenet_v3_small_page_orientation"]
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
def _orientation_predictor(arch: str, **kwargs: Any) -> OrientationPredictor:
|
|
17
|
+
def _orientation_predictor(arch: str, load_in_8_bit: bool = False, **kwargs: Any) -> OrientationPredictor:
|
|
18
18
|
if arch not in ORIENTATION_ARCHS:
|
|
19
19
|
raise ValueError(f"unknown architecture '{arch}'")
|
|
20
20
|
|
|
21
21
|
# Load directly classifier from backbone
|
|
22
|
-
_model = classification.__dict__[arch]()
|
|
22
|
+
_model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit)
|
|
23
23
|
kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
|
|
24
24
|
kwargs["std"] = kwargs.get("std", _model.cfg["std"])
|
|
25
25
|
kwargs["batch_size"] = kwargs.get("batch_size", 128 if "crop" in arch else 4)
|
|
26
26
|
input_shape = _model.cfg["input_shape"][1:]
|
|
27
27
|
predictor = OrientationPredictor(
|
|
28
|
-
PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs),
|
|
28
|
+
PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs),
|
|
29
|
+
_model,
|
|
29
30
|
)
|
|
30
31
|
return predictor
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
def crop_orientation_predictor(
|
|
34
|
-
arch: Any = "mobilenet_v3_small_crop_orientation", **kwargs: Any
|
|
35
|
+
arch: Any = "mobilenet_v3_small_crop_orientation", load_in_8_bit: bool = False, **kwargs: Any
|
|
35
36
|
) -> OrientationPredictor:
|
|
36
37
|
"""Crop orientation classification architecture.
|
|
37
38
|
|
|
@@ -44,17 +45,18 @@ def crop_orientation_predictor(
|
|
|
44
45
|
Args:
|
|
45
46
|
----
|
|
46
47
|
arch: name of the architecture to use (e.g. 'mobilenet_v3_small_crop_orientation')
|
|
48
|
+
load_in_8_bit: load the 8-bit quantized version of the model
|
|
47
49
|
**kwargs: keyword arguments to be passed to the OrientationPredictor
|
|
48
50
|
|
|
49
51
|
Returns:
|
|
50
52
|
-------
|
|
51
53
|
OrientationPredictor
|
|
52
54
|
"""
|
|
53
|
-
return _orientation_predictor(arch, **kwargs)
|
|
55
|
+
return _orientation_predictor(arch, load_in_8_bit, **kwargs)
|
|
54
56
|
|
|
55
57
|
|
|
56
58
|
def page_orientation_predictor(
|
|
57
|
-
arch: Any = "mobilenet_v3_small_page_orientation", **kwargs: Any
|
|
59
|
+
arch: Any = "mobilenet_v3_small_page_orientation", load_in_8_bit: bool = False, **kwargs: Any
|
|
58
60
|
) -> OrientationPredictor:
|
|
59
61
|
"""Page orientation classification architecture.
|
|
60
62
|
|
|
@@ -67,10 +69,11 @@ def page_orientation_predictor(
|
|
|
67
69
|
Args:
|
|
68
70
|
----
|
|
69
71
|
arch: name of the architecture to use (e.g. 'mobilenet_v3_small_page_orientation')
|
|
72
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
70
73
|
**kwargs: keyword arguments to be passed to the OrientationPredictor
|
|
71
74
|
|
|
72
75
|
Returns:
|
|
73
76
|
-------
|
|
74
77
|
OrientationPredictor
|
|
75
78
|
"""
|
|
76
|
-
return _orientation_predictor(arch, **kwargs)
|
|
79
|
+
return _orientation_predictor(arch, load_in_8_bit, **kwargs)
|
|
@@ -20,18 +20,21 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
20
20
|
"mean": (0.798, 0.785, 0.772),
|
|
21
21
|
"std": (0.264, 0.2749, 0.287),
|
|
22
22
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet50-69ba0015.onnx",
|
|
23
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet50_static_8_bit-09a6104f.onnx",
|
|
23
24
|
},
|
|
24
25
|
"db_resnet34": {
|
|
25
26
|
"input_shape": (3, 1024, 1024),
|
|
26
27
|
"mean": (0.798, 0.785, 0.772),
|
|
27
28
|
"std": (0.264, 0.2749, 0.287),
|
|
28
29
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet34-b4873198.onnx",
|
|
30
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet34_static_8_bit-027e2c7f.onnx",
|
|
29
31
|
},
|
|
30
32
|
"db_mobilenet_v3_large": {
|
|
31
33
|
"input_shape": (3, 1024, 1024),
|
|
32
34
|
"mean": (0.798, 0.785, 0.772),
|
|
33
35
|
"std": (0.264, 0.2749, 0.287),
|
|
34
36
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_mobilenet_v3_large-1866973f.onnx",
|
|
37
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_mobilenet_v3_large_static_8_bit-51659bb9.onnx",
|
|
35
38
|
},
|
|
36
39
|
}
|
|
37
40
|
|
|
@@ -87,13 +90,18 @@ class DBNet(Engine):
|
|
|
87
90
|
def _dbnet(
|
|
88
91
|
arch: str,
|
|
89
92
|
model_path: str,
|
|
93
|
+
load_in_8_bit: bool = False,
|
|
90
94
|
**kwargs: Any,
|
|
91
95
|
) -> DBNet:
|
|
96
|
+
# Patch the url
|
|
97
|
+
model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
|
|
92
98
|
# Build the model
|
|
93
99
|
return DBNet(model_path, cfg=default_cfgs[arch], **kwargs)
|
|
94
100
|
|
|
95
101
|
|
|
96
|
-
def db_resnet34(
|
|
102
|
+
def db_resnet34(
|
|
103
|
+
model_path: str = default_cfgs["db_resnet34"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
104
|
+
) -> DBNet:
|
|
97
105
|
"""DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
|
|
98
106
|
<https://arxiv.org/pdf/1911.08947.pdf>`_, using a ResNet-34 backbone.
|
|
99
107
|
|
|
@@ -106,16 +114,19 @@ def db_resnet34(model_path: str = default_cfgs["db_resnet34"]["url"], **kwargs:
|
|
|
106
114
|
Args:
|
|
107
115
|
----
|
|
108
116
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
117
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
109
118
|
**kwargs: keyword arguments of the DBNet architecture
|
|
110
119
|
|
|
111
120
|
Returns:
|
|
112
121
|
-------
|
|
113
122
|
text detection architecture
|
|
114
123
|
"""
|
|
115
|
-
return _dbnet("db_resnet34", model_path, **kwargs)
|
|
124
|
+
return _dbnet("db_resnet34", model_path, load_in_8_bit, **kwargs)
|
|
116
125
|
|
|
117
126
|
|
|
118
|
-
def db_resnet50(
|
|
127
|
+
def db_resnet50(
|
|
128
|
+
model_path: str = default_cfgs["db_resnet50"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
129
|
+
) -> DBNet:
|
|
119
130
|
"""DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
|
|
120
131
|
<https://arxiv.org/pdf/1911.08947.pdf>`_, using a ResNet-50 backbone.
|
|
121
132
|
|
|
@@ -128,16 +139,19 @@ def db_resnet50(model_path: str = default_cfgs["db_resnet50"]["url"], **kwargs:
|
|
|
128
139
|
Args:
|
|
129
140
|
----
|
|
130
141
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
142
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
131
143
|
**kwargs: keyword arguments of the DBNet architecture
|
|
132
144
|
|
|
133
145
|
Returns:
|
|
134
146
|
-------
|
|
135
147
|
text detection architecture
|
|
136
148
|
"""
|
|
137
|
-
return _dbnet("db_resnet50", model_path, **kwargs)
|
|
149
|
+
return _dbnet("db_resnet50", model_path, load_in_8_bit, **kwargs)
|
|
138
150
|
|
|
139
151
|
|
|
140
|
-
def db_mobilenet_v3_large(
|
|
152
|
+
def db_mobilenet_v3_large(
|
|
153
|
+
model_path: str = default_cfgs["db_mobilenet_v3_large"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
154
|
+
) -> DBNet:
|
|
141
155
|
"""DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
|
|
142
156
|
<https://arxiv.org/pdf/1911.08947.pdf>`_, using a MobileNet V3 Large backbone.
|
|
143
157
|
|
|
@@ -150,10 +164,11 @@ def db_mobilenet_v3_large(model_path: str = default_cfgs["db_mobilenet_v3_large"
|
|
|
150
164
|
Args:
|
|
151
165
|
----
|
|
152
166
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
167
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
153
168
|
**kwargs: keyword arguments of the DBNet architecture
|
|
154
169
|
|
|
155
170
|
Returns:
|
|
156
171
|
-------
|
|
157
172
|
text detection architecture
|
|
158
173
|
"""
|
|
159
|
-
return _dbnet("db_mobilenet_v3_large", model_path, **kwargs)
|
|
174
|
+
return _dbnet("db_mobilenet_v3_large", model_path, load_in_8_bit, **kwargs)
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
+
import logging
|
|
6
7
|
from typing import Any, Dict, Optional
|
|
7
8
|
|
|
8
9
|
import numpy as np
|
|
@@ -88,13 +89,16 @@ class FAST(Engine):
|
|
|
88
89
|
def _fast(
|
|
89
90
|
arch: str,
|
|
90
91
|
model_path: str,
|
|
92
|
+
load_in_8_bit: bool = False,
|
|
91
93
|
**kwargs: Any,
|
|
92
94
|
) -> FAST:
|
|
95
|
+
if load_in_8_bit:
|
|
96
|
+
logging.warning("FAST models do not support 8-bit quantization yet. Loading full precision model...")
|
|
93
97
|
# Build the model
|
|
94
98
|
return FAST(model_path, cfg=default_cfgs[arch], **kwargs)
|
|
95
99
|
|
|
96
100
|
|
|
97
|
-
def fast_tiny(model_path: str = default_cfgs["fast_tiny"]["url"], **kwargs: Any) -> FAST:
|
|
101
|
+
def fast_tiny(model_path: str = default_cfgs["fast_tiny"]["url"], load_in_8_bit: bool = False, **kwargs: Any) -> FAST:
|
|
98
102
|
"""FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
|
|
99
103
|
<https://arxiv.org/pdf/2111.02394.pdf>`_, using a tiny TextNet backbone.
|
|
100
104
|
|
|
@@ -107,16 +111,17 @@ def fast_tiny(model_path: str = default_cfgs["fast_tiny"]["url"], **kwargs: Any)
|
|
|
107
111
|
Args:
|
|
108
112
|
----
|
|
109
113
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
114
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
110
115
|
**kwargs: keyword arguments of the FAST architecture
|
|
111
116
|
|
|
112
117
|
Returns:
|
|
113
118
|
-------
|
|
114
119
|
text detection architecture
|
|
115
120
|
"""
|
|
116
|
-
return _fast("fast_tiny", model_path, **kwargs)
|
|
121
|
+
return _fast("fast_tiny", model_path, load_in_8_bit, **kwargs)
|
|
117
122
|
|
|
118
123
|
|
|
119
|
-
def fast_small(model_path: str = default_cfgs["fast_small"]["url"], **kwargs: Any) -> FAST:
|
|
124
|
+
def fast_small(model_path: str = default_cfgs["fast_small"]["url"], load_in_8_bit: bool = False, **kwargs: Any) -> FAST:
|
|
120
125
|
"""FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
|
|
121
126
|
<https://arxiv.org/pdf/2111.02394.pdf>`_, using a small TextNet backbone.
|
|
122
127
|
|
|
@@ -129,16 +134,17 @@ def fast_small(model_path: str = default_cfgs["fast_small"]["url"], **kwargs: An
|
|
|
129
134
|
Args:
|
|
130
135
|
----
|
|
131
136
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
137
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
132
138
|
**kwargs: keyword arguments of the FAST architecture
|
|
133
139
|
|
|
134
140
|
Returns:
|
|
135
141
|
-------
|
|
136
142
|
text detection architecture
|
|
137
143
|
"""
|
|
138
|
-
return _fast("fast_small", model_path, **kwargs)
|
|
144
|
+
return _fast("fast_small", model_path, load_in_8_bit, **kwargs)
|
|
139
145
|
|
|
140
146
|
|
|
141
|
-
def fast_base(model_path: str = default_cfgs["fast_base"]["url"], **kwargs: Any) -> FAST:
|
|
147
|
+
def fast_base(model_path: str = default_cfgs["fast_base"]["url"], load_in_8_bit: bool = False, **kwargs: Any) -> FAST:
|
|
142
148
|
"""FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
|
|
143
149
|
<https://arxiv.org/pdf/2111.02394.pdf>`_, using a base TextNet backbone.
|
|
144
150
|
|
|
@@ -151,10 +157,11 @@ def fast_base(model_path: str = default_cfgs["fast_base"]["url"], **kwargs: Any)
|
|
|
151
157
|
Args:
|
|
152
158
|
----
|
|
153
159
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
160
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
154
161
|
**kwargs: keyword arguments of the FAST architecture
|
|
155
162
|
|
|
156
163
|
Returns:
|
|
157
164
|
-------
|
|
158
165
|
text detection architecture
|
|
159
166
|
"""
|
|
160
|
-
return _fast("fast_base", model_path, **kwargs)
|
|
167
|
+
return _fast("fast_base", model_path, load_in_8_bit, **kwargs)
|
|
@@ -20,18 +20,21 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
20
20
|
"mean": (0.798, 0.785, 0.772),
|
|
21
21
|
"std": (0.264, 0.2749, 0.287),
|
|
22
22
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet18-e0e0b9dc.onnx",
|
|
23
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet18_static_8_bit-3b3a37dd.onnx",
|
|
23
24
|
},
|
|
24
25
|
"linknet_resnet34": {
|
|
25
26
|
"input_shape": (3, 1024, 1024),
|
|
26
27
|
"mean": (0.798, 0.785, 0.772),
|
|
27
28
|
"std": (0.264, 0.2749, 0.287),
|
|
28
29
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet34-93e39a39.onnx",
|
|
30
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet34_static_8_bit-2824329d.onnx",
|
|
29
31
|
},
|
|
30
32
|
"linknet_resnet50": {
|
|
31
33
|
"input_shape": (3, 1024, 1024),
|
|
32
34
|
"mean": (0.798, 0.785, 0.772),
|
|
33
35
|
"std": (0.264, 0.2749, 0.287),
|
|
34
36
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet50-15d8c4ec.onnx",
|
|
37
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet50_static_8_bit-65d6b0b8.onnx",
|
|
35
38
|
},
|
|
36
39
|
}
|
|
37
40
|
|
|
@@ -88,13 +91,18 @@ class LinkNet(Engine):
|
|
|
88
91
|
def _linknet(
|
|
89
92
|
arch: str,
|
|
90
93
|
model_path: str,
|
|
94
|
+
load_in_8_bit: bool = False,
|
|
91
95
|
**kwargs: Any,
|
|
92
96
|
) -> LinkNet:
|
|
97
|
+
# Patch the url
|
|
98
|
+
model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
|
|
93
99
|
# Build the model
|
|
94
100
|
return LinkNet(model_path, cfg=default_cfgs[arch], **kwargs)
|
|
95
101
|
|
|
96
102
|
|
|
97
|
-
def linknet_resnet18(
|
|
103
|
+
def linknet_resnet18(
|
|
104
|
+
model_path: str = default_cfgs["linknet_resnet18"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
105
|
+
) -> LinkNet:
|
|
98
106
|
"""LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
|
|
99
107
|
<https://arxiv.org/pdf/1707.03718.pdf>`_.
|
|
100
108
|
|
|
@@ -107,16 +115,19 @@ def linknet_resnet18(model_path: str = default_cfgs["linknet_resnet18"]["url"],
|
|
|
107
115
|
Args:
|
|
108
116
|
----
|
|
109
117
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
118
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
110
119
|
**kwargs: keyword arguments of the LinkNet architecture
|
|
111
120
|
|
|
112
121
|
Returns:
|
|
113
122
|
-------
|
|
114
123
|
text detection architecture
|
|
115
124
|
"""
|
|
116
|
-
return _linknet("linknet_resnet18", model_path, **kwargs)
|
|
125
|
+
return _linknet("linknet_resnet18", model_path, load_in_8_bit, **kwargs)
|
|
117
126
|
|
|
118
127
|
|
|
119
|
-
def linknet_resnet34(
|
|
128
|
+
def linknet_resnet34(
|
|
129
|
+
model_path: str = default_cfgs["linknet_resnet34"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
130
|
+
) -> LinkNet:
|
|
120
131
|
"""LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
|
|
121
132
|
<https://arxiv.org/pdf/1707.03718.pdf>`_.
|
|
122
133
|
|
|
@@ -129,16 +140,19 @@ def linknet_resnet34(model_path: str = default_cfgs["linknet_resnet34"]["url"],
|
|
|
129
140
|
Args:
|
|
130
141
|
----
|
|
131
142
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
143
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
132
144
|
**kwargs: keyword arguments of the LinkNet architecture
|
|
133
145
|
|
|
134
146
|
Returns:
|
|
135
147
|
-------
|
|
136
148
|
text detection architecture
|
|
137
149
|
"""
|
|
138
|
-
return _linknet("linknet_resnet34", model_path, **kwargs)
|
|
150
|
+
return _linknet("linknet_resnet34", model_path, load_in_8_bit, **kwargs)
|
|
139
151
|
|
|
140
152
|
|
|
141
|
-
def linknet_resnet50(
|
|
153
|
+
def linknet_resnet50(
|
|
154
|
+
model_path: str = default_cfgs["linknet_resnet50"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
155
|
+
) -> LinkNet:
|
|
142
156
|
"""LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
|
|
143
157
|
<https://arxiv.org/pdf/1707.03718.pdf>`_.
|
|
144
158
|
|
|
@@ -151,10 +165,11 @@ def linknet_resnet50(model_path: str = default_cfgs["linknet_resnet50"]["url"],
|
|
|
151
165
|
Args:
|
|
152
166
|
----
|
|
153
167
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
168
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
154
169
|
**kwargs: keyword arguments of the LinkNet architecture
|
|
155
170
|
|
|
156
171
|
Returns:
|
|
157
172
|
-------
|
|
158
173
|
text detection architecture
|
|
159
174
|
"""
|
|
160
|
-
return _linknet("linknet_resnet50", model_path, **kwargs)
|
|
175
|
+
return _linknet("linknet_resnet50", model_path, load_in_8_bit, **kwargs)
|
onnxtr/models/detection/zoo.py
CHANGED
|
@@ -24,12 +24,14 @@ ARCHS = [
|
|
|
24
24
|
]
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def _predictor(
|
|
27
|
+
def _predictor(
|
|
28
|
+
arch: Any, assume_straight_pages: bool = True, load_in_8_bit: bool = False, **kwargs: Any
|
|
29
|
+
) -> DetectionPredictor:
|
|
28
30
|
if isinstance(arch, str):
|
|
29
31
|
if arch not in ARCHS:
|
|
30
32
|
raise ValueError(f"unknown architecture '{arch}'")
|
|
31
33
|
|
|
32
|
-
_model = detection.__dict__[arch](assume_straight_pages=assume_straight_pages)
|
|
34
|
+
_model = detection.__dict__[arch](assume_straight_pages=assume_straight_pages, load_in_8_bit=load_in_8_bit)
|
|
33
35
|
else:
|
|
34
36
|
if not isinstance(arch, (detection.DBNet, detection.LinkNet, detection.FAST)):
|
|
35
37
|
raise ValueError(f"unknown architecture: {type(arch)}")
|
|
@@ -50,6 +52,7 @@ def _predictor(arch: Any, assume_straight_pages: bool = True, **kwargs: Any) ->
|
|
|
50
52
|
def detection_predictor(
|
|
51
53
|
arch: Any = "fast_base",
|
|
52
54
|
assume_straight_pages: bool = True,
|
|
55
|
+
load_in_8_bit: bool = False,
|
|
53
56
|
**kwargs: Any,
|
|
54
57
|
) -> DetectionPredictor:
|
|
55
58
|
"""Text detection architecture.
|
|
@@ -64,10 +67,11 @@ def detection_predictor(
|
|
|
64
67
|
----
|
|
65
68
|
arch: name of the architecture or model itself to use (e.g. 'db_resnet50')
|
|
66
69
|
assume_straight_pages: If True, fit straight boxes to the page
|
|
70
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
67
71
|
**kwargs: optional keyword arguments passed to the architecture
|
|
68
72
|
|
|
69
73
|
Returns:
|
|
70
74
|
-------
|
|
71
75
|
Detection predictor
|
|
72
76
|
"""
|
|
73
|
-
return _predictor(arch, assume_straight_pages, **kwargs)
|
|
77
|
+
return _predictor(arch, assume_straight_pages, load_in_8_bit, **kwargs)
|
onnxtr/models/engine.py
CHANGED
|
@@ -43,8 +43,8 @@ class Engine:
|
|
|
43
43
|
inputs = np.broadcast_to(inputs, (self.fixed_batch_size, *inputs.shape))
|
|
44
44
|
# combine the results
|
|
45
45
|
logits = np.concatenate(
|
|
46
|
-
[self.runtime.run(self.output_name, {
|
|
46
|
+
[self.runtime.run(self.output_name, {self.runtime_inputs.name: batch})[0] for batch in inputs], axis=0
|
|
47
47
|
)
|
|
48
48
|
else:
|
|
49
|
-
logits = self.runtime.run(self.output_name, {
|
|
49
|
+
logits = self.runtime.run(self.output_name, {self.runtime_inputs.name: inputs})[0]
|
|
50
50
|
return shape_translate(logits, format="BHWC")
|
onnxtr/models/predictor/base.py
CHANGED
|
@@ -31,6 +31,7 @@ class _OCRPredictor:
|
|
|
31
31
|
accordingly. Doing so will improve performances for documents with page-uniform rotations.
|
|
32
32
|
preserve_aspect_ratio: if True, resize preserving the aspect ratio (with padding)
|
|
33
33
|
symmetric_pad: if True and preserve_aspect_ratio is True, pad the image symmetrically.
|
|
34
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
34
35
|
**kwargs: keyword args of `DocumentBuilder`
|
|
35
36
|
"""
|
|
36
37
|
|
|
@@ -42,11 +43,14 @@ class _OCRPredictor:
|
|
|
42
43
|
straighten_pages: bool = False,
|
|
43
44
|
preserve_aspect_ratio: bool = True,
|
|
44
45
|
symmetric_pad: bool = True,
|
|
46
|
+
load_in_8_bit: bool = False,
|
|
45
47
|
**kwargs: Any,
|
|
46
48
|
) -> None:
|
|
47
49
|
self.assume_straight_pages = assume_straight_pages
|
|
48
50
|
self.straighten_pages = straighten_pages
|
|
49
|
-
self.crop_orientation_predictor = None if assume_straight_pages else crop_orientation_predictor()
|
|
51
|
+
self.crop_orientation_predictor = (
|
|
52
|
+
None if assume_straight_pages else crop_orientation_predictor(load_in_8_bit=load_in_8_bit)
|
|
53
|
+
)
|
|
50
54
|
self.doc_builder = DocumentBuilder(**kwargs)
|
|
51
55
|
self.preserve_aspect_ratio = preserve_aspect_ratio
|
|
52
56
|
self.symmetric_pad = symmetric_pad
|
|
@@ -24,6 +24,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
24
24
|
"input_shape": (3, 32, 128),
|
|
25
25
|
"vocab": VOCABS["legacy_french"],
|
|
26
26
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_vgg16_bn-662979cc.onnx",
|
|
27
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_vgg16_bn_static_8_bit-bce050c7.onnx",
|
|
27
28
|
},
|
|
28
29
|
"crnn_mobilenet_v3_small": {
|
|
29
30
|
"mean": (0.694, 0.695, 0.693),
|
|
@@ -31,6 +32,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
31
32
|
"input_shape": (3, 32, 128),
|
|
32
33
|
"vocab": VOCABS["french"],
|
|
33
34
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_small-bded4d49.onnx",
|
|
35
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_mobilenet_v3_small_static_8_bit-4949006f.onnx",
|
|
34
36
|
},
|
|
35
37
|
"crnn_mobilenet_v3_large": {
|
|
36
38
|
"mean": (0.694, 0.695, 0.693),
|
|
@@ -38,6 +40,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
38
40
|
"input_shape": (3, 32, 128),
|
|
39
41
|
"vocab": VOCABS["french"],
|
|
40
42
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_large-d42e8185.onnx",
|
|
43
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_mobilenet_v3_large_static_8_bit-459e856d.onnx",
|
|
41
44
|
},
|
|
42
45
|
}
|
|
43
46
|
|
|
@@ -148,6 +151,7 @@ class CRNN(Engine):
|
|
|
148
151
|
def _crnn(
|
|
149
152
|
arch: str,
|
|
150
153
|
model_path: str,
|
|
154
|
+
load_in_8_bit: bool = False,
|
|
151
155
|
**kwargs: Any,
|
|
152
156
|
) -> CRNN:
|
|
153
157
|
kwargs["vocab"] = kwargs.get("vocab", default_cfgs[arch]["vocab"])
|
|
@@ -155,12 +159,16 @@ def _crnn(
|
|
|
155
159
|
_cfg = deepcopy(default_cfgs[arch])
|
|
156
160
|
_cfg["vocab"] = kwargs["vocab"]
|
|
157
161
|
_cfg["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
|
|
162
|
+
# Patch the url
|
|
163
|
+
model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
|
|
158
164
|
|
|
159
165
|
# Build the model
|
|
160
166
|
return CRNN(model_path, cfg=_cfg, **kwargs)
|
|
161
167
|
|
|
162
168
|
|
|
163
|
-
def crnn_vgg16_bn(
|
|
169
|
+
def crnn_vgg16_bn(
|
|
170
|
+
model_path: str = default_cfgs["crnn_vgg16_bn"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
171
|
+
) -> CRNN:
|
|
164
172
|
"""CRNN with a VGG-16 backbone as described in `"An End-to-End Trainable Neural Network for Image-based
|
|
165
173
|
Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.
|
|
166
174
|
|
|
@@ -173,16 +181,19 @@ def crnn_vgg16_bn(model_path: str = default_cfgs["crnn_vgg16_bn"]["url"], **kwar
|
|
|
173
181
|
Args:
|
|
174
182
|
----
|
|
175
183
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
184
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
176
185
|
**kwargs: keyword arguments of the CRNN architecture
|
|
177
186
|
|
|
178
187
|
Returns:
|
|
179
188
|
-------
|
|
180
189
|
text recognition architecture
|
|
181
190
|
"""
|
|
182
|
-
return _crnn("crnn_vgg16_bn", model_path, **kwargs)
|
|
191
|
+
return _crnn("crnn_vgg16_bn", model_path, load_in_8_bit, **kwargs)
|
|
183
192
|
|
|
184
193
|
|
|
185
|
-
def crnn_mobilenet_v3_small(
|
|
194
|
+
def crnn_mobilenet_v3_small(
|
|
195
|
+
model_path: str = default_cfgs["crnn_mobilenet_v3_small"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
196
|
+
) -> CRNN:
|
|
186
197
|
"""CRNN with a MobileNet V3 Small backbone as described in `"An End-to-End Trainable Neural Network for Image-based
|
|
187
198
|
Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.
|
|
188
199
|
|
|
@@ -195,16 +206,19 @@ def crnn_mobilenet_v3_small(model_path: str = default_cfgs["crnn_mobilenet_v3_sm
|
|
|
195
206
|
Args:
|
|
196
207
|
----
|
|
197
208
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
209
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
198
210
|
**kwargs: keyword arguments of the CRNN architecture
|
|
199
211
|
|
|
200
212
|
Returns:
|
|
201
213
|
-------
|
|
202
214
|
text recognition architecture
|
|
203
215
|
"""
|
|
204
|
-
return _crnn("crnn_mobilenet_v3_small", model_path, **kwargs)
|
|
216
|
+
return _crnn("crnn_mobilenet_v3_small", model_path, load_in_8_bit, **kwargs)
|
|
205
217
|
|
|
206
218
|
|
|
207
|
-
def crnn_mobilenet_v3_large(
|
|
219
|
+
def crnn_mobilenet_v3_large(
|
|
220
|
+
model_path: str = default_cfgs["crnn_mobilenet_v3_large"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
221
|
+
) -> CRNN:
|
|
208
222
|
"""CRNN with a MobileNet V3 Large backbone as described in `"An End-to-End Trainable Neural Network for Image-based
|
|
209
223
|
Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.
|
|
210
224
|
|
|
@@ -217,10 +231,11 @@ def crnn_mobilenet_v3_large(model_path: str = default_cfgs["crnn_mobilenet_v3_la
|
|
|
217
231
|
Args:
|
|
218
232
|
----
|
|
219
233
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
234
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
220
235
|
**kwargs: keyword arguments of the CRNN architecture
|
|
221
236
|
|
|
222
237
|
Returns:
|
|
223
238
|
-------
|
|
224
239
|
text recognition architecture
|
|
225
240
|
"""
|
|
226
|
-
return _crnn("crnn_mobilenet_v3_large", model_path, **kwargs)
|
|
241
|
+
return _crnn("crnn_mobilenet_v3_large", model_path, load_in_8_bit, **kwargs)
|
|
@@ -24,6 +24,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
24
24
|
"input_shape": (3, 32, 128),
|
|
25
25
|
"vocab": VOCABS["french"],
|
|
26
26
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/master-b1287fcd.onnx",
|
|
27
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/master_dynamic_8_bit-d8bd8206.onnx",
|
|
27
28
|
},
|
|
28
29
|
}
|
|
29
30
|
|
|
@@ -112,6 +113,7 @@ class MASTERPostProcessor(RecognitionPostProcessor):
|
|
|
112
113
|
def _master(
|
|
113
114
|
arch: str,
|
|
114
115
|
model_path: str,
|
|
116
|
+
load_in_8_bit: bool = False,
|
|
115
117
|
**kwargs: Any,
|
|
116
118
|
) -> MASTER:
|
|
117
119
|
# Patch the config
|
|
@@ -120,11 +122,13 @@ def _master(
|
|
|
120
122
|
_cfg["vocab"] = kwargs.get("vocab", _cfg["vocab"])
|
|
121
123
|
|
|
122
124
|
kwargs["vocab"] = _cfg["vocab"]
|
|
125
|
+
# Patch the url
|
|
126
|
+
model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
|
|
123
127
|
|
|
124
128
|
return MASTER(model_path, cfg=_cfg, **kwargs)
|
|
125
129
|
|
|
126
130
|
|
|
127
|
-
def master(model_path: str = default_cfgs["master"]["url"], **kwargs: Any) -> MASTER:
|
|
131
|
+
def master(model_path: str = default_cfgs["master"]["url"], load_in_8_bit: bool = False, **kwargs: Any) -> MASTER:
|
|
128
132
|
"""MASTER as described in paper: `"MASTER: Multi-Aspect Non-local Network for Scene Text Recognition" <https://arxiv.org/pdf/1910.02562.pdf>`_.
|
|
129
133
|
|
|
130
134
|
>>> import numpy as np
|
|
@@ -136,10 +140,11 @@ def master(model_path: str = default_cfgs["master"]["url"], **kwargs: Any) -> MA
|
|
|
136
140
|
Args:
|
|
137
141
|
----
|
|
138
142
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
143
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
139
144
|
**kwargs: keyword arguments passed to the MASTER architecture
|
|
140
145
|
|
|
141
146
|
Returns:
|
|
142
147
|
-------
|
|
143
148
|
text recognition architecture
|
|
144
149
|
"""
|
|
145
|
-
return _master("master", model_path, **kwargs)
|
|
150
|
+
return _master("master", model_path, load_in_8_bit, **kwargs)
|
|
@@ -23,6 +23,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
23
23
|
"input_shape": (3, 32, 128),
|
|
24
24
|
"vocab": VOCABS["french"],
|
|
25
25
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/parseq-00b40714.onnx",
|
|
26
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/parseq_dynamic_8_bit-5b04d9f7.onnx",
|
|
26
27
|
},
|
|
27
28
|
}
|
|
28
29
|
|
|
@@ -32,6 +33,7 @@ class PARSeq(Engine):
|
|
|
32
33
|
|
|
33
34
|
Args:
|
|
34
35
|
----
|
|
36
|
+
model_path: path to onnx model file
|
|
35
37
|
vocab: vocabulary used for encoding
|
|
36
38
|
cfg: dictionary containing information about the model
|
|
37
39
|
**kwargs: additional arguments to be passed to `Engine`
|
|
@@ -99,6 +101,7 @@ class PARSeqPostProcessor(RecognitionPostProcessor):
|
|
|
99
101
|
def _parseq(
|
|
100
102
|
arch: str,
|
|
101
103
|
model_path: str,
|
|
104
|
+
load_in_8_bit: bool = False,
|
|
102
105
|
**kwargs: Any,
|
|
103
106
|
) -> PARSeq:
|
|
104
107
|
# Patch the config
|
|
@@ -107,12 +110,14 @@ def _parseq(
|
|
|
107
110
|
_cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"])
|
|
108
111
|
|
|
109
112
|
kwargs["vocab"] = _cfg["vocab"]
|
|
113
|
+
# Patch the url
|
|
114
|
+
model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
|
|
110
115
|
|
|
111
116
|
# Build the model
|
|
112
117
|
return PARSeq(model_path, cfg=_cfg, **kwargs)
|
|
113
118
|
|
|
114
119
|
|
|
115
|
-
def parseq(model_path: str = default_cfgs["parseq"]["url"], **kwargs: Any) -> PARSeq:
|
|
120
|
+
def parseq(model_path: str = default_cfgs["parseq"]["url"], load_in_8_bit: bool = False, **kwargs: Any) -> PARSeq:
|
|
116
121
|
"""PARSeq architecture from
|
|
117
122
|
`"Scene Text Recognition with Permuted Autoregressive Sequence Models" <https://arxiv.org/pdf/2207.06966>`_.
|
|
118
123
|
|
|
@@ -125,10 +130,11 @@ def parseq(model_path: str = default_cfgs["parseq"]["url"], **kwargs: Any) -> PA
|
|
|
125
130
|
Args:
|
|
126
131
|
----
|
|
127
132
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
133
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
128
134
|
**kwargs: keyword arguments of the PARSeq architecture
|
|
129
135
|
|
|
130
136
|
Returns:
|
|
131
137
|
-------
|
|
132
138
|
text recognition architecture
|
|
133
139
|
"""
|
|
134
|
-
return _parseq("parseq", model_path, **kwargs)
|
|
140
|
+
return _parseq("parseq", model_path, load_in_8_bit, **kwargs)
|
|
@@ -23,6 +23,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
23
23
|
"input_shape": (3, 32, 128),
|
|
24
24
|
"vocab": VOCABS["french"],
|
|
25
25
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/sar_resnet31-395f8005.onnx",
|
|
26
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/sar_resnet31_static_8_bit-c07316bc.onnx",
|
|
26
27
|
},
|
|
27
28
|
}
|
|
28
29
|
|
|
@@ -99,6 +100,7 @@ class SARPostProcessor(RecognitionPostProcessor):
|
|
|
99
100
|
def _sar(
|
|
100
101
|
arch: str,
|
|
101
102
|
model_path: str,
|
|
103
|
+
load_in_8_bit: bool = False,
|
|
102
104
|
**kwargs: Any,
|
|
103
105
|
) -> SAR:
|
|
104
106
|
# Patch the config
|
|
@@ -107,12 +109,16 @@ def _sar(
|
|
|
107
109
|
_cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"])
|
|
108
110
|
|
|
109
111
|
kwargs["vocab"] = _cfg["vocab"]
|
|
112
|
+
# Patch the url
|
|
113
|
+
model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
|
|
110
114
|
|
|
111
115
|
# Build the model
|
|
112
116
|
return SAR(model_path, cfg=_cfg, **kwargs)
|
|
113
117
|
|
|
114
118
|
|
|
115
|
-
def sar_resnet31(
|
|
119
|
+
def sar_resnet31(
|
|
120
|
+
model_path: str = default_cfgs["sar_resnet31"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
121
|
+
) -> SAR:
|
|
116
122
|
"""SAR with a resnet-31 feature extractor as described in `"Show, Attend and Read:A Simple and Strong
|
|
117
123
|
Baseline for Irregular Text Recognition" <https://arxiv.org/pdf/1811.00751.pdf>`_.
|
|
118
124
|
|
|
@@ -125,10 +131,11 @@ def sar_resnet31(model_path: str = default_cfgs["sar_resnet31"]["url"], **kwargs
|
|
|
125
131
|
Args:
|
|
126
132
|
----
|
|
127
133
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
134
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
128
135
|
**kwargs: keyword arguments of the SAR architecture
|
|
129
136
|
|
|
130
137
|
Returns:
|
|
131
138
|
-------
|
|
132
139
|
text recognition architecture
|
|
133
140
|
"""
|
|
134
|
-
return _sar("sar_resnet31", model_path, **kwargs)
|
|
141
|
+
return _sar("sar_resnet31", model_path, load_in_8_bit, **kwargs)
|
|
@@ -23,6 +23,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
23
23
|
"input_shape": (3, 32, 128),
|
|
24
24
|
"vocab": VOCABS["french"],
|
|
25
25
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/vitstr_small-3ff9c500.onnx",
|
|
26
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/vitstr_small_dynamic_8_bit-bec6c796.onnx",
|
|
26
27
|
},
|
|
27
28
|
"vitstr_base": {
|
|
28
29
|
"mean": (0.694, 0.695, 0.693),
|
|
@@ -30,6 +31,7 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
|
|
|
30
31
|
"input_shape": (3, 32, 128),
|
|
31
32
|
"vocab": VOCABS["french"],
|
|
32
33
|
"url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/vitstr_base-ff62f5be.onnx",
|
|
34
|
+
"url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/vitstr_base_dynamic_8_bit-976c7cd6.onnx",
|
|
33
35
|
},
|
|
34
36
|
}
|
|
35
37
|
|
|
@@ -109,6 +111,7 @@ class ViTSTRPostProcessor(RecognitionPostProcessor):
|
|
|
109
111
|
def _vitstr(
|
|
110
112
|
arch: str,
|
|
111
113
|
model_path: str,
|
|
114
|
+
load_in_8_bit: bool = False,
|
|
112
115
|
**kwargs: Any,
|
|
113
116
|
) -> ViTSTR:
|
|
114
117
|
# Patch the config
|
|
@@ -117,12 +120,16 @@ def _vitstr(
|
|
|
117
120
|
_cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"])
|
|
118
121
|
|
|
119
122
|
kwargs["vocab"] = _cfg["vocab"]
|
|
123
|
+
# Patch the url
|
|
124
|
+
model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
|
|
120
125
|
|
|
121
126
|
# Build the model
|
|
122
127
|
return ViTSTR(model_path, cfg=_cfg, **kwargs)
|
|
123
128
|
|
|
124
129
|
|
|
125
|
-
def vitstr_small(
|
|
130
|
+
def vitstr_small(
|
|
131
|
+
model_path: str = default_cfgs["vitstr_small"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
132
|
+
) -> ViTSTR:
|
|
126
133
|
"""ViTSTR-Small as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
|
|
127
134
|
<https://arxiv.org/pdf/2105.08582.pdf>`_.
|
|
128
135
|
|
|
@@ -135,16 +142,19 @@ def vitstr_small(model_path: str = default_cfgs["vitstr_small"]["url"], **kwargs
|
|
|
135
142
|
Args:
|
|
136
143
|
----
|
|
137
144
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
138
|
-
|
|
145
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
146
|
+
**kwargs: keyword arguments of the ViTSTR architecture
|
|
139
147
|
|
|
140
148
|
Returns:
|
|
141
149
|
-------
|
|
142
150
|
text recognition architecture
|
|
143
151
|
"""
|
|
144
|
-
return _vitstr("vitstr_small", model_path, **kwargs)
|
|
152
|
+
return _vitstr("vitstr_small", model_path, load_in_8_bit, **kwargs)
|
|
145
153
|
|
|
146
154
|
|
|
147
|
-
def vitstr_base(
|
|
155
|
+
def vitstr_base(
|
|
156
|
+
model_path: str = default_cfgs["vitstr_base"]["url"], load_in_8_bit: bool = False, **kwargs: Any
|
|
157
|
+
) -> ViTSTR:
|
|
148
158
|
"""ViTSTR-Base as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
|
|
149
159
|
<https://arxiv.org/pdf/2105.08582.pdf>`_.
|
|
150
160
|
|
|
@@ -157,10 +167,11 @@ def vitstr_base(model_path: str = default_cfgs["vitstr_base"]["url"], **kwargs:
|
|
|
157
167
|
Args:
|
|
158
168
|
----
|
|
159
169
|
model_path: path to onnx model file, defaults to url in default_cfgs
|
|
160
|
-
|
|
170
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
171
|
+
**kwargs: keyword arguments of the ViTSTR architecture
|
|
161
172
|
|
|
162
173
|
Returns:
|
|
163
174
|
-------
|
|
164
175
|
text recognition architecture
|
|
165
176
|
"""
|
|
166
|
-
return _vitstr("vitstr_base", model_path, **kwargs)
|
|
177
|
+
return _vitstr("vitstr_base", model_path, load_in_8_bit, **kwargs)
|
onnxtr/models/recognition/zoo.py
CHANGED
|
@@ -25,12 +25,12 @@ ARCHS: List[str] = [
|
|
|
25
25
|
]
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
def _predictor(arch: Any, **kwargs: Any) -> RecognitionPredictor:
|
|
28
|
+
def _predictor(arch: Any, load_in_8_bit: bool = False, **kwargs: Any) -> RecognitionPredictor:
|
|
29
29
|
if isinstance(arch, str):
|
|
30
30
|
if arch not in ARCHS:
|
|
31
31
|
raise ValueError(f"unknown architecture '{arch}'")
|
|
32
32
|
|
|
33
|
-
_model = recognition.__dict__[arch]()
|
|
33
|
+
_model = recognition.__dict__[arch](load_in_8_bit=load_in_8_bit)
|
|
34
34
|
else:
|
|
35
35
|
if not isinstance(
|
|
36
36
|
arch, (recognition.CRNN, recognition.SAR, recognition.MASTER, recognition.ViTSTR, recognition.PARSeq)
|
|
@@ -47,7 +47,9 @@ def _predictor(arch: Any, **kwargs: Any) -> RecognitionPredictor:
|
|
|
47
47
|
return predictor
|
|
48
48
|
|
|
49
49
|
|
|
50
|
-
def recognition_predictor(
|
|
50
|
+
def recognition_predictor(
|
|
51
|
+
arch: Any = "crnn_vgg16_bn", load_in_8_bit: bool = False, **kwargs: Any
|
|
52
|
+
) -> RecognitionPredictor:
|
|
51
53
|
"""Text recognition architecture.
|
|
52
54
|
|
|
53
55
|
Example::
|
|
@@ -60,10 +62,11 @@ def recognition_predictor(arch: Any = "crnn_vgg16_bn", **kwargs: Any) -> Recogni
|
|
|
60
62
|
Args:
|
|
61
63
|
----
|
|
62
64
|
arch: name of the architecture or model itself to use (e.g. 'crnn_vgg16_bn')
|
|
65
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
63
66
|
**kwargs: optional parameters to be passed to the architecture
|
|
64
67
|
|
|
65
68
|
Returns:
|
|
66
69
|
-------
|
|
67
70
|
Recognition predictor
|
|
68
71
|
"""
|
|
69
|
-
return _predictor(arch, **kwargs)
|
|
72
|
+
return _predictor(arch, load_in_8_bit, **kwargs)
|
onnxtr/models/zoo.py
CHANGED
|
@@ -23,6 +23,7 @@ def _predictor(
|
|
|
23
23
|
detect_orientation: bool = False,
|
|
24
24
|
straighten_pages: bool = False,
|
|
25
25
|
detect_language: bool = False,
|
|
26
|
+
load_in_8_bit: bool = False,
|
|
26
27
|
**kwargs,
|
|
27
28
|
) -> OCRPredictor:
|
|
28
29
|
# Detection
|
|
@@ -32,12 +33,14 @@ def _predictor(
|
|
|
32
33
|
assume_straight_pages=assume_straight_pages,
|
|
33
34
|
preserve_aspect_ratio=preserve_aspect_ratio,
|
|
34
35
|
symmetric_pad=symmetric_pad,
|
|
36
|
+
load_in_8_bit=load_in_8_bit,
|
|
35
37
|
)
|
|
36
38
|
|
|
37
39
|
# Recognition
|
|
38
40
|
reco_predictor = recognition_predictor(
|
|
39
41
|
reco_arch,
|
|
40
42
|
batch_size=reco_bs,
|
|
43
|
+
load_in_8_bit=load_in_8_bit,
|
|
41
44
|
)
|
|
42
45
|
|
|
43
46
|
return OCRPredictor(
|
|
@@ -63,6 +66,7 @@ def ocr_predictor(
|
|
|
63
66
|
detect_orientation: bool = False,
|
|
64
67
|
straighten_pages: bool = False,
|
|
65
68
|
detect_language: bool = False,
|
|
69
|
+
load_in_8_bit: bool = False,
|
|
66
70
|
**kwargs: Any,
|
|
67
71
|
) -> OCRPredictor:
|
|
68
72
|
"""End-to-end OCR architecture using one model for localization, and another for text recognition.
|
|
@@ -94,6 +98,7 @@ def ocr_predictor(
|
|
|
94
98
|
Doing so will improve performances for documents with page-uniform rotations.
|
|
95
99
|
detect_language: if True, the language prediction will be added to the predictions for each
|
|
96
100
|
page. Doing so will slightly deteriorate the overall latency.
|
|
101
|
+
load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
|
|
97
102
|
kwargs: keyword args of `OCRPredictor`
|
|
98
103
|
|
|
99
104
|
Returns:
|
|
@@ -110,5 +115,6 @@ def ocr_predictor(
|
|
|
110
115
|
detect_orientation=detect_orientation,
|
|
111
116
|
straighten_pages=straighten_pages,
|
|
112
117
|
detect_language=detect_language,
|
|
118
|
+
load_in_8_bit=load_in_8_bit,
|
|
113
119
|
**kwargs,
|
|
114
120
|
)
|
onnxtr/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = 'v0.
|
|
1
|
+
__version__ = 'v0.2.0'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: onnxtr
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -227,7 +227,6 @@ Description-Content-Type: text/markdown
|
|
|
227
227
|
License-File: LICENSE
|
|
228
228
|
Requires-Dist: numpy <2.0.0,>=1.16.0
|
|
229
229
|
Requires-Dist: scipy <2.0.0,>=1.4.0
|
|
230
|
-
Requires-Dist: onnx <2.0.0,>=1.12.0
|
|
231
230
|
Requires-Dist: opencv-python <5.0.0,>=4.5.0
|
|
232
231
|
Requires-Dist: pypdfium2 <5.0.0,>=4.0.0
|
|
233
232
|
Requires-Dist: pyclipper <2.0.0,>=1.2.0
|
|
@@ -276,17 +275,18 @@ Requires-Dist: mplcursors >=0.3 ; extra == 'viz'
|
|
|
276
275
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
277
276
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
278
277
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
279
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
280
279
|
|
|
281
|
-
> :warning: Please note that this is wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
280
|
+
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide an Onnx pipeline for docTR. For feature requests that are not directly related to the Onnx pipeline, please refer to the base project.
|
|
282
281
|
|
|
283
282
|
**Optical Character Recognition made seamless & accessible to anyone, powered by Onnx**
|
|
284
283
|
|
|
285
284
|
What you can expect from this repository:
|
|
286
285
|
|
|
287
286
|
- efficient ways to parse textual information (localize and identify each word) from your documents
|
|
288
|
-
- a Onnx pipeline for docTR, a wrapper around the [doctr](https://github.com/mindee/doctr) library
|
|
287
|
+
- an Onnx pipeline for docTR, a wrapper around the [doctr](https://github.com/mindee/doctr) library - no PyTorch or TensorFlow dependencies
|
|
289
288
|
- more lightweight package with faster inference latency and less required resources
|
|
289
|
+
- 8-Bit quantized models for faster inference on CPU
|
|
290
290
|
|
|
291
291
|

|
|
292
292
|
|
|
@@ -300,10 +300,14 @@ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to
|
|
|
300
300
|
|
|
301
301
|
You can then install the latest release of the package using [pypi](https://pypi.org/project/OnnxTR/) as follows:
|
|
302
302
|
|
|
303
|
-
**NOTE:**
|
|
303
|
+
**NOTE:**
|
|
304
|
+
|
|
305
|
+
For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
|
|
306
|
+
|
|
307
|
+
- **Prerequisites:** CUDA & cuDNN need to be installed beforehand; see the [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for compatible versions.
|
|
304
308
|
|
|
305
309
|
```shell
|
|
306
|
-
pip install onnxtr[cpu]
|
|
310
|
+
pip install "onnxtr[cpu]"
|
|
307
311
|
# with gpu support
|
|
308
312
|
pip install "onnxtr[gpu]"
|
|
309
313
|
# with HTML support
|
|
@@ -340,17 +344,24 @@ from onnxtr.models import ocr_predictor
|
|
|
340
344
|
|
|
341
345
|
model = ocr_predictor(
|
|
342
346
|
det_arch='fast_base', # detection architecture
|
|
343
|
-
|
|
347
|
+
reco_arch='vitstr_base', # recognition architecture
|
|
344
348
|
det_bs=4, # detection batch size
|
|
345
349
|
reco_bs=1024, # recognition batch size
|
|
346
350
|
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
|
|
347
351
|
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
|
|
352
|
+
# Preprocessing related parameters
|
|
348
353
|
preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
|
|
349
354
|
symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
|
|
355
|
+
# Additional parameters - meta information
|
|
356
|
+
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
|
|
357
|
+
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
|
|
350
358
|
# DocumentBuilder specific parameters
|
|
351
359
|
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
|
|
352
360
|
resolve_blocks=True, # whether lines should be automatically grouped into blocks (default: True)
|
|
353
361
|
paragraph_break=0.035, # relative length of the minimum space separating paragraphs (default: 0.035)
|
|
362
|
+
# OnnxTR specific parameters
|
|
363
|
+
# NOTE: 8-Bit quantized models are not available for FAST detection models and can in general lead to poorer accuracy
|
|
364
|
+
load_in_8_bit=False, # set to `True` to load 8-bit quantized models instead of the full precision ones (default: False)
|
|
354
365
|
)
|
|
355
366
|
# PDF
|
|
356
367
|
doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
|
|
@@ -398,7 +409,7 @@ from onnxtr.models import ocr_predictor, linknet_resnet18, parseq
|
|
|
398
409
|
|
|
399
410
|
reco_model = parseq("path_to_custom_model.onnx", vocab="ABC")
|
|
400
411
|
det_model = linknet_resnet18("path_to_custom_model.onnx")
|
|
401
|
-
model = ocr_predictor(
|
|
412
|
+
model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
|
|
402
413
|
```
|
|
403
414
|
|
|
404
415
|
## Models architectures
|
|
@@ -431,9 +442,9 @@ predictor.list_archs()
|
|
|
431
442
|
'linknet_resnet18',
|
|
432
443
|
'linknet_resnet34',
|
|
433
444
|
'linknet_resnet50',
|
|
434
|
-
'fast_tiny',
|
|
435
|
-
'fast_small',
|
|
436
|
-
'fast_base'
|
|
445
|
+
'fast_tiny', # No 8-bit support
|
|
446
|
+
'fast_small', # No 8-bit support
|
|
447
|
+
'fast_base' # No 8-bit support
|
|
437
448
|
],
|
|
438
449
|
'recognition archs':
|
|
439
450
|
[
|
|
@@ -462,7 +473,36 @@ NOTE:
|
|
|
462
473
|
|
|
463
474
|
### Benchmarks
|
|
464
475
|
|
|
465
|
-
|
|
476
|
+
The CPU benchmarks were measured on an `i7-14700K Intel CPU`.
|
|
477
|
+
|
|
478
|
+
The GPU benchmarks were measured on an `RTX 4080 Nvidia GPU`.
|
|
479
|
+
|
|
480
|
+
Benchmarking was performed on the FUNSD and CORD datasets.
|
|
481
|
+
|
|
482
|
+
docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.
|
|
483
|
+
|
|
484
|
+
For comparison, the smallest combination in OnnxTR (docTR), `db_mobilenet_v3_large` with `crnn_mobilenet_v3_small`, takes `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision**.
|
|
485
|
+
|
|
486
|
+
- CPU benchmarks:
|
|
487
|
+
|
|
488
|
+
|Library |FUNSD (199 pages) |CORD (900 pages) |
|
|
489
|
+
|--------------------------------|-------------------------------|-------------------------------|
|
|
490
|
+
|docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
|
|
491
|
+
|**OnnxTR (CPU)** - v0.1.2 | ~0.57s / Page | **~0.25s / Page** |
|
|
492
|
+
|**OnnxTR (CPU) 8-bit** - v0.1.2 | **~0.38s / Page** | **~0.14s / Page** |
|
|
493
|
+
|EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
|
|
494
|
+
|**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
|
|
495
|
+
|Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
|
|
496
|
+
|
|
497
|
+
- GPU benchmarks:
|
|
498
|
+
|
|
499
|
+
|Library |FUNSD (199 pages) |CORD (900 pages) |
|
|
500
|
+
|--------------------------------|-------------------------------|-------------------------------|
|
|
501
|
+
|docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
|
|
502
|
+
|**docTR (GPU) float16** - v0.8.1| **~0.06s / Page** | **~0.03s / Page** |
|
|
503
|
+
|OnnxTR (GPU) - v0.1.2 | **~0.06s / Page** | ~0.04s / Page |
|
|
504
|
+
|EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
|
|
505
|
+
|Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
|
|
466
506
|
|
|
467
507
|
## Citation
|
|
468
508
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
onnxtr/__init__.py,sha256=h7Wc2tuHLsaoCk5xNpEFEK-g11A6SJA7nAasA76TQ_Y,100
|
|
2
2
|
onnxtr/file_utils.py,sha256=WjUKalEdR53aoeIY4e-ihy3r7J_C9qFxL40JHGPfutc,1107
|
|
3
|
-
onnxtr/version.py,sha256=
|
|
3
|
+
onnxtr/version.py,sha256=F-dLDdnsmwBrwqQCEb3nX0LORU_q1xg9pmFGVU_3j8o,23
|
|
4
4
|
onnxtr/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
onnxtr/contrib/artefacts.py,sha256=tdmfhvfXVRYEH7uj4_hqf2cuUGoTieyNK8bXsD3zHwo,5383
|
|
6
6
|
onnxtr/contrib/base.py,sha256=PoCKtOIgj7u4xl-V0eBVh-QmVeTyk_eEggFHQ8R34AI,3445
|
|
@@ -13,40 +13,40 @@ onnxtr/io/reader.py,sha256=BA7DPhW-Gkmce_ZfzrOl4H3pSXVy2JBeQEuY3pWrBFg,2852
|
|
|
13
13
|
onnxtr/models/__init__.py,sha256=CzdiA34zjFq4a22XvgzG_Bojosi9aN5jeRCizRyh_7o,101
|
|
14
14
|
onnxtr/models/_utils.py,sha256=RnSmj70gFU9CCJlsCYiVULEs7L8MhogkLf2Xu_7wBhM,4824
|
|
15
15
|
onnxtr/models/builder.py,sha256=byUpCs9x5RS2lqNXLECR5GELqd1yF-MEg_09OGWj20Q,13642
|
|
16
|
-
onnxtr/models/engine.py,sha256=
|
|
17
|
-
onnxtr/models/zoo.py,sha256=
|
|
16
|
+
onnxtr/models/engine.py,sha256=EtBIceuxIdJ1bqEl59n35D-AUrkIEelSOqD1IWEtXAE,2174
|
|
17
|
+
onnxtr/models/zoo.py,sha256=oHk9BOt5U3fZBpeFRgKkQ_U6PthwfoKw0lBEi4Lu6ak,4567
|
|
18
18
|
onnxtr/models/classification/__init__.py,sha256=h1bZs55iLJBMATtzS4ntTKwfD6OGXBiiqGv_hEnOFnE,41
|
|
19
|
-
onnxtr/models/classification/zoo.py,sha256=
|
|
19
|
+
onnxtr/models/classification/zoo.py,sha256=MW2UH3MOEJ1aDwHQ_SjrUfPb_U6MrWA3lOGlhwxogaQ,3058
|
|
20
20
|
onnxtr/models/classification/models/__init__.py,sha256=rohbM6ZQslfYchi7feZwwh-sX3XXRUhgtEJQeurAytQ,24
|
|
21
|
-
onnxtr/models/classification/models/mobilenet.py,sha256=
|
|
21
|
+
onnxtr/models/classification/models/mobilenet.py,sha256=z9zN0GnXcOCTH-Hu-xKsjA4W7r3D8F9n_hE-T_Rzqhc,4427
|
|
22
22
|
onnxtr/models/classification/predictor/__init__.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
|
|
23
|
-
onnxtr/models/classification/predictor/base.py,sha256=
|
|
23
|
+
onnxtr/models/classification/predictor/base.py,sha256=Xfaj2XlaJuQ2R81XqF5RB0Wcvzd4wh7Z6j1ifn2niFc,2097
|
|
24
24
|
onnxtr/models/detection/__init__.py,sha256=h1bZs55iLJBMATtzS4ntTKwfD6OGXBiiqGv_hEnOFnE,41
|
|
25
25
|
onnxtr/models/detection/core.py,sha256=ZmVDHLJ1l4LQ8rFSKc7enXDkGcOWrcQv4H0SJWyLsag,3584
|
|
26
|
-
onnxtr/models/detection/zoo.py,sha256=
|
|
26
|
+
onnxtr/models/detection/zoo.py,sha256=4DDHWQtWAQpEhl0SHcqEV12HgjXpTBh4LB4tdQc7etk,2457
|
|
27
27
|
onnxtr/models/detection/models/__init__.py,sha256=6Ea6knYrVCR2jAmPlsVWmCdHe-c6lSRETSAuZGfhx8I,85
|
|
28
|
-
onnxtr/models/detection/models/differentiable_binarization.py,sha256=
|
|
29
|
-
onnxtr/models/detection/models/fast.py,sha256=
|
|
30
|
-
onnxtr/models/detection/models/linknet.py,sha256=
|
|
28
|
+
onnxtr/models/detection/models/differentiable_binarization.py,sha256=mn1kE7k6VLsiyERH9ghtVJBqalG_In2dv8SvtJNN_HM,6131
|
|
29
|
+
onnxtr/models/detection/models/fast.py,sha256=jhw8M6vf9FM8Q5a5Lgu6mfpzkLrIEErX-1GxCR_DfYw,5677
|
|
30
|
+
onnxtr/models/detection/models/linknet.py,sha256=PCwaYhTqFgt2BbixoVTeZWHN9fd4r2i1GTZ0CywZRvw,6173
|
|
31
31
|
onnxtr/models/detection/postprocessor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
32
|
onnxtr/models/detection/postprocessor/base.py,sha256=U8FgqveZt2qPHI0aUnIEeX370zGUEWUxL6EPqhFZnRI,5714
|
|
33
33
|
onnxtr/models/detection/predictor/__init__.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
|
|
34
34
|
onnxtr/models/detection/predictor/base.py,sha256=K0fSTFnA8-fGL9F3HzW3nqnEqO5YKbhFf2TvpUOe_Tw,1744
|
|
35
35
|
onnxtr/models/predictor/__init__.py,sha256=XL25XkRkgyK7mldF-CWhg2MMakSdP5vLpDLwL59hphk,25
|
|
36
|
-
onnxtr/models/predictor/base.py,sha256=
|
|
36
|
+
onnxtr/models/predictor/base.py,sha256=6rWf8FZrp6ku4yvP1jJl39t0Y8L7wqWb2GYYagTExtw,7731
|
|
37
37
|
onnxtr/models/predictor/predictor.py,sha256=sfwqRzabqavlI6ty_Z6jGm64olmmQBLQGU39_nulQZs,6045
|
|
38
38
|
onnxtr/models/preprocessor/__init__.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
|
|
39
39
|
onnxtr/models/preprocessor/base.py,sha256=f0t0rMCzvuxwgq7jlKvcVWyjeDOx7yCLUw52quEaETM,3990
|
|
40
40
|
onnxtr/models/recognition/__init__.py,sha256=h1bZs55iLJBMATtzS4ntTKwfD6OGXBiiqGv_hEnOFnE,41
|
|
41
41
|
onnxtr/models/recognition/core.py,sha256=0Q1dVXqRcDUr_ycT5tpoSH9-zuDF58GtnmxWpUS8Ibo,739
|
|
42
42
|
onnxtr/models/recognition/utils.py,sha256=04abbjx-_OuF5iEANWIAOK3tQQl1tExPmBQx4IG04Lc,3569
|
|
43
|
-
onnxtr/models/recognition/zoo.py,sha256=
|
|
43
|
+
onnxtr/models/recognition/zoo.py,sha256=MR5O5UVM0tmWGKdGzXTE7HTBCYKZBuOVwvRBASz-zRo,2309
|
|
44
44
|
onnxtr/models/recognition/models/__init__.py,sha256=IXfiuzzkft8O1CpBZWYTpFw19y49mt5rJ_iGSdaWiU0,105
|
|
45
|
-
onnxtr/models/recognition/models/crnn.py,sha256=
|
|
46
|
-
onnxtr/models/recognition/models/master.py,sha256=
|
|
47
|
-
onnxtr/models/recognition/models/parseq.py,sha256=
|
|
48
|
-
onnxtr/models/recognition/models/sar.py,sha256=
|
|
49
|
-
onnxtr/models/recognition/models/vitstr.py,sha256=
|
|
45
|
+
onnxtr/models/recognition/models/crnn.py,sha256=jtrbe2WqPCvWCgaZkWVCX0LPE3poQXQpe6Aq6pJN6i4,8365
|
|
46
|
+
onnxtr/models/recognition/models/master.py,sha256=iacF5zZNf3kWTetYbGemBMey8o0heHmgFi8sVoTMyik,4427
|
|
47
|
+
onnxtr/models/recognition/models/parseq.py,sha256=omHQVfJ1BR03JxiPcr5wxR_cx9okiWddTCikpUOJc-E,4227
|
|
48
|
+
onnxtr/models/recognition/models/sar.py,sha256=12U2RH8jQp4SE2axHvME3TjIa5WkSbZaJfzClFxVGUE,4244
|
|
49
|
+
onnxtr/models/recognition/models/vitstr.py,sha256=uPyBBE-PDPJNxzUFqJbaVdm6MTCbE5dgvst2MSPqswk,5583
|
|
50
50
|
onnxtr/models/recognition/predictor/__init__.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
|
|
51
51
|
onnxtr/models/recognition/predictor/_utils.py,sha256=ZNm5I7ibiWfTlz302uiifCkUOu65YWa-oUBUMPrrUuQ,3406
|
|
52
52
|
onnxtr/models/recognition/predictor/base.py,sha256=YvqSNEM3rCEttxl6hsC9zl1R97N9zO2WZfD5_-nfkR0,2483
|
|
@@ -62,9 +62,9 @@ onnxtr/utils/reconstitution.py,sha256=Hx1_ddLevKLzuxXc19UelPdsGlAwqi4f6vRSYKHDUB
|
|
|
62
62
|
onnxtr/utils/repr.py,sha256=kfbjGL6KymGT8spo2UL4FJXZ0XRwa7CO7Y1dTVR8dIk,2129
|
|
63
63
|
onnxtr/utils/visualization.py,sha256=CX09qvDnNIw3BFW5F3jM4R9OcpLWAeZyoDyTAOGRvls,9925
|
|
64
64
|
onnxtr/utils/vocabs.py,sha256=SCQ4XQjbHSxunj1tg2iHRiPfE8OaTAMhcJbKq5BNvFs,3138
|
|
65
|
-
onnxtr-0.
|
|
66
|
-
onnxtr-0.
|
|
67
|
-
onnxtr-0.
|
|
68
|
-
onnxtr-0.
|
|
69
|
-
onnxtr-0.
|
|
70
|
-
onnxtr-0.
|
|
65
|
+
onnxtr-0.2.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
66
|
+
onnxtr-0.2.0.dist-info/METADATA,sha256=Uc9NddF-vWik-X2Sohhx-3Jw8gZJxp86GFpt1Q7-QRg,27690
|
|
67
|
+
onnxtr-0.2.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
68
|
+
onnxtr-0.2.0.dist-info/top_level.txt,sha256=48aR9EH095hmgye7CNuxAW3o_cj4TjhWmuw3jEMdTs0,12
|
|
69
|
+
onnxtr-0.2.0.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
70
|
+
onnxtr-0.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|