onnxtr 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {onnxtr-0.1.1 → onnxtr-0.1.2}/PKG-INFO +23 -9
- {onnxtr-0.1.1 → onnxtr-0.1.2}/README.md +22 -7
- onnxtr-0.1.2/onnxtr/version.py +1 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr.egg-info/PKG-INFO +23 -9
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr.egg-info/requires.txt +0 -1
- {onnxtr-0.1.1 → onnxtr-0.1.2}/pyproject.toml +0 -2
- {onnxtr-0.1.1 → onnxtr-0.1.2}/setup.py +1 -1
- onnxtr-0.1.1/onnxtr/version.py +0 -1
- {onnxtr-0.1.1 → onnxtr-0.1.2}/LICENSE +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/contrib/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/contrib/artefacts.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/contrib/base.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/file_utils.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/io/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/io/elements.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/io/html.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/io/image.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/io/pdf.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/io/reader.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/_utils.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/builder.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/classification/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/classification/models/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/classification/models/mobilenet.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/classification/predictor/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/classification/predictor/base.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/classification/zoo.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/detection/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/detection/core.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/detection/models/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/detection/models/differentiable_binarization.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/detection/models/fast.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/detection/models/linknet.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/detection/postprocessor/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/detection/postprocessor/base.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/detection/predictor/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/detection/predictor/base.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/detection/zoo.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/engine.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/predictor/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/predictor/base.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/predictor/predictor.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/preprocessor/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/preprocessor/base.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/core.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/models/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/models/crnn.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/models/master.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/models/parseq.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/models/sar.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/models/vitstr.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/predictor/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/predictor/_utils.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/predictor/base.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/utils.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/recognition/zoo.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/models/zoo.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/transforms/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/transforms/base.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/utils/__init__.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/utils/common_types.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/utils/data.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/utils/fonts.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/utils/geometry.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/utils/multithreading.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/utils/reconstitution.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/utils/repr.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/utils/visualization.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr/utils/vocabs.py +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr.egg-info/SOURCES.txt +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr.egg-info/dependency_links.txt +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr.egg-info/top_level.txt +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/onnxtr.egg-info/zip-safe +0 -0
- {onnxtr-0.1.1 → onnxtr-0.1.2}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: onnxtr
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -227,7 +227,6 @@ Description-Content-Type: text/markdown
|
|
|
227
227
|
License-File: LICENSE
|
|
228
228
|
Requires-Dist: numpy<2.0.0,>=1.16.0
|
|
229
229
|
Requires-Dist: scipy<2.0.0,>=1.4.0
|
|
230
|
-
Requires-Dist: onnx<2.0.0,>=1.12.0
|
|
231
230
|
Requires-Dist: opencv-python<5.0.0,>=4.5.0
|
|
232
231
|
Requires-Dist: pypdfium2<5.0.0,>=4.0.0
|
|
233
232
|
Requires-Dist: pyclipper<2.0.0,>=1.2.0
|
|
@@ -276,9 +275,9 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
|
276
275
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
277
276
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
278
277
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
279
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
280
279
|
|
|
281
|
-
> :warning: Please note that this is wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
280
|
+
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
282
281
|
|
|
283
282
|
**Optical Character Recognition made seamless & accessible to anyone, powered by Onnx**
|
|
284
283
|
|
|
@@ -300,10 +299,14 @@ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to
|
|
|
300
299
|
|
|
301
300
|
You can then install the latest release of the package using [pypi](https://pypi.org/project/OnnxTR/) as follows:
|
|
302
301
|
|
|
303
|
-
**NOTE:**
|
|
302
|
+
**NOTE:**
|
|
303
|
+
|
|
304
|
+
For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
|
|
305
|
+
|
|
306
|
+
- **Prerequisites:** CUDA & cuDNN need to be installed beforehand; see the [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for compatible versions.
|
|
304
307
|
|
|
305
308
|
```shell
|
|
306
|
-
pip install onnxtr[cpu]
|
|
309
|
+
pip install "onnxtr[cpu]"
|
|
307
310
|
# with gpu support
|
|
308
311
|
pip install "onnxtr[gpu]"
|
|
309
312
|
# with HTML support
|
|
@@ -340,13 +343,17 @@ from onnxtr.models import ocr_predictor
|
|
|
340
343
|
|
|
341
344
|
model = ocr_predictor(
|
|
342
345
|
det_arch='fast_base', # detection architecture
|
|
343
|
-
|
|
346
|
+
reco_arch='vitstr_base', # recognition architecture
|
|
344
347
|
det_bs=4, # detection batch size
|
|
345
348
|
reco_bs=1024, # recognition batch size
|
|
346
349
|
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
|
|
347
350
|
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
|
|
351
|
+
# Preprocessing related parameters
|
|
348
352
|
preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
|
|
349
353
|
symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
|
|
354
|
+
# Additional parameters - meta information
|
|
355
|
+
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
|
|
356
|
+
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
|
|
350
357
|
# DocumentBuilder specific parameters
|
|
351
358
|
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
|
|
352
359
|
resolve_blocks=True, # whether lines should be automatically grouped into blocks (default: True)
|
|
@@ -398,7 +405,7 @@ from onnxtr.models import ocr_predictor, linknet_resnet18, parseq
|
|
|
398
405
|
|
|
399
406
|
reco_model = parseq("path_to_custom_model.onnx", vocab="ABC")
|
|
400
407
|
det_model = linknet_resnet18("path_to_custom_model.onnx")
|
|
401
|
-
model = ocr_predictor(
|
|
408
|
+
model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
|
|
402
409
|
```
|
|
403
410
|
|
|
404
411
|
## Models architectures
|
|
@@ -462,7 +469,14 @@ NOTE:
|
|
|
462
469
|
|
|
463
470
|
### Benchmarks
|
|
464
471
|
|
|
465
|
-
|
|
472
|
+
The benchmarks were measured on an `i7-14700K Intel CPU`.
|
|
473
|
+
|
|
474
|
+
MORE BENCHMARKS COMING SOON
|
|
475
|
+
|
|
476
|
+
|Dataset |docTR (CPU) - v0.8.1 |OnnxTR (CPU) - v0.1.1 |
|
|
477
|
+
|--------------------------------|-------------------------------|-------------------------------|
|
|
478
|
+
|FUNSD (199 pages) | ~1.29s / Page | ~0.57s / Page |
|
|
479
|
+
|CORD (900 pages) | ~0.60s / Page | ~0.25s / Page |
|
|
466
480
|
|
|
467
481
|
## Citation
|
|
468
482
|
|
|
@@ -7,9 +7,9 @@
|
|
|
7
7
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
8
8
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
9
9
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
10
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
11
11
|
|
|
12
|
-
> :warning: Please note that this is wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
12
|
+
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
13
13
|
|
|
14
14
|
**Optical Character Recognition made seamless & accessible to anyone, powered by Onnx**
|
|
15
15
|
|
|
@@ -31,10 +31,14 @@ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to
|
|
|
31
31
|
|
|
32
32
|
You can then install the latest release of the package using [pypi](https://pypi.org/project/OnnxTR/) as follows:
|
|
33
33
|
|
|
34
|
-
**NOTE:**
|
|
34
|
+
**NOTE:**
|
|
35
|
+
|
|
36
|
+
For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
|
|
37
|
+
|
|
38
|
+
- **Prerequisites:** CUDA & cuDNN need to be installed beforehand; see the [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for compatible versions.
|
|
35
39
|
|
|
36
40
|
```shell
|
|
37
|
-
pip install onnxtr[cpu]
|
|
41
|
+
pip install "onnxtr[cpu]"
|
|
38
42
|
# with gpu support
|
|
39
43
|
pip install "onnxtr[gpu]"
|
|
40
44
|
# with HTML support
|
|
@@ -71,13 +75,17 @@ from onnxtr.models import ocr_predictor
|
|
|
71
75
|
|
|
72
76
|
model = ocr_predictor(
|
|
73
77
|
det_arch='fast_base', # detection architecture
|
|
74
|
-
|
|
78
|
+
reco_arch='vitstr_base', # recognition architecture
|
|
75
79
|
det_bs=4, # detection batch size
|
|
76
80
|
reco_bs=1024, # recognition batch size
|
|
77
81
|
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
|
|
78
82
|
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
|
|
83
|
+
# Preprocessing related parameters
|
|
79
84
|
preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
|
|
80
85
|
symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
|
|
86
|
+
# Additional parameters - meta information
|
|
87
|
+
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
|
|
88
|
+
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
|
|
81
89
|
# DocumentBuilder specific parameters
|
|
82
90
|
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
|
|
83
91
|
resolve_blocks=True, # whether lines should be automatically grouped into blocks (default: True)
|
|
@@ -129,7 +137,7 @@ from onnxtr.models import ocr_predictor, linknet_resnet18, parseq
|
|
|
129
137
|
|
|
130
138
|
reco_model = parseq("path_to_custom_model.onnx", vocab="ABC")
|
|
131
139
|
det_model = linknet_resnet18("path_to_custom_model.onnx")
|
|
132
|
-
model = ocr_predictor(
|
|
140
|
+
model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
|
|
133
141
|
```
|
|
134
142
|
|
|
135
143
|
## Models architectures
|
|
@@ -193,7 +201,14 @@ NOTE:
|
|
|
193
201
|
|
|
194
202
|
### Benchmarks
|
|
195
203
|
|
|
196
|
-
|
|
204
|
+
The benchmarks were measured on an `i7-14700K Intel CPU`.
|
|
205
|
+
|
|
206
|
+
MORE BENCHMARKS COMING SOON
|
|
207
|
+
|
|
208
|
+
|Dataset |docTR (CPU) - v0.8.1 |OnnxTR (CPU) - v0.1.1 |
|
|
209
|
+
|--------------------------------|-------------------------------|-------------------------------|
|
|
210
|
+
|FUNSD (199 pages) | ~1.29s / Page | ~0.57s / Page |
|
|
211
|
+
|CORD (900 pages) | ~0.60s / Page | ~0.25s / Page |
|
|
197
212
|
|
|
198
213
|
## Citation
|
|
199
214
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = 'v0.1.2'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: onnxtr
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -227,7 +227,6 @@ Description-Content-Type: text/markdown
|
|
|
227
227
|
License-File: LICENSE
|
|
228
228
|
Requires-Dist: numpy<2.0.0,>=1.16.0
|
|
229
229
|
Requires-Dist: scipy<2.0.0,>=1.4.0
|
|
230
|
-
Requires-Dist: onnx<2.0.0,>=1.12.0
|
|
231
230
|
Requires-Dist: opencv-python<5.0.0,>=4.5.0
|
|
232
231
|
Requires-Dist: pypdfium2<5.0.0,>=4.0.0
|
|
233
232
|
Requires-Dist: pyclipper<2.0.0,>=1.2.0
|
|
@@ -276,9 +275,9 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
|
276
275
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
277
276
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
278
277
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
279
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
280
279
|
|
|
281
|
-
> :warning: Please note that this is wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
280
|
+
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
282
281
|
|
|
283
282
|
**Optical Character Recognition made seamless & accessible to anyone, powered by Onnx**
|
|
284
283
|
|
|
@@ -300,10 +299,14 @@ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to
|
|
|
300
299
|
|
|
301
300
|
You can then install the latest release of the package using [pypi](https://pypi.org/project/OnnxTR/) as follows:
|
|
302
301
|
|
|
303
|
-
**NOTE:**
|
|
302
|
+
**NOTE:**
|
|
303
|
+
|
|
304
|
+
For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
|
|
305
|
+
|
|
306
|
+
- **Prerequisites:** CUDA & cuDNN need to be installed beforehand; see the [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for compatible versions.
|
|
304
307
|
|
|
305
308
|
```shell
|
|
306
|
-
pip install onnxtr[cpu]
|
|
309
|
+
pip install "onnxtr[cpu]"
|
|
307
310
|
# with gpu support
|
|
308
311
|
pip install "onnxtr[gpu]"
|
|
309
312
|
# with HTML support
|
|
@@ -340,13 +343,17 @@ from onnxtr.models import ocr_predictor
|
|
|
340
343
|
|
|
341
344
|
model = ocr_predictor(
|
|
342
345
|
det_arch='fast_base', # detection architecture
|
|
343
|
-
|
|
346
|
+
reco_arch='vitstr_base', # recognition architecture
|
|
344
347
|
det_bs=4, # detection batch size
|
|
345
348
|
reco_bs=1024, # recognition batch size
|
|
346
349
|
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
|
|
347
350
|
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
|
|
351
|
+
# Preprocessing related parameters
|
|
348
352
|
preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
|
|
349
353
|
symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
|
|
354
|
+
# Additional parameters - meta information
|
|
355
|
+
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
|
|
356
|
+
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
|
|
350
357
|
# DocumentBuilder specific parameters
|
|
351
358
|
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
|
|
352
359
|
resolve_blocks=True, # whether lines should be automatically grouped into blocks (default: True)
|
|
@@ -398,7 +405,7 @@ from onnxtr.models import ocr_predictor, linknet_resnet18, parseq
|
|
|
398
405
|
|
|
399
406
|
reco_model = parseq("path_to_custom_model.onnx", vocab="ABC")
|
|
400
407
|
det_model = linknet_resnet18("path_to_custom_model.onnx")
|
|
401
|
-
model = ocr_predictor(
|
|
408
|
+
model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
|
|
402
409
|
```
|
|
403
410
|
|
|
404
411
|
## Models architectures
|
|
@@ -462,7 +469,14 @@ NOTE:
|
|
|
462
469
|
|
|
463
470
|
### Benchmarks
|
|
464
471
|
|
|
465
|
-
|
|
472
|
+
The benchmarks were measured on an `i7-14700K Intel CPU`.
|
|
473
|
+
|
|
474
|
+
MORE BENCHMARKS COMING SOON
|
|
475
|
+
|
|
476
|
+
|Dataset |docTR (CPU) - v0.8.1 |OnnxTR (CPU) - v0.1.1 |
|
|
477
|
+
|--------------------------------|-------------------------------|-------------------------------|
|
|
478
|
+
|FUNSD (199 pages) | ~1.29s / Page | ~0.57s / Page |
|
|
479
|
+
|CORD (900 pages) | ~0.60s / Page | ~0.25s / Page |
|
|
466
480
|
|
|
467
481
|
## Citation
|
|
468
482
|
|
|
@@ -33,7 +33,6 @@ dependencies = [
|
|
|
33
33
|
# Additional typing support is brought by numpy>=1.22.4, but core build sticks to >=1.16.0
|
|
34
34
|
"numpy>=1.16.0,<2.0.0",
|
|
35
35
|
"scipy>=1.4.0,<2.0.0",
|
|
36
|
-
"onnx>=1.12.0,<2.0.0",
|
|
37
36
|
"opencv-python>=4.5.0,<5.0.0",
|
|
38
37
|
"pypdfium2>=4.0.0,<5.0.0",
|
|
39
38
|
"pyclipper>=1.2.0,<2.0.0",
|
|
@@ -117,7 +116,6 @@ module = [
|
|
|
117
116
|
"cv2.*",
|
|
118
117
|
"matplotlib.*",
|
|
119
118
|
"numpy.*",
|
|
120
|
-
"onnx.*",
|
|
121
119
|
"pyclipper.*",
|
|
122
120
|
"shapely.*",
|
|
123
121
|
"mplcursors.*",
|
onnxtr-0.1.1/onnxtr/version.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = 'v0.1.1'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|