onnxtr 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {onnxtr-0.1.0 → onnxtr-0.1.2}/PKG-INFO +30 -14
- {onnxtr-0.1.0 → onnxtr-0.1.2}/README.md +26 -11
- onnxtr-0.1.2/onnxtr/version.py +1 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr.egg-info/PKG-INFO +30 -14
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr.egg-info/requires.txt +4 -2
- {onnxtr-0.1.0 → onnxtr-0.1.2}/pyproject.toml +5 -3
- {onnxtr-0.1.0 → onnxtr-0.1.2}/setup.py +1 -1
- onnxtr-0.1.0/onnxtr/version.py +0 -1
- {onnxtr-0.1.0 → onnxtr-0.1.2}/LICENSE +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/contrib/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/contrib/artefacts.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/contrib/base.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/file_utils.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/io/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/io/elements.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/io/html.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/io/image.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/io/pdf.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/io/reader.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/_utils.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/builder.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/classification/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/classification/models/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/classification/models/mobilenet.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/classification/predictor/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/classification/predictor/base.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/classification/zoo.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/detection/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/detection/core.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/detection/models/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/detection/models/differentiable_binarization.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/detection/models/fast.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/detection/models/linknet.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/detection/postprocessor/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/detection/postprocessor/base.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/detection/predictor/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/detection/predictor/base.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/detection/zoo.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/engine.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/predictor/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/predictor/base.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/predictor/predictor.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/preprocessor/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/preprocessor/base.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/core.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/models/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/models/crnn.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/models/master.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/models/parseq.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/models/sar.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/models/vitstr.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/predictor/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/predictor/_utils.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/predictor/base.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/utils.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/recognition/zoo.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/models/zoo.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/transforms/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/transforms/base.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/utils/__init__.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/utils/common_types.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/utils/data.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/utils/fonts.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/utils/geometry.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/utils/multithreading.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/utils/reconstitution.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/utils/repr.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/utils/visualization.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr/utils/vocabs.py +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr.egg-info/SOURCES.txt +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr.egg-info/dependency_links.txt +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr.egg-info/top_level.txt +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/onnxtr.egg-info/zip-safe +0 -0
- {onnxtr-0.1.0 → onnxtr-0.1.2}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: onnxtr
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -227,8 +227,6 @@ Description-Content-Type: text/markdown
|
|
|
227
227
|
License-File: LICENSE
|
|
228
228
|
Requires-Dist: numpy<2.0.0,>=1.16.0
|
|
229
229
|
Requires-Dist: scipy<2.0.0,>=1.4.0
|
|
230
|
-
Requires-Dist: onnx<2.0.0,>=1.12.0
|
|
231
|
-
Requires-Dist: onnxruntime>=1.11.0
|
|
232
230
|
Requires-Dist: opencv-python<5.0.0,>=4.5.0
|
|
233
231
|
Requires-Dist: pypdfium2<5.0.0,>=4.0.0
|
|
234
232
|
Requires-Dist: pyclipper<2.0.0,>=1.2.0
|
|
@@ -239,6 +237,8 @@ Requires-Dist: Pillow>=9.2.0
|
|
|
239
237
|
Requires-Dist: defusedxml>=0.7.0
|
|
240
238
|
Requires-Dist: anyascii>=0.3.2
|
|
241
239
|
Requires-Dist: tqdm>=4.30.0
|
|
240
|
+
Provides-Extra: cpu
|
|
241
|
+
Requires-Dist: onnxruntime>=1.11.0; extra == "cpu"
|
|
242
242
|
Provides-Extra: gpu
|
|
243
243
|
Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu"
|
|
244
244
|
Provides-Extra: html
|
|
@@ -255,6 +255,7 @@ Requires-Dist: ruff>=0.1.5; extra == "quality"
|
|
|
255
255
|
Requires-Dist: mypy>=0.812; extra == "quality"
|
|
256
256
|
Requires-Dist: pre-commit>=2.17.0; extra == "quality"
|
|
257
257
|
Provides-Extra: dev
|
|
258
|
+
Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
|
|
258
259
|
Requires-Dist: weasyprint>=55.0; extra == "dev"
|
|
259
260
|
Requires-Dist: matplotlib>=3.1.0; extra == "dev"
|
|
260
261
|
Requires-Dist: mplcursors>=0.3; extra == "dev"
|
|
@@ -274,9 +275,9 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
|
274
275
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
275
276
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
276
277
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
277
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
278
279
|
|
|
279
|
-
> :warning: Please note that this is wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
280
|
+
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
280
281
|
|
|
281
282
|
**Optical Character Recognition made seamless & accessible to anyone, powered by Onnx**
|
|
282
283
|
|
|
@@ -298,18 +299,22 @@ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to
|
|
|
298
299
|
|
|
299
300
|
You can then install the latest release of the package using [pypi](https://pypi.org/project/OnnxTR/) as follows:
|
|
300
301
|
|
|
301
|
-
**NOTE:**
|
|
302
|
+
**NOTE:**
|
|
303
|
+
|
|
304
|
+
For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
|
|
305
|
+
|
|
306
|
+
- **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).
|
|
302
307
|
|
|
303
308
|
```shell
|
|
304
|
-
pip install
|
|
309
|
+
pip install "onnxtr[cpu]"
|
|
305
310
|
# with gpu support
|
|
306
|
-
pip install "
|
|
311
|
+
pip install "onnxtr[gpu]"
|
|
307
312
|
# with HTML support
|
|
308
|
-
pip install "
|
|
313
|
+
pip install "onnxtr[html]"
|
|
309
314
|
# with support for visualization
|
|
310
|
-
pip install "
|
|
315
|
+
pip install "onnxtr[viz]"
|
|
311
316
|
# with support for all dependencies
|
|
312
|
-
pip install "
|
|
317
|
+
pip install "onnxtr[html, gpu, viz]"
|
|
313
318
|
```
|
|
314
319
|
|
|
315
320
|
### Reading files
|
|
@@ -338,13 +343,17 @@ from onnxtr.models import ocr_predictor
|
|
|
338
343
|
|
|
339
344
|
model = ocr_predictor(
|
|
340
345
|
det_arch='fast_base', # detection architecture
|
|
341
|
-
|
|
346
|
+
reco_arch='vitstr_base', # recognition architecture
|
|
342
347
|
det_bs=4, # detection batch size
|
|
343
348
|
reco_bs=1024, # recognition batch size
|
|
344
349
|
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
|
|
345
350
|
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
|
|
351
|
+
# Preprocessing related parameters
|
|
346
352
|
preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
|
|
347
353
|
symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
|
|
354
|
+
# Additional parameters - meta information
|
|
355
|
+
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
|
|
356
|
+
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
|
|
348
357
|
# DocumentBuilder specific parameters
|
|
349
358
|
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
|
|
350
359
|
resolve_blocks=True, # whether lines should be automatically grouped into blocks (default: True)
|
|
@@ -396,7 +405,7 @@ from onnxtr.models import ocr_predictor, linknet_resnet18, parseq
|
|
|
396
405
|
|
|
397
406
|
reco_model = parseq("path_to_custom_model.onnx", vocab="ABC")
|
|
398
407
|
det_model = linknet_resnet18("path_to_custom_model.onnx")
|
|
399
|
-
model = ocr_predictor(
|
|
408
|
+
model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
|
|
400
409
|
```
|
|
401
410
|
|
|
402
411
|
## Models architectures
|
|
@@ -460,7 +469,14 @@ NOTE:
|
|
|
460
469
|
|
|
461
470
|
### Benchmarks
|
|
462
471
|
|
|
463
|
-
|
|
472
|
+
The benchmarks was measured on a `i7-14700K Intel CPU`.
|
|
473
|
+
|
|
474
|
+
MORE BENCHMARKS COMING SOON
|
|
475
|
+
|
|
476
|
+
|Dataset |docTR (CPU) - v0.8.1 |OnnxTR (CPU) - v0.1.1 |
|
|
477
|
+
|--------------------------------|-------------------------------|-------------------------------|
|
|
478
|
+
|FUNSD (199 pages) | ~1.29s / Page | ~0.57s / Page |
|
|
479
|
+
|CORD (900 pages) | ~0.60s / Page | ~0.25s / Page |
|
|
464
480
|
|
|
465
481
|
## Citation
|
|
466
482
|
|
|
@@ -7,9 +7,9 @@
|
|
|
7
7
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
8
8
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
9
9
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
10
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
11
11
|
|
|
12
|
-
> :warning: Please note that this is wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
12
|
+
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
13
13
|
|
|
14
14
|
**Optical Character Recognition made seamless & accessible to anyone, powered by Onnx**
|
|
15
15
|
|
|
@@ -31,18 +31,22 @@ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to
|
|
|
31
31
|
|
|
32
32
|
You can then install the latest release of the package using [pypi](https://pypi.org/project/OnnxTR/) as follows:
|
|
33
33
|
|
|
34
|
-
**NOTE:**
|
|
34
|
+
**NOTE:**
|
|
35
|
+
|
|
36
|
+
For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
|
|
37
|
+
|
|
38
|
+
- **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).
|
|
35
39
|
|
|
36
40
|
```shell
|
|
37
|
-
pip install
|
|
41
|
+
pip install "onnxtr[cpu]"
|
|
38
42
|
# with gpu support
|
|
39
|
-
pip install "
|
|
43
|
+
pip install "onnxtr[gpu]"
|
|
40
44
|
# with HTML support
|
|
41
|
-
pip install "
|
|
45
|
+
pip install "onnxtr[html]"
|
|
42
46
|
# with support for visualization
|
|
43
|
-
pip install "
|
|
47
|
+
pip install "onnxtr[viz]"
|
|
44
48
|
# with support for all dependencies
|
|
45
|
-
pip install "
|
|
49
|
+
pip install "onnxtr[html, gpu, viz]"
|
|
46
50
|
```
|
|
47
51
|
|
|
48
52
|
### Reading files
|
|
@@ -71,13 +75,17 @@ from onnxtr.models import ocr_predictor
|
|
|
71
75
|
|
|
72
76
|
model = ocr_predictor(
|
|
73
77
|
det_arch='fast_base', # detection architecture
|
|
74
|
-
|
|
78
|
+
reco_arch='vitstr_base', # recognition architecture
|
|
75
79
|
det_bs=4, # detection batch size
|
|
76
80
|
reco_bs=1024, # recognition batch size
|
|
77
81
|
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
|
|
78
82
|
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
|
|
83
|
+
# Preprocessing related parameters
|
|
79
84
|
preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
|
|
80
85
|
symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
|
|
86
|
+
# Additional parameters - meta information
|
|
87
|
+
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
|
|
88
|
+
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
|
|
81
89
|
# DocumentBuilder specific parameters
|
|
82
90
|
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
|
|
83
91
|
resolve_blocks=True, # whether lines should be automatically grouped into blocks (default: True)
|
|
@@ -129,7 +137,7 @@ from onnxtr.models import ocr_predictor, linknet_resnet18, parseq
|
|
|
129
137
|
|
|
130
138
|
reco_model = parseq("path_to_custom_model.onnx", vocab="ABC")
|
|
131
139
|
det_model = linknet_resnet18("path_to_custom_model.onnx")
|
|
132
|
-
model = ocr_predictor(
|
|
140
|
+
model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
|
|
133
141
|
```
|
|
134
142
|
|
|
135
143
|
## Models architectures
|
|
@@ -193,7 +201,14 @@ NOTE:
|
|
|
193
201
|
|
|
194
202
|
### Benchmarks
|
|
195
203
|
|
|
196
|
-
|
|
204
|
+
The benchmarks was measured on a `i7-14700K Intel CPU`.
|
|
205
|
+
|
|
206
|
+
MORE BENCHMARKS COMING SOON
|
|
207
|
+
|
|
208
|
+
|Dataset |docTR (CPU) - v0.8.1 |OnnxTR (CPU) - v0.1.1 |
|
|
209
|
+
|--------------------------------|-------------------------------|-------------------------------|
|
|
210
|
+
|FUNSD (199 pages) | ~1.29s / Page | ~0.57s / Page |
|
|
211
|
+
|CORD (900 pages) | ~0.60s / Page | ~0.25s / Page |
|
|
197
212
|
|
|
198
213
|
## Citation
|
|
199
214
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = 'v0.1.2'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: onnxtr
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -227,8 +227,6 @@ Description-Content-Type: text/markdown
|
|
|
227
227
|
License-File: LICENSE
|
|
228
228
|
Requires-Dist: numpy<2.0.0,>=1.16.0
|
|
229
229
|
Requires-Dist: scipy<2.0.0,>=1.4.0
|
|
230
|
-
Requires-Dist: onnx<2.0.0,>=1.12.0
|
|
231
|
-
Requires-Dist: onnxruntime>=1.11.0
|
|
232
230
|
Requires-Dist: opencv-python<5.0.0,>=4.5.0
|
|
233
231
|
Requires-Dist: pypdfium2<5.0.0,>=4.0.0
|
|
234
232
|
Requires-Dist: pyclipper<2.0.0,>=1.2.0
|
|
@@ -239,6 +237,8 @@ Requires-Dist: Pillow>=9.2.0
|
|
|
239
237
|
Requires-Dist: defusedxml>=0.7.0
|
|
240
238
|
Requires-Dist: anyascii>=0.3.2
|
|
241
239
|
Requires-Dist: tqdm>=4.30.0
|
|
240
|
+
Provides-Extra: cpu
|
|
241
|
+
Requires-Dist: onnxruntime>=1.11.0; extra == "cpu"
|
|
242
242
|
Provides-Extra: gpu
|
|
243
243
|
Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu"
|
|
244
244
|
Provides-Extra: html
|
|
@@ -255,6 +255,7 @@ Requires-Dist: ruff>=0.1.5; extra == "quality"
|
|
|
255
255
|
Requires-Dist: mypy>=0.812; extra == "quality"
|
|
256
256
|
Requires-Dist: pre-commit>=2.17.0; extra == "quality"
|
|
257
257
|
Provides-Extra: dev
|
|
258
|
+
Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
|
|
258
259
|
Requires-Dist: weasyprint>=55.0; extra == "dev"
|
|
259
260
|
Requires-Dist: matplotlib>=3.1.0; extra == "dev"
|
|
260
261
|
Requires-Dist: mplcursors>=0.3; extra == "dev"
|
|
@@ -274,9 +275,9 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
|
274
275
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
275
276
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
276
277
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
277
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
278
279
|
|
|
279
|
-
> :warning: Please note that this is wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
280
|
+
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
280
281
|
|
|
281
282
|
**Optical Character Recognition made seamless & accessible to anyone, powered by Onnx**
|
|
282
283
|
|
|
@@ -298,18 +299,22 @@ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to
|
|
|
298
299
|
|
|
299
300
|
You can then install the latest release of the package using [pypi](https://pypi.org/project/OnnxTR/) as follows:
|
|
300
301
|
|
|
301
|
-
**NOTE:**
|
|
302
|
+
**NOTE:**
|
|
303
|
+
|
|
304
|
+
For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
|
|
305
|
+
|
|
306
|
+
- **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).
|
|
302
307
|
|
|
303
308
|
```shell
|
|
304
|
-
pip install
|
|
309
|
+
pip install "onnxtr[cpu]"
|
|
305
310
|
# with gpu support
|
|
306
|
-
pip install "
|
|
311
|
+
pip install "onnxtr[gpu]"
|
|
307
312
|
# with HTML support
|
|
308
|
-
pip install "
|
|
313
|
+
pip install "onnxtr[html]"
|
|
309
314
|
# with support for visualization
|
|
310
|
-
pip install "
|
|
315
|
+
pip install "onnxtr[viz]"
|
|
311
316
|
# with support for all dependencies
|
|
312
|
-
pip install "
|
|
317
|
+
pip install "onnxtr[html, gpu, viz]"
|
|
313
318
|
```
|
|
314
319
|
|
|
315
320
|
### Reading files
|
|
@@ -338,13 +343,17 @@ from onnxtr.models import ocr_predictor
|
|
|
338
343
|
|
|
339
344
|
model = ocr_predictor(
|
|
340
345
|
det_arch='fast_base', # detection architecture
|
|
341
|
-
|
|
346
|
+
reco_arch='vitstr_base', # recognition architecture
|
|
342
347
|
det_bs=4, # detection batch size
|
|
343
348
|
reco_bs=1024, # recognition batch size
|
|
344
349
|
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
|
|
345
350
|
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
|
|
351
|
+
# Preprocessing related parameters
|
|
346
352
|
preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
|
|
347
353
|
symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
|
|
354
|
+
# Additional parameters - meta information
|
|
355
|
+
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
|
|
356
|
+
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
|
|
348
357
|
# DocumentBuilder specific parameters
|
|
349
358
|
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
|
|
350
359
|
resolve_blocks=True, # whether lines should be automatically grouped into blocks (default: True)
|
|
@@ -396,7 +405,7 @@ from onnxtr.models import ocr_predictor, linknet_resnet18, parseq
|
|
|
396
405
|
|
|
397
406
|
reco_model = parseq("path_to_custom_model.onnx", vocab="ABC")
|
|
398
407
|
det_model = linknet_resnet18("path_to_custom_model.onnx")
|
|
399
|
-
model = ocr_predictor(
|
|
408
|
+
model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
|
|
400
409
|
```
|
|
401
410
|
|
|
402
411
|
## Models architectures
|
|
@@ -460,7 +469,14 @@ NOTE:
|
|
|
460
469
|
|
|
461
470
|
### Benchmarks
|
|
462
471
|
|
|
463
|
-
|
|
472
|
+
The benchmarks was measured on a `i7-14700K Intel CPU`.
|
|
473
|
+
|
|
474
|
+
MORE BENCHMARKS COMING SOON
|
|
475
|
+
|
|
476
|
+
|Dataset |docTR (CPU) - v0.8.1 |OnnxTR (CPU) - v0.1.1 |
|
|
477
|
+
|--------------------------------|-------------------------------|-------------------------------|
|
|
478
|
+
|FUNSD (199 pages) | ~1.29s / Page | ~0.57s / Page |
|
|
479
|
+
|CORD (900 pages) | ~0.60s / Page | ~0.25s / Page |
|
|
464
480
|
|
|
465
481
|
## Citation
|
|
466
482
|
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
numpy<2.0.0,>=1.16.0
|
|
2
2
|
scipy<2.0.0,>=1.4.0
|
|
3
|
-
onnx<2.0.0,>=1.12.0
|
|
4
|
-
onnxruntime>=1.11.0
|
|
5
3
|
opencv-python<5.0.0,>=4.5.0
|
|
6
4
|
pypdfium2<5.0.0,>=4.0.0
|
|
7
5
|
pyclipper<2.0.0,>=1.2.0
|
|
@@ -13,7 +11,11 @@ defusedxml>=0.7.0
|
|
|
13
11
|
anyascii>=0.3.2
|
|
14
12
|
tqdm>=4.30.0
|
|
15
13
|
|
|
14
|
+
[cpu]
|
|
15
|
+
onnxruntime>=1.11.0
|
|
16
|
+
|
|
16
17
|
[dev]
|
|
18
|
+
onnxruntime>=1.11.0
|
|
17
19
|
weasyprint>=55.0
|
|
18
20
|
matplotlib>=3.1.0
|
|
19
21
|
mplcursors>=0.3
|
|
@@ -33,8 +33,6 @@ dependencies = [
|
|
|
33
33
|
# Additional typing support is brought by numpy>=1.22.4, but core build sticks to >=1.16.0
|
|
34
34
|
"numpy>=1.16.0,<2.0.0",
|
|
35
35
|
"scipy>=1.4.0,<2.0.0",
|
|
36
|
-
"onnx>=1.12.0,<2.0.0",
|
|
37
|
-
"onnxruntime>=1.11.0",
|
|
38
36
|
"opencv-python>=4.5.0,<5.0.0",
|
|
39
37
|
"pypdfium2>=4.0.0,<5.0.0",
|
|
40
38
|
"pyclipper>=1.2.0,<2.0.0",
|
|
@@ -48,6 +46,9 @@ dependencies = [
|
|
|
48
46
|
]
|
|
49
47
|
|
|
50
48
|
[project.optional-dependencies]
|
|
49
|
+
cpu = [
|
|
50
|
+
"onnxruntime>=1.11.0",
|
|
51
|
+
]
|
|
51
52
|
gpu = [
|
|
52
53
|
"onnxruntime-gpu>=1.11.0",
|
|
53
54
|
]
|
|
@@ -69,6 +70,8 @@ quality = [
|
|
|
69
70
|
"pre-commit>=2.17.0",
|
|
70
71
|
]
|
|
71
72
|
dev = [
|
|
73
|
+
# Runtime
|
|
74
|
+
"onnxruntime>=1.11.0",
|
|
72
75
|
# HTML
|
|
73
76
|
"weasyprint>=55.0",
|
|
74
77
|
# Visualization
|
|
@@ -113,7 +116,6 @@ module = [
|
|
|
113
116
|
"cv2.*",
|
|
114
117
|
"matplotlib.*",
|
|
115
118
|
"numpy.*",
|
|
116
|
-
"onnx.*",
|
|
117
119
|
"pyclipper.*",
|
|
118
120
|
"shapely.*",
|
|
119
121
|
"mplcursors.*",
|
onnxtr-0.1.0/onnxtr/version.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = 'v0.1.0'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|