onnxtr 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {onnxtr-0.4.0 → onnxtr-0.5.0}/PKG-INFO +30 -7
- {onnxtr-0.4.0 → onnxtr-0.5.0}/README.md +19 -4
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/builder.py +1 -1
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/models/mobilenet.py +1 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/predictor/base.py +8 -4
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/zoo.py +24 -8
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/predictor/base.py +24 -12
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/predictor/predictor.py +3 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/geometry.py +106 -19
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/vocabs.py +4 -0
- onnxtr-0.5.0/onnxtr/version.py +1 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/PKG-INFO +30 -7
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/requires.txt +12 -2
- {onnxtr-0.4.0 → onnxtr-0.5.0}/pyproject.toml +12 -2
- {onnxtr-0.4.0 → onnxtr-0.5.0}/setup.py +1 -1
- onnxtr-0.4.0/onnxtr/version.py +0 -1
- {onnxtr-0.4.0 → onnxtr-0.5.0}/LICENSE +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/contrib/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/contrib/artefacts.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/contrib/base.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/file_utils.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/elements.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/html.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/image.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/pdf.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/reader.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/_utils.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/models/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/predictor/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/_utils/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/_utils/base.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/core.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/differentiable_binarization.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/fast.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/linknet.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/postprocessor/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/postprocessor/base.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/predictor/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/predictor/base.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/zoo.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/engine.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/factory/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/factory/hub.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/predictor/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/preprocessor/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/preprocessor/base.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/core.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/crnn.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/master.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/parseq.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/sar.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/vitstr.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/predictor/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/predictor/_utils.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/predictor/base.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/utils.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/zoo.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/zoo.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/py.typed +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/transforms/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/transforms/base.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/__init__.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/common_types.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/data.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/fonts.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/multithreading.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/reconstitution.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/repr.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/visualization.py +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/SOURCES.txt +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/dependency_links.txt +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/top_level.txt +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/zip-safe +0 -0
- {onnxtr-0.4.0 → onnxtr-0.5.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: onnxtr
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -225,9 +225,8 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
225
225
|
Requires-Python: <4,>=3.9.0
|
|
226
226
|
Description-Content-Type: text/markdown
|
|
227
227
|
License-File: LICENSE
|
|
228
|
-
Requires-Dist: numpy<2.0.0,>=1.16.0
|
|
228
|
+
Requires-Dist: numpy<3.0.0,>=1.16.0
|
|
229
229
|
Requires-Dist: scipy<2.0.0,>=1.4.0
|
|
230
|
-
Requires-Dist: opencv-python<5.0.0,>=4.5.0
|
|
231
230
|
Requires-Dist: pypdfium2<5.0.0,>=4.11.0
|
|
232
231
|
Requires-Dist: pyclipper<2.0.0,>=1.2.0
|
|
233
232
|
Requires-Dist: shapely<3.0.0,>=1.6.0
|
|
@@ -240,8 +239,16 @@ Requires-Dist: anyascii>=0.3.2
|
|
|
240
239
|
Requires-Dist: tqdm>=4.30.0
|
|
241
240
|
Provides-Extra: cpu
|
|
242
241
|
Requires-Dist: onnxruntime>=1.11.0; extra == "cpu"
|
|
242
|
+
Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "cpu"
|
|
243
243
|
Provides-Extra: gpu
|
|
244
244
|
Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu"
|
|
245
|
+
Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "gpu"
|
|
246
|
+
Provides-Extra: cpu-headless
|
|
247
|
+
Requires-Dist: onnxruntime>=1.11.0; extra == "cpu-headless"
|
|
248
|
+
Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "cpu-headless"
|
|
249
|
+
Provides-Extra: gpu-headless
|
|
250
|
+
Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu-headless"
|
|
251
|
+
Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "gpu-headless"
|
|
245
252
|
Provides-Extra: html
|
|
246
253
|
Requires-Dist: weasyprint>=55.0; extra == "html"
|
|
247
254
|
Provides-Extra: viz
|
|
@@ -257,6 +264,7 @@ Requires-Dist: mypy>=0.812; extra == "quality"
|
|
|
257
264
|
Requires-Dist: pre-commit>=2.17.0; extra == "quality"
|
|
258
265
|
Provides-Extra: dev
|
|
259
266
|
Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
|
|
267
|
+
Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "dev"
|
|
260
268
|
Requires-Dist: weasyprint>=55.0; extra == "dev"
|
|
261
269
|
Requires-Dist: matplotlib>=3.1.0; extra == "dev"
|
|
262
270
|
Requires-Dist: mplcursors>=0.3; extra == "dev"
|
|
@@ -276,7 +284,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
|
276
284
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
277
285
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
278
286
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
279
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
280
288
|
|
|
281
289
|
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
282
290
|
|
|
@@ -309,8 +317,10 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
|
|
|
309
317
|
|
|
310
318
|
```shell
|
|
311
319
|
pip install "onnxtr[cpu]"
|
|
320
|
+
pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
|
|
312
321
|
# with gpu support
|
|
313
322
|
pip install "onnxtr[gpu]"
|
|
323
|
+
pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
|
|
314
324
|
# with HTML support
|
|
315
325
|
pip install "onnxtr[html]"
|
|
316
326
|
# with support for visualization
|
|
@@ -356,6 +366,9 @@ model = ocr_predictor(
|
|
|
356
366
|
# Additional parameters - meta information
|
|
357
367
|
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
|
|
358
368
|
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
|
|
369
|
+
# Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
|
|
370
|
+
disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
|
|
371
|
+
disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
|
|
359
372
|
# DocumentBuilder specific parameters
|
|
360
373
|
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
|
|
361
374
|
resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
|
|
@@ -589,8 +602,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
|
|
|
589
602
|
|Library |FUNSD (199 pages) |CORD (900 pages) |
|
|
590
603
|
|---------------------------------|-------------------------------|-------------------------------|
|
|
591
604
|
|docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
|
|
592
|
-
|**OnnxTR (CPU)** - v0.1
|
|
593
|
-
|**OnnxTR (CPU) 8-bit** - v0.1
|
|
605
|
+
|**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
|
|
606
|
+
|**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
|
|
594
607
|
|EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
|
|
595
608
|
|**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
|
|
596
609
|
|Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
|
|
@@ -602,7 +615,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
|
|
|
602
615
|
|-------------------------------------|-------------------------------|-------------------------------|
|
|
603
616
|
|docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
|
|
604
617
|
|**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
|
|
605
|
-
|OnnxTR (GPU) - v0.1
|
|
618
|
+
|OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
|
|
606
619
|
|EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
|
|
607
620
|
|Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
|
|
608
621
|
|**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
|
|
@@ -621,6 +634,16 @@ If you wish to cite please refer to the base project citation, feel free to use
|
|
|
621
634
|
}
|
|
622
635
|
```
|
|
623
636
|
|
|
637
|
+
```bibtex
|
|
638
|
+
@misc{onnxtr2024,
|
|
639
|
+
title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
|
|
640
|
+
author={Felix Dittrich},
|
|
641
|
+
year={2024},
|
|
642
|
+
publisher = {GitHub},
|
|
643
|
+
howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
|
|
644
|
+
}
|
|
645
|
+
```
|
|
646
|
+
|
|
624
647
|
## License
|
|
625
648
|
|
|
626
649
|
Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
8
8
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
9
9
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
10
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
11
11
|
|
|
12
12
|
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
13
13
|
|
|
@@ -40,8 +40,10 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
|
|
|
40
40
|
|
|
41
41
|
```shell
|
|
42
42
|
pip install "onnxtr[cpu]"
|
|
43
|
+
pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
|
|
43
44
|
# with gpu support
|
|
44
45
|
pip install "onnxtr[gpu]"
|
|
46
|
+
pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
|
|
45
47
|
# with HTML support
|
|
46
48
|
pip install "onnxtr[html]"
|
|
47
49
|
# with support for visualization
|
|
@@ -87,6 +89,9 @@ model = ocr_predictor(
|
|
|
87
89
|
# Additional parameters - meta information
|
|
88
90
|
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
|
|
89
91
|
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
|
|
92
|
+
# Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
|
|
93
|
+
disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
|
|
94
|
+
disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
|
|
90
95
|
# DocumentBuilder specific parameters
|
|
91
96
|
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
|
|
92
97
|
resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
|
|
@@ -320,8 +325,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
|
|
|
320
325
|
|Library |FUNSD (199 pages) |CORD (900 pages) |
|
|
321
326
|
|---------------------------------|-------------------------------|-------------------------------|
|
|
322
327
|
|docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
|
|
323
|
-
|**OnnxTR (CPU)** - v0.1
|
|
324
|
-
|**OnnxTR (CPU) 8-bit** - v0.1
|
|
328
|
+
|**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
|
|
329
|
+
|**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
|
|
325
330
|
|EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
|
|
326
331
|
|**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
|
|
327
332
|
|Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
|
|
@@ -333,7 +338,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
|
|
|
333
338
|
|-------------------------------------|-------------------------------|-------------------------------|
|
|
334
339
|
|docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
|
|
335
340
|
|**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
|
|
336
|
-
|OnnxTR (GPU) - v0.1
|
|
341
|
+
|OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
|
|
337
342
|
|EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
|
|
338
343
|
|Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
|
|
339
344
|
|**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
|
|
@@ -352,6 +357,16 @@ If you wish to cite please refer to the base project citation, feel free to use
|
|
|
352
357
|
}
|
|
353
358
|
```
|
|
354
359
|
|
|
360
|
+
```bibtex
|
|
361
|
+
@misc{onnxtr2024,
|
|
362
|
+
title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
|
|
363
|
+
author={Felix Dittrich},
|
|
364
|
+
year={2024},
|
|
365
|
+
publisher = {GitHub},
|
|
366
|
+
howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
|
|
367
|
+
}
|
|
368
|
+
```
|
|
369
|
+
|
|
355
370
|
## License
|
|
356
371
|
|
|
357
372
|
Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
|
|
@@ -266,7 +266,7 @@ class DocumentBuilder(NestedObject):
|
|
|
266
266
|
Line([
|
|
267
267
|
Word(
|
|
268
268
|
*word_preds[idx],
|
|
269
|
-
tuple(
|
|
269
|
+
tuple(tuple(pt) for pt in boxes[idx].tolist()), # type: ignore[arg-type]
|
|
270
270
|
float(objectness_scores[idx]),
|
|
271
271
|
crop_orientations[idx],
|
|
272
272
|
)
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
-
from typing import Any, List, Union
|
|
6
|
+
from typing import Any, List, Optional, Union
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
from scipy.special import softmax
|
|
@@ -29,10 +29,10 @@ class OrientationPredictor(NestedObject):
|
|
|
29
29
|
|
|
30
30
|
def __init__(
|
|
31
31
|
self,
|
|
32
|
-
pre_processor: PreProcessor,
|
|
33
|
-
model: Any,
|
|
32
|
+
pre_processor: Optional[PreProcessor],
|
|
33
|
+
model: Optional[Any],
|
|
34
34
|
) -> None:
|
|
35
|
-
self.pre_processor = pre_processor
|
|
35
|
+
self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None
|
|
36
36
|
self.model = model
|
|
37
37
|
|
|
38
38
|
def __call__(
|
|
@@ -43,6 +43,10 @@ class OrientationPredictor(NestedObject):
|
|
|
43
43
|
if any(input.ndim != 3 for input in inputs):
|
|
44
44
|
raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.")
|
|
45
45
|
|
|
46
|
+
if self.model is None or self.pre_processor is None:
|
|
47
|
+
# predictor is disabled
|
|
48
|
+
return [[0] * len(inputs), [0] * len(inputs), [1.0] * len(inputs)]
|
|
49
|
+
|
|
46
50
|
processed_batches = self.pre_processor(inputs)
|
|
47
51
|
predicted_batches = [self.model(batch) for batch in processed_batches]
|
|
48
52
|
|
|
@@ -17,16 +17,30 @@ ORIENTATION_ARCHS: List[str] = ["mobilenet_v3_small_crop_orientation", "mobilene
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def _orientation_predictor(
|
|
20
|
-
arch:
|
|
20
|
+
arch: Any,
|
|
21
|
+
model_type: str,
|
|
22
|
+
load_in_8_bit: bool = False,
|
|
23
|
+
engine_cfg: Optional[EngineConfig] = None,
|
|
24
|
+
disabled: bool = False,
|
|
25
|
+
**kwargs: Any,
|
|
21
26
|
) -> OrientationPredictor:
|
|
22
|
-
if
|
|
23
|
-
|
|
27
|
+
if disabled:
|
|
28
|
+
# Case where the orientation predictor is disabled
|
|
29
|
+
return OrientationPredictor(None, None)
|
|
30
|
+
|
|
31
|
+
if isinstance(arch, str):
|
|
32
|
+
if arch not in ORIENTATION_ARCHS:
|
|
33
|
+
raise ValueError(f"unknown architecture '{arch}'")
|
|
34
|
+
# Load directly classifier from backbone
|
|
35
|
+
_model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
|
|
36
|
+
else:
|
|
37
|
+
if not isinstance(arch, classification.MobileNetV3):
|
|
38
|
+
raise ValueError(f"unknown architecture: {type(arch)}")
|
|
39
|
+
_model = arch
|
|
24
40
|
|
|
25
|
-
# Load directly classifier from backbone
|
|
26
|
-
_model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
|
|
27
41
|
kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
|
|
28
42
|
kwargs["std"] = kwargs.get("std", _model.cfg["std"])
|
|
29
|
-
kwargs["batch_size"] = kwargs.get("batch_size", 512 if "crop"
|
|
43
|
+
kwargs["batch_size"] = kwargs.get("batch_size", 512 if model_type == "crop" else 2)
|
|
30
44
|
input_shape = _model.cfg["input_shape"][1:]
|
|
31
45
|
predictor = OrientationPredictor(
|
|
32
46
|
PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs),
|
|
@@ -60,7 +74,8 @@ def crop_orientation_predictor(
|
|
|
60
74
|
-------
|
|
61
75
|
OrientationPredictor
|
|
62
76
|
"""
|
|
63
|
-
|
|
77
|
+
model_type = "crop"
|
|
78
|
+
return _orientation_predictor(arch, model_type, load_in_8_bit, engine_cfg, **kwargs)
|
|
64
79
|
|
|
65
80
|
|
|
66
81
|
def page_orientation_predictor(
|
|
@@ -88,4 +103,5 @@ def page_orientation_predictor(
|
|
|
88
103
|
-------
|
|
89
104
|
OrientationPredictor
|
|
90
105
|
"""
|
|
91
|
-
|
|
106
|
+
model_type = "page"
|
|
107
|
+
return _orientation_predictor(arch, model_type, load_in_8_bit, engine_cfg, **kwargs)
|
|
@@ -9,7 +9,7 @@ import numpy as np
|
|
|
9
9
|
|
|
10
10
|
from onnxtr.models.builder import DocumentBuilder
|
|
11
11
|
from onnxtr.models.engine import EngineConfig
|
|
12
|
-
from onnxtr.utils.geometry import extract_crops, extract_rcrops, rotate_image
|
|
12
|
+
from onnxtr.utils.geometry import extract_crops, extract_rcrops, remove_image_padding, rotate_image
|
|
13
13
|
|
|
14
14
|
from .._utils import estimate_orientation, rectify_crops, rectify_loc_preds
|
|
15
15
|
from ..classification import crop_orientation_predictor, page_orientation_predictor
|
|
@@ -55,13 +55,19 @@ class _OCRPredictor:
|
|
|
55
55
|
) -> None:
|
|
56
56
|
self.assume_straight_pages = assume_straight_pages
|
|
57
57
|
self.straighten_pages = straighten_pages
|
|
58
|
+
self._page_orientation_disabled = kwargs.pop("disable_page_orientation", False)
|
|
59
|
+
self._crop_orientation_disabled = kwargs.pop("disable_crop_orientation", False)
|
|
58
60
|
self.crop_orientation_predictor = (
|
|
59
61
|
None
|
|
60
62
|
if assume_straight_pages
|
|
61
|
-
else crop_orientation_predictor(
|
|
63
|
+
else crop_orientation_predictor(
|
|
64
|
+
load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._crop_orientation_disabled
|
|
65
|
+
)
|
|
62
66
|
)
|
|
63
67
|
self.page_orientation_predictor = (
|
|
64
|
-
page_orientation_predictor(
|
|
68
|
+
page_orientation_predictor(
|
|
69
|
+
load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._crop_orientation_disabled
|
|
70
|
+
)
|
|
65
71
|
if detect_orientation or straighten_pages or not assume_straight_pages
|
|
66
72
|
else None
|
|
67
73
|
)
|
|
@@ -112,8 +118,8 @@ class _OCRPredictor:
|
|
|
112
118
|
]
|
|
113
119
|
)
|
|
114
120
|
return [
|
|
115
|
-
#
|
|
116
|
-
rotate_image(page, angle, expand=page.shape[
|
|
121
|
+
# expand if height and width are not equal, afterwards remove padding
|
|
122
|
+
remove_image_padding(rotate_image(page, angle, expand=page.shape[0] != page.shape[1]))
|
|
117
123
|
for page, angle in zip(pages, origin_pages_orientations)
|
|
118
124
|
]
|
|
119
125
|
|
|
@@ -123,13 +129,18 @@ class _OCRPredictor:
|
|
|
123
129
|
loc_preds: List[np.ndarray],
|
|
124
130
|
channels_last: bool,
|
|
125
131
|
assume_straight_pages: bool = False,
|
|
132
|
+
assume_horizontal: bool = False,
|
|
126
133
|
) -> List[List[np.ndarray]]:
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
134
|
+
if assume_straight_pages:
|
|
135
|
+
crops = [
|
|
136
|
+
extract_crops(page, _boxes[:, :4], channels_last=channels_last)
|
|
137
|
+
for page, _boxes in zip(pages, loc_preds)
|
|
138
|
+
]
|
|
139
|
+
else:
|
|
140
|
+
crops = [
|
|
141
|
+
extract_rcrops(page, _boxes[:, :4], channels_last=channels_last, assume_horizontal=assume_horizontal)
|
|
142
|
+
for page, _boxes in zip(pages, loc_preds)
|
|
143
|
+
]
|
|
133
144
|
return crops
|
|
134
145
|
|
|
135
146
|
@staticmethod
|
|
@@ -138,8 +149,9 @@ class _OCRPredictor:
|
|
|
138
149
|
loc_preds: List[np.ndarray],
|
|
139
150
|
channels_last: bool,
|
|
140
151
|
assume_straight_pages: bool = False,
|
|
152
|
+
assume_horizontal: bool = False,
|
|
141
153
|
) -> Tuple[List[List[np.ndarray]], List[np.ndarray]]:
|
|
142
|
-
crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages)
|
|
154
|
+
crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages, assume_horizontal)
|
|
143
155
|
|
|
144
156
|
# Avoid sending zero-sized crops
|
|
145
157
|
is_kept = [[all(s > 0 for s in crop.shape) for crop in page_crops] for page_crops in crops]
|
|
@@ -100,6 +100,8 @@ class OCRPredictor(NestedObject, _OCRPredictor):
|
|
|
100
100
|
origin_pages_orientations = None
|
|
101
101
|
if self.straighten_pages:
|
|
102
102
|
pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
|
|
103
|
+
# update page shapes after straightening
|
|
104
|
+
origin_page_shapes = [page.shape[:2] for page in pages]
|
|
103
105
|
|
|
104
106
|
# forward again to get predictions on straight pages
|
|
105
107
|
loc_preds = self.det_predictor(pages, **kwargs) # type: ignore[assignment]
|
|
@@ -117,6 +119,7 @@ class OCRPredictor(NestedObject, _OCRPredictor):
|
|
|
117
119
|
loc_preds, # type: ignore[arg-type]
|
|
118
120
|
channels_last=True,
|
|
119
121
|
assume_straight_pages=self.assume_straight_pages,
|
|
122
|
+
assume_horizontal=self._page_orientation_disabled,
|
|
120
123
|
)
|
|
121
124
|
# Rectify crop orientation and get crop orientation predictions
|
|
122
125
|
crop_orientations: Any = []
|
|
@@ -391,6 +391,26 @@ def rotate_image(
|
|
|
391
391
|
return rot_img
|
|
392
392
|
|
|
393
393
|
|
|
394
|
+
def remove_image_padding(image: np.ndarray) -> np.ndarray:
|
|
395
|
+
"""Remove black border padding from an image
|
|
396
|
+
|
|
397
|
+
Args:
|
|
398
|
+
----
|
|
399
|
+
image: numpy tensor to remove padding from
|
|
400
|
+
|
|
401
|
+
Returns:
|
|
402
|
+
-------
|
|
403
|
+
Image with padding removed
|
|
404
|
+
"""
|
|
405
|
+
# Find the bounding box of the non-black region
|
|
406
|
+
rows = np.any(image, axis=1)
|
|
407
|
+
cols = np.any(image, axis=0)
|
|
408
|
+
rmin, rmax = np.where(rows)[0][[0, -1]]
|
|
409
|
+
cmin, cmax = np.where(cols)[0][[0, -1]]
|
|
410
|
+
|
|
411
|
+
return image[rmin : rmax + 1, cmin : cmax + 1]
|
|
412
|
+
|
|
413
|
+
|
|
394
414
|
def estimate_page_angle(polys: np.ndarray) -> float:
|
|
395
415
|
"""Takes a batch of rotated previously ORIENTED polys (N, 4, 2) (rectified by the classifier) and return the
|
|
396
416
|
estimated angle ccw in degrees
|
|
@@ -471,7 +491,7 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True
|
|
|
471
491
|
|
|
472
492
|
|
|
473
493
|
def extract_rcrops(
|
|
474
|
-
img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True
|
|
494
|
+
img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True, assume_horizontal: bool = False
|
|
475
495
|
) -> List[np.ndarray]:
|
|
476
496
|
"""Created cropped images from list of rotated bounding boxes
|
|
477
497
|
|
|
@@ -481,6 +501,7 @@ def extract_rcrops(
|
|
|
481
501
|
polys: bounding boxes of shape (N, 4, 2)
|
|
482
502
|
dtype: target data type of bounding boxes
|
|
483
503
|
channels_last: whether the channel dimensions is the last one instead of the last one
|
|
504
|
+
assume_horizontal: whether the boxes are assumed to be only horizontally oriented
|
|
484
505
|
|
|
485
506
|
Returns:
|
|
486
507
|
-------
|
|
@@ -498,22 +519,88 @@ def extract_rcrops(
|
|
|
498
519
|
_boxes[:, :, 0] *= width
|
|
499
520
|
_boxes[:, :, 1] *= height
|
|
500
521
|
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
522
|
+
src_img = img if channels_last else img.transpose(1, 2, 0)
|
|
523
|
+
|
|
524
|
+
# Handle only horizontal oriented boxes
|
|
525
|
+
if assume_horizontal:
|
|
526
|
+
crops = []
|
|
527
|
+
|
|
528
|
+
for box in _boxes:
|
|
529
|
+
# Calculate the centroid of the quadrilateral
|
|
530
|
+
centroid = np.mean(box, axis=0)
|
|
531
|
+
|
|
532
|
+
# Divide the points into left and right
|
|
533
|
+
left_points = box[box[:, 0] < centroid[0]]
|
|
534
|
+
right_points = box[box[:, 0] >= centroid[0]]
|
|
535
|
+
|
|
536
|
+
# Sort the left points according to the y-axis
|
|
537
|
+
left_points = left_points[np.argsort(left_points[:, 1])]
|
|
538
|
+
top_left_pt = left_points[0]
|
|
539
|
+
bottom_left_pt = left_points[-1]
|
|
540
|
+
# Sort the right points according to the y-axis
|
|
541
|
+
right_points = right_points[np.argsort(right_points[:, 1])]
|
|
542
|
+
top_right_pt = right_points[0]
|
|
543
|
+
bottom_right_pt = right_points[-1]
|
|
544
|
+
box_points = np.array(
|
|
545
|
+
[top_left_pt, bottom_left_pt, top_right_pt, bottom_right_pt],
|
|
546
|
+
dtype=dtype,
|
|
547
|
+
)
|
|
548
|
+
|
|
549
|
+
# Get the width and height of the rectangle that will contain the warped quadrilateral
|
|
550
|
+
width_upper = np.linalg.norm(top_right_pt - top_left_pt)
|
|
551
|
+
width_lower = np.linalg.norm(bottom_right_pt - bottom_left_pt)
|
|
552
|
+
height_left = np.linalg.norm(bottom_left_pt - top_left_pt)
|
|
553
|
+
height_right = np.linalg.norm(bottom_right_pt - top_right_pt)
|
|
554
|
+
|
|
555
|
+
# Get the maximum width and height
|
|
556
|
+
rect_width = max(int(width_upper), int(width_lower))
|
|
557
|
+
rect_height = max(int(height_left), int(height_right))
|
|
558
|
+
|
|
559
|
+
dst_pts = np.array(
|
|
560
|
+
[
|
|
561
|
+
[0, 0], # top-left
|
|
562
|
+
# bottom-left
|
|
563
|
+
[0, rect_height - 1],
|
|
564
|
+
# top-right
|
|
565
|
+
[rect_width - 1, 0],
|
|
566
|
+
# bottom-right
|
|
567
|
+
[rect_width - 1, rect_height - 1],
|
|
568
|
+
],
|
|
569
|
+
dtype=dtype,
|
|
570
|
+
)
|
|
571
|
+
|
|
572
|
+
# Get the perspective transform matrix using the box points
|
|
573
|
+
affine_mat = cv2.getPerspectiveTransform(box_points, dst_pts)
|
|
574
|
+
|
|
575
|
+
# Perform the perspective warp to get the rectified crop
|
|
576
|
+
crop = cv2.warpPerspective(
|
|
577
|
+
src_img,
|
|
578
|
+
affine_mat,
|
|
579
|
+
(rect_width, rect_height),
|
|
580
|
+
)
|
|
581
|
+
|
|
582
|
+
# Add the crop to the list of crops
|
|
583
|
+
crops.append(crop)
|
|
584
|
+
|
|
585
|
+
# Handle any oriented boxes
|
|
586
|
+
else:
|
|
587
|
+
src_pts = _boxes[:, :3].astype(np.float32)
|
|
588
|
+
# Preserve size
|
|
589
|
+
d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
|
|
590
|
+
d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
|
|
591
|
+
# (N, 3, 2)
|
|
592
|
+
dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
|
|
593
|
+
dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
|
|
594
|
+
dst_pts[:, 2, 1] = d2 - 1
|
|
595
|
+
# Use a warp transformation to extract the crop
|
|
596
|
+
crops = [
|
|
597
|
+
cv2.warpAffine(
|
|
598
|
+
src_img,
|
|
599
|
+
# Transformation matrix
|
|
600
|
+
cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
|
|
601
|
+
(int(d1[idx]), int(d2[idx])),
|
|
602
|
+
)
|
|
603
|
+
for idx in range(_boxes.shape[0])
|
|
604
|
+
]
|
|
605
|
+
|
|
519
606
|
return crops # type: ignore[return-value]
|
|
@@ -25,6 +25,7 @@ VOCABS: Dict[str, str] = {
|
|
|
25
25
|
"hindi_punctuation": "।,?!:्ॐ॰॥॰",
|
|
26
26
|
"bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
|
|
27
27
|
"bangla_digits": "০১২৩৪৫৬৭৮৯",
|
|
28
|
+
"generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
|
|
28
29
|
}
|
|
29
30
|
|
|
30
31
|
VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
|
|
@@ -59,6 +60,9 @@ VOCABS["vietnamese"] = (
|
|
|
59
60
|
VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
|
|
60
61
|
VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
|
|
61
62
|
VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
|
|
63
|
+
VOCABS["ukrainian"] = (
|
|
64
|
+
VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"
|
|
65
|
+
)
|
|
62
66
|
VOCABS["multilingual"] = "".join(
|
|
63
67
|
dict.fromkeys(
|
|
64
68
|
VOCABS["french"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = 'v0.5.0'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: onnxtr
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -225,9 +225,8 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
225
225
|
Requires-Python: <4,>=3.9.0
|
|
226
226
|
Description-Content-Type: text/markdown
|
|
227
227
|
License-File: LICENSE
|
|
228
|
-
Requires-Dist: numpy<2.0.0,>=1.16.0
|
|
228
|
+
Requires-Dist: numpy<3.0.0,>=1.16.0
|
|
229
229
|
Requires-Dist: scipy<2.0.0,>=1.4.0
|
|
230
|
-
Requires-Dist: opencv-python<5.0.0,>=4.5.0
|
|
231
230
|
Requires-Dist: pypdfium2<5.0.0,>=4.11.0
|
|
232
231
|
Requires-Dist: pyclipper<2.0.0,>=1.2.0
|
|
233
232
|
Requires-Dist: shapely<3.0.0,>=1.6.0
|
|
@@ -240,8 +239,16 @@ Requires-Dist: anyascii>=0.3.2
|
|
|
240
239
|
Requires-Dist: tqdm>=4.30.0
|
|
241
240
|
Provides-Extra: cpu
|
|
242
241
|
Requires-Dist: onnxruntime>=1.11.0; extra == "cpu"
|
|
242
|
+
Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "cpu"
|
|
243
243
|
Provides-Extra: gpu
|
|
244
244
|
Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu"
|
|
245
|
+
Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "gpu"
|
|
246
|
+
Provides-Extra: cpu-headless
|
|
247
|
+
Requires-Dist: onnxruntime>=1.11.0; extra == "cpu-headless"
|
|
248
|
+
Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "cpu-headless"
|
|
249
|
+
Provides-Extra: gpu-headless
|
|
250
|
+
Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu-headless"
|
|
251
|
+
Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "gpu-headless"
|
|
245
252
|
Provides-Extra: html
|
|
246
253
|
Requires-Dist: weasyprint>=55.0; extra == "html"
|
|
247
254
|
Provides-Extra: viz
|
|
@@ -257,6 +264,7 @@ Requires-Dist: mypy>=0.812; extra == "quality"
|
|
|
257
264
|
Requires-Dist: pre-commit>=2.17.0; extra == "quality"
|
|
258
265
|
Provides-Extra: dev
|
|
259
266
|
Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
|
|
267
|
+
Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "dev"
|
|
260
268
|
Requires-Dist: weasyprint>=55.0; extra == "dev"
|
|
261
269
|
Requires-Dist: matplotlib>=3.1.0; extra == "dev"
|
|
262
270
|
Requires-Dist: mplcursors>=0.3; extra == "dev"
|
|
@@ -276,7 +284,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
|
276
284
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
277
285
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
278
286
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
279
|
-
[](https://pypi.org/project/OnnxTR/)
|
|
280
288
|
|
|
281
289
|
> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
|
|
282
290
|
|
|
@@ -309,8 +317,10 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
|
|
|
309
317
|
|
|
310
318
|
```shell
|
|
311
319
|
pip install "onnxtr[cpu]"
|
|
320
|
+
pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
|
|
312
321
|
# with gpu support
|
|
313
322
|
pip install "onnxtr[gpu]"
|
|
323
|
+
pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
|
|
314
324
|
# with HTML support
|
|
315
325
|
pip install "onnxtr[html]"
|
|
316
326
|
# with support for visualization
|
|
@@ -356,6 +366,9 @@ model = ocr_predictor(
|
|
|
356
366
|
# Additional parameters - meta information
|
|
357
367
|
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
|
|
358
368
|
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
|
|
369
|
+
# Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
|
|
370
|
+
disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
|
|
371
|
+
disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
|
|
359
372
|
# DocumentBuilder specific parameters
|
|
360
373
|
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
|
|
361
374
|
resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
|
|
@@ -589,8 +602,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
|
|
|
589
602
|
|Library |FUNSD (199 pages) |CORD (900 pages) |
|
|
590
603
|
|---------------------------------|-------------------------------|-------------------------------|
|
|
591
604
|
|docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
|
|
592
|
-
|**OnnxTR (CPU)** - v0.1
|
|
593
|
-
|**OnnxTR (CPU) 8-bit** - v0.1
|
|
605
|
+
|**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
|
|
606
|
+
|**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
|
|
594
607
|
|EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
|
|
595
608
|
|**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
|
|
596
609
|
|Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
|
|
@@ -602,7 +615,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
|
|
|
602
615
|
|-------------------------------------|-------------------------------|-------------------------------|
|
|
603
616
|
|docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
|
|
604
617
|
|**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
|
|
605
|
-
|OnnxTR (GPU) - v0.1
|
|
618
|
+
|OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
|
|
606
619
|
|EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
|
|
607
620
|
|Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
|
|
608
621
|
|**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
|
|
@@ -621,6 +634,16 @@ If you wish to cite please refer to the base project citation, feel free to use
|
|
|
621
634
|
}
|
|
622
635
|
```
|
|
623
636
|
|
|
637
|
+
```bibtex
|
|
638
|
+
@misc{onnxtr2024,
|
|
639
|
+
title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
|
|
640
|
+
author={Felix Dittrich},
|
|
641
|
+
year={2024},
|
|
642
|
+
publisher = {GitHub},
|
|
643
|
+
howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
|
|
644
|
+
}
|
|
645
|
+
```
|
|
646
|
+
|
|
624
647
|
## License
|
|
625
648
|
|
|
626
649
|
Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
|
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
numpy<2.0.0,>=1.16.0
|
|
1
|
+
numpy<3.0.0,>=1.16.0
|
|
2
2
|
scipy<2.0.0,>=1.4.0
|
|
3
|
-
opencv-python<5.0.0,>=4.5.0
|
|
4
3
|
pypdfium2<5.0.0,>=4.11.0
|
|
5
4
|
pyclipper<2.0.0,>=1.2.0
|
|
6
5
|
shapely<3.0.0,>=1.6.0
|
|
@@ -14,9 +13,15 @@ tqdm>=4.30.0
|
|
|
14
13
|
|
|
15
14
|
[cpu]
|
|
16
15
|
onnxruntime>=1.11.0
|
|
16
|
+
opencv-python<5.0.0,>=4.5.0
|
|
17
|
+
|
|
18
|
+
[cpu-headless]
|
|
19
|
+
onnxruntime>=1.11.0
|
|
20
|
+
opencv-python-headless<5.0.0,>=4.5.0
|
|
17
21
|
|
|
18
22
|
[dev]
|
|
19
23
|
onnxruntime>=1.11.0
|
|
24
|
+
opencv-python<5.0.0,>=4.5.0
|
|
20
25
|
weasyprint>=55.0
|
|
21
26
|
matplotlib>=3.1.0
|
|
22
27
|
mplcursors>=0.3
|
|
@@ -29,6 +34,11 @@ pre-commit>=2.17.0
|
|
|
29
34
|
|
|
30
35
|
[gpu]
|
|
31
36
|
onnxruntime-gpu>=1.11.0
|
|
37
|
+
opencv-python<5.0.0,>=4.5.0
|
|
38
|
+
|
|
39
|
+
[gpu-headless]
|
|
40
|
+
onnxruntime-gpu>=1.11.0
|
|
41
|
+
opencv-python-headless<5.0.0,>=4.5.0
|
|
32
42
|
|
|
33
43
|
[html]
|
|
34
44
|
weasyprint>=55.0
|
|
@@ -31,9 +31,8 @@ dynamic = ["version"]
|
|
|
31
31
|
dependencies = [
|
|
32
32
|
# For proper typing, mypy needs numpy>=1.20.0 (cf. https://github.com/numpy/numpy/pull/16515)
|
|
33
33
|
# Additional typing support is brought by numpy>=1.22.4, but core build sticks to >=1.16.0
|
|
34
|
-
"numpy>=1.16.0,<
|
|
34
|
+
"numpy>=1.16.0,<3.0.0",
|
|
35
35
|
"scipy>=1.4.0,<2.0.0",
|
|
36
|
-
"opencv-python>=4.5.0,<5.0.0",
|
|
37
36
|
"pypdfium2>=4.11.0,<5.0.0",
|
|
38
37
|
"pyclipper>=1.2.0,<2.0.0",
|
|
39
38
|
"shapely>=1.6.0,<3.0.0",
|
|
@@ -49,9 +48,19 @@ dependencies = [
|
|
|
49
48
|
[project.optional-dependencies]
|
|
50
49
|
cpu = [
|
|
51
50
|
"onnxruntime>=1.11.0",
|
|
51
|
+
"opencv-python>=4.5.0,<5.0.0",
|
|
52
52
|
]
|
|
53
53
|
gpu = [
|
|
54
54
|
"onnxruntime-gpu>=1.11.0",
|
|
55
|
+
"opencv-python>=4.5.0,<5.0.0",
|
|
56
|
+
]
|
|
57
|
+
cpu-headless = [
|
|
58
|
+
"onnxruntime>=1.11.0",
|
|
59
|
+
"opencv-python-headless>=4.5.0,<5.0.0",
|
|
60
|
+
]
|
|
61
|
+
gpu-headless = [
|
|
62
|
+
"onnxruntime-gpu>=1.11.0",
|
|
63
|
+
"opencv-python-headless>=4.5.0,<5.0.0",
|
|
55
64
|
]
|
|
56
65
|
html = [
|
|
57
66
|
"weasyprint>=55.0",
|
|
@@ -73,6 +82,7 @@ quality = [
|
|
|
73
82
|
dev = [
|
|
74
83
|
# Runtime
|
|
75
84
|
"onnxruntime>=1.11.0",
|
|
85
|
+
"opencv-python>=4.5.0,<5.0.0",
|
|
76
86
|
# HTML
|
|
77
87
|
"weasyprint>=55.0",
|
|
78
88
|
# Visualization
|
onnxtr-0.4.0/onnxtr/version.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = 'v0.4.0'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|