onnxtr 0.5.1.tar.gz → 0.6.0.tar.gz
This diff compares the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- {onnxtr-0.5.1 → onnxtr-0.6.0}/PKG-INFO +52 -23
- {onnxtr-0.5.1 → onnxtr-0.6.0}/README.md +37 -14
- onnxtr-0.6.0/onnxtr/contrib/__init__.py +1 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/contrib/artefacts.py +6 -8
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/contrib/base.py +7 -16
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/file_utils.py +1 -3
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/elements.py +45 -59
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/html.py +0 -2
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/image.py +1 -4
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/pdf.py +3 -5
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/reader.py +4 -10
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/_utils.py +10 -17
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/builder.py +17 -30
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/models/mobilenet.py +7 -12
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/predictor/base.py +6 -7
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/zoo.py +25 -11
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/_utils/base.py +3 -7
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/core.py +2 -8
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/models/differentiable_binarization.py +10 -17
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/models/fast.py +10 -17
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/models/linknet.py +10 -17
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/postprocessor/base.py +3 -9
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/predictor/base.py +4 -5
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/zoo.py +20 -6
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/engine.py +9 -9
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/factory/hub.py +3 -7
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/predictor/base.py +29 -30
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/predictor/predictor.py +4 -5
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/preprocessor/base.py +8 -12
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/core.py +0 -1
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/crnn.py +11 -23
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/master.py +9 -15
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/parseq.py +8 -12
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/sar.py +8 -12
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/vitstr.py +9 -15
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/_utils.py +6 -9
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/base.py +3 -3
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/utils.py +2 -7
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/zoo.py +19 -7
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/zoo.py +7 -9
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/transforms/base.py +17 -6
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/common_types.py +7 -8
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/data.py +7 -11
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/fonts.py +1 -6
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/geometry.py +18 -49
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/multithreading.py +3 -5
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/reconstitution.py +6 -8
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/repr.py +1 -2
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/visualization.py +12 -21
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/vocabs.py +1 -2
- onnxtr-0.6.0/onnxtr/version.py +1 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/PKG-INFO +52 -23
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/requires.txt +13 -5
- {onnxtr-0.5.1 → onnxtr-0.6.0}/pyproject.toml +18 -10
- {onnxtr-0.5.1 → onnxtr-0.6.0}/setup.py +1 -1
- onnxtr-0.5.1/onnxtr/models/detection/postprocessor/__init__.py +0 -0
- onnxtr-0.5.1/onnxtr/version.py +0 -1
- {onnxtr-0.5.1 → onnxtr-0.6.0}/LICENSE +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/models/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/predictor/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/_utils/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/models/__init__.py +0 -0
- {onnxtr-0.5.1/onnxtr/contrib → onnxtr-0.6.0/onnxtr/models/detection/postprocessor}/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/predictor/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/factory/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/predictor/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/preprocessor/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/py.typed +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/transforms/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/__init__.py +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/SOURCES.txt +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/dependency_links.txt +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/top_level.txt +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/zip-safe +0 -0
- {onnxtr-0.5.1 → onnxtr-0.6.0}/setup.cfg +0 -0
````diff
--- onnxtr-0.5.1/PKG-INFO
+++ onnxtr-0.6.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: onnxtr
-Version: 0.5.1
+Version: 0.6.0
 Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
 Author-email: Felix Dittrich <felixdittrich92@gmail.com>
 Maintainer: Felix Dittrich
@@ -209,7 +209,7 @@ License: Apache License
 Project-URL: repository, https://github.com/felixdittrich92/OnnxTR
 Project-URL: tracker, https://github.com/felixdittrich92/OnnxTR/issues
 Project-URL: changelog, https://github.com/felixdittrich92/OnnxTR/releases
-Keywords: OCR,deep learning,computer vision,onnx,text detection,text recognition,docTR,document analysis,document processing
+Keywords: OCR,deep learning,computer vision,onnx,text detection,text recognition,docTR,document analysis,document processing,document AI
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Education
@@ -218,11 +218,11 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Natural Language :: English
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: <4,>=3.9.0
+Requires-Python: <4,>=3.10.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy<3.0.0,>=1.16.0
````
````diff
@@ -238,17 +238,23 @@ Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
 Requires-Dist: tqdm>=4.30.0
 Provides-Extra: cpu
-Requires-Dist: onnxruntime>=1.
+Requires-Dist: onnxruntime>=1.18.0; extra == "cpu"
 Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "cpu"
 Provides-Extra: gpu
-Requires-Dist: onnxruntime-gpu>=1.
+Requires-Dist: onnxruntime-gpu>=1.18.0; extra == "gpu"
 Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "gpu"
+Provides-Extra: openvino
+Requires-Dist: onnxruntime-openvino>=1.18.0; extra == "openvino"
+Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "openvino"
 Provides-Extra: cpu-headless
-Requires-Dist: onnxruntime>=1.
+Requires-Dist: onnxruntime>=1.18.0; extra == "cpu-headless"
 Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "cpu-headless"
 Provides-Extra: gpu-headless
-Requires-Dist: onnxruntime-gpu>=1.
+Requires-Dist: onnxruntime-gpu>=1.18.0; extra == "gpu-headless"
 Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "gpu-headless"
+Provides-Extra: openvino-headless
+Requires-Dist: onnxruntime-openvino>=1.18.0; extra == "openvino-headless"
+Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "openvino-headless"
 Provides-Extra: html
 Requires-Dist: weasyprint>=55.0; extra == "html"
 Provides-Extra: viz
@@ -263,7 +269,7 @@ Requires-Dist: ruff>=0.1.5; extra == "quality"
 Requires-Dist: mypy>=0.812; extra == "quality"
 Requires-Dist: pre-commit>=2.17.0; extra == "quality"
 Provides-Extra: dev
-Requires-Dist: onnxruntime>=1.
+Requires-Dist: onnxruntime>=1.18.0; extra == "dev"
 Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "dev"
 Requires-Dist: weasyprint>=55.0; extra == "dev"
 Requires-Dist: matplotlib>=3.1.0; extra == "dev"
````
````diff
@@ -284,7 +290,8 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
 [](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[](https://pypi.org/project/OnnxTR/)
+[](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
 [](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
 
 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
````
````diff
@@ -304,7 +311,7 @@ What you can expect from this repository:
 
 ### Prerequisites
 
-Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
+Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
 
 ### Latest release
 
````
````diff
@@ -312,16 +319,22 @@ You can then install the latest release of the package using [pypi](https://pypi
 
 **NOTE:**
 
-
+Currently supported execution providers by default are: CPU, CUDA (NVIDIA GPU), OpenVINO (Intel CPU | GPU).
+
+For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started).
 
 - **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).
 
 ```shell
+# standard cpu support
 pip install "onnxtr[cpu]"
 pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
 # with gpu support
 pip install "onnxtr[gpu]"
 pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
+# OpenVINO cpu | gpu support for Intel CPUs | GPUs
+pip install "onnxtr[openvino]"
+pip install "onnxtr[openvino-headless]" # same as openvino but with opencv-headless
 # with HTML support
 pip install "onnxtr[html]"
 # with support for visualization
````
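Before choosing an extra, it can help to confirm which execution providers the installed ONNX Runtime build actually exposes. A minimal sketch using plain ONNX Runtime (nothing OnnxTR-specific; the provider names are standard ONNX Runtime identifiers):

```python
# Check which execution providers the installed onnxruntime build exposes.
# Assumes one of the onnxruntime packages from the extras above is installed.
import onnxruntime as ort

available = ort.get_available_providers()
print(available)  # e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider']

# 'CUDAExecutionProvider'     -> one of the gpu extras applies
# 'OpenVINOExecutionProvider' -> one of the openvino extras applies
# 'CPUExecutionProvider'      -> always present; the cpu extras suffice
if "CUDAExecutionProvider" not in available:
    print("CUDA not available - execution will fall back to CPU")
```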
````diff
@@ -330,6 +343,18 @@ pip install "onnxtr[viz]"
 pip install "onnxtr[html, gpu, viz]"
 ```
 
+**Recommendation:**
+
+If you have:
+
+- a NVIDIA GPU, use one of the `gpu` variants
+- an Intel CPU or GPU, use one of the `openvino` variants
+- otherwise, use one of the `cpu` variants
+
+**OpenVINO:**
+
+By default OnnxTR running with the OpenVINO execution provider backend uses the `CPU` device with `FP32` precision, to change the device or for further configuaration please refer to the [ONNX Runtime OpenVINO documentation](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options).
+
 ### Reading files
 
 Documents can be interpreted from PDF / Images / Webpages / Multiple page images using the following code snippet:
````
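The reading API referenced here mirrors docTR's `DocumentFile` interface; a short sketch (file paths and URL are placeholders):

```python
# Load documents from the supported sources; paths/URL are placeholders.
from onnxtr.io import DocumentFile

pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")               # PDF
img_doc = DocumentFile.from_images("path/to/your/img.jpg")            # single image
multi_img_doc = DocumentFile.from_images(["page1.jpg", "page2.jpg"])  # multiple page images
web_doc = DocumentFile.from_url("https://www.yoursite.com")           # webpage (needs the html extra)
```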
````diff
@@ -359,8 +384,10 @@ model = ocr_predictor(
     reco_arch='vitstr_base', # recognition architecture
     det_bs=2, # detection batch size
     reco_bs=512, # recognition batch size
+    # Document related parameters
     assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
     straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
+    export_as_straight_boxes=False, # set to `True` if the boxes should be exported as if the pages were straight (default: False)
     # Preprocessing related parameters
     preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
     symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
````
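For context, the parameters above slot into a call like the following: a sketch of a full predictor run, including the `export_as_straight_boxes` flag this release adds (architecture names are taken from the benchmark section; the input path is a placeholder):

```python
# Build a predictor with the documented parameters and run it on a PDF.
from onnxtr.io import DocumentFile
from onnxtr.models import ocr_predictor

model = ocr_predictor(
    det_arch="fast_base",            # detection architecture
    reco_arch="vitstr_base",         # recognition architecture
    assume_straight_pages=True,
    straighten_pages=False,
    export_as_straight_boxes=False,  # new in 0.6.0 per this diff
)
doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
result = model(doc)
json_export = result.export()  # nested dict of pages / blocks / lines / words
```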
````diff
@@ -596,19 +623,20 @@ Benchmarking performed on the FUNSD dataset and CORD dataset.
 
 docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.
 
-The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision**.
+The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision** on CPU.
 
 - CPU benchmarks:
 
-|Library
-
-|docTR (CPU) - v0.8.1
-|**OnnxTR (CPU)** - v0.
-|**OnnxTR (CPU) 8-bit** - v0.
-
-
-
-
+|Library                             |FUNSD (199 pages)              |CORD (900 pages)               |
+|------------------------------------|-------------------------------|-------------------------------|
+|docTR (CPU) - v0.8.1                | ~1.29s / Page                 | ~0.60s / Page                 |
+|**OnnxTR (CPU)** - v0.6.0           | ~0.57s / Page                 | **~0.25s / Page**             |
+|**OnnxTR (CPU) 8-bit** - v0.6.0     | **~0.38s / Page**             | **~0.14s / Page**             |
+|**OnnxTR (CPU-OpenVINO)** - v0.6.0  | **~0.15s / Page**             | **~0.14s / Page**             |
+|EasyOCR (CPU) - v1.7.1              | ~1.96s / Page                 | ~1.75s / Page                 |
+|**PyTesseract (CPU)** - v0.3.10     | **~0.50s / Page**             | ~0.52s / Page                 |
+|Surya (line) (CPU) - v0.4.4         | ~48.76s / Page                | ~35.49s / Page                |
+|PaddleOCR (CPU) - no cls - v2.7.3   | ~1.27s / Page                 | ~0.38s / Page                 |
 
 - GPU benchmarks:
 
````
````diff
@@ -616,7 +644,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
 |-------------------------------------|-------------------------------|-------------------------------|
 |docTR (GPU) - v0.8.1                 | ~0.07s / Page                 | ~0.05s / Page                 |
 |**docTR (GPU) float16** - v0.8.1     | **~0.06s / Page**             | **~0.03s / Page**             |
-|OnnxTR (GPU) - v0.
+|OnnxTR (GPU) - v0.6.0                | **~0.06s / Page**             | ~0.04s / Page                 |
+|**OnnxTR (GPU) float16 - v0.6.0**    | **~0.05s / Page**             | **~0.03s / Page**             |
 |EasyOCR (GPU) - v1.7.1               | ~0.31s / Page                 | ~0.19s / Page                 |
 |Surya (GPU) float16 - v0.4.4         | ~3.70s / Page                 | ~2.81s / Page                 |
 |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page                 | **~0.03s / Page**             |
````
````diff
--- onnxtr-0.5.1/README.md
+++ onnxtr-0.6.0/README.md
@@ -7,7 +7,8 @@
 [](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[](https://pypi.org/project/OnnxTR/)
+[](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
 [](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
 
 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
@@ -27,7 +28,7 @@ What you can expect from this repository:
 
 ### Prerequisites
 
-Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
+Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
 
 ### Latest release
 
@@ -35,16 +36,22 @@ You can then install the latest release of the package using [pypi](https://pypi
 
 **NOTE:**
 
-
+Currently supported execution providers by default are: CPU, CUDA (NVIDIA GPU), OpenVINO (Intel CPU | GPU).
+
+For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started).
 
 - **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).
 
 ```shell
+# standard cpu support
 pip install "onnxtr[cpu]"
 pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
 # with gpu support
 pip install "onnxtr[gpu]"
 pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
+# OpenVINO cpu | gpu support for Intel CPUs | GPUs
+pip install "onnxtr[openvino]"
+pip install "onnxtr[openvino-headless]" # same as openvino but with opencv-headless
 # with HTML support
 pip install "onnxtr[html]"
 # with support for visualization
@@ -53,6 +60,18 @@ pip install "onnxtr[viz]"
 pip install "onnxtr[html, gpu, viz]"
 ```
 
+**Recommendation:**
+
+If you have:
+
+- a NVIDIA GPU, use one of the `gpu` variants
+- an Intel CPU or GPU, use one of the `openvino` variants
+- otherwise, use one of the `cpu` variants
+
+**OpenVINO:**
+
+By default OnnxTR running with the OpenVINO execution provider backend uses the `CPU` device with `FP32` precision, to change the device or for further configuaration please refer to the [ONNX Runtime OpenVINO documentation](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options).
+
 ### Reading files
 
 Documents can be interpreted from PDF / Images / Webpages / Multiple page images using the following code snippet:
@@ -82,8 +101,10 @@ model = ocr_predictor(
     reco_arch='vitstr_base', # recognition architecture
     det_bs=2, # detection batch size
     reco_bs=512, # recognition batch size
+    # Document related parameters
     assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
     straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
+    export_as_straight_boxes=False, # set to `True` if the boxes should be exported as if the pages were straight (default: False)
     # Preprocessing related parameters
     preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
     symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
@@ -319,19 +340,20 @@ Benchmarking performed on the FUNSD dataset and CORD dataset.
 
 docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.
 
-The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision**.
+The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision** on CPU.
 
 - CPU benchmarks:
 
-|Library
-
-|docTR (CPU) - v0.8.1
-|**OnnxTR (CPU)** - v0.
-|**OnnxTR (CPU) 8-bit** - v0.
-
-
-
-
+|Library                             |FUNSD (199 pages)              |CORD (900 pages)               |
+|------------------------------------|-------------------------------|-------------------------------|
+|docTR (CPU) - v0.8.1                | ~1.29s / Page                 | ~0.60s / Page                 |
+|**OnnxTR (CPU)** - v0.6.0           | ~0.57s / Page                 | **~0.25s / Page**             |
+|**OnnxTR (CPU) 8-bit** - v0.6.0     | **~0.38s / Page**             | **~0.14s / Page**             |
+|**OnnxTR (CPU-OpenVINO)** - v0.6.0  | **~0.15s / Page**             | **~0.14s / Page**             |
+|EasyOCR (CPU) - v1.7.1              | ~1.96s / Page                 | ~1.75s / Page                 |
+|**PyTesseract (CPU)** - v0.3.10     | **~0.50s / Page**             | ~0.52s / Page                 |
+|Surya (line) (CPU) - v0.4.4         | ~48.76s / Page                | ~35.49s / Page                |
+|PaddleOCR (CPU) - no cls - v2.7.3   | ~1.27s / Page                 | ~0.38s / Page                 |
 
 - GPU benchmarks:
 
@@ -339,7 +361,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
 |-------------------------------------|-------------------------------|-------------------------------|
 |docTR (GPU) - v0.8.1                 | ~0.07s / Page                 | ~0.05s / Page                 |
 |**docTR (GPU) float16** - v0.8.1     | **~0.06s / Page**             | **~0.03s / Page**             |
-|OnnxTR (GPU) - v0.
+|OnnxTR (GPU) - v0.6.0                | **~0.06s / Page**             | ~0.04s / Page                 |
+|**OnnxTR (GPU) float16 - v0.6.0**    | **~0.05s / Page**             | **~0.03s / Page**             |
 |EasyOCR (GPU) - v1.7.1               | ~0.31s / Page                 | ~0.19s / Page                 |
 |Surya (GPU) float16 - v0.4.4         | ~3.70s / Page                 | ~2.81s / Page                 |
 |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page                 | **~0.03s / Page**             |
````
````diff
--- /dev/null
+++ onnxtr-0.6.0/onnxtr/contrib/__init__.py
@@ -0,0 +1 @@
+from .artefacts import ArtefactDetector
````
````diff
--- onnxtr-0.5.1/onnxtr/contrib/artefacts.py
+++ onnxtr-0.6.0/onnxtr/contrib/artefacts.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any
+from typing import Any
 
 import cv2
 import numpy as np
@@ -14,7 +14,7 @@ from .base import _BasePredictor
 
 __all__ = ["ArtefactDetector"]
 
-default_cfgs:
+default_cfgs: dict[str, dict[str, Any]] = {
     "yolov8_artefact": {
         "input_shape": (3, 1024, 1024),
         "labels": ["bar_code", "qr_code", "logo", "photo"],
@@ -34,7 +34,6 @@ class ArtefactDetector(_BasePredictor):
     >>> results = detector(doc)
 
     Args:
-    ----
         arch: the architecture to use
         batch_size: the batch size to use
         model_path: the path to the model to use
@@ -50,9 +49,9 @@ class ArtefactDetector(_BasePredictor):
         self,
         arch: str = "yolov8_artefact",
         batch_size: int = 2,
-        model_path:
-        labels:
-        input_shape:
+        model_path: str | None = None,
+        labels: list[str] | None = None,
+        input_shape: tuple[int, int, int] | None = None,
         conf_threshold: float = 0.5,
         iou_threshold: float = 0.5,
         **kwargs: Any,
@@ -66,7 +65,7 @@ class ArtefactDetector(_BasePredictor):
     def preprocess(self, img: np.ndarray) -> np.ndarray:
         return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)
 
-    def postprocess(self, output:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]:
         results = []
 
         for batch in zip(output, input_images):
@@ -109,7 +108,6 @@ class ArtefactDetector(_BasePredictor):
         Display the results
 
         Args:
-        ----
             **kwargs: additional keyword arguments to be passed to `plt.show`
         """
         requires_package("matplotlib", "`.show()` requires matplotlib installed")
````
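With the new package-level export in `onnxtr/contrib/__init__.py`, the detector can be imported directly from `onnxtr.contrib`. A usage sketch based on the docstring above (thresholds are the documented defaults; the image path is a placeholder):

```python
# Detect artefacts (bar codes, QR codes, logos, photos) on document pages.
from onnxtr.contrib import ArtefactDetector
from onnxtr.io import DocumentFile

doc = DocumentFile.from_images(["path/to/page.jpg"])
detector = ArtefactDetector(batch_size=2, conf_threshold=0.5, iou_threshold=0.5)
results = detector(doc)  # per-page list of detections as dicts (label, box, confidence)
detector.show()          # visualize the results; requires matplotlib
```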
````diff
--- onnxtr-0.5.1/onnxtr/contrib/base.py
+++ onnxtr-0.6.0/onnxtr/contrib/base.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any
+from typing import Any
 
 import numpy as np
 import onnxruntime as ort
@@ -16,32 +16,29 @@ class _BasePredictor:
     Base class for all predictors
 
     Args:
-    ----
         batch_size: the batch size to use
         url: the url to use to download a model if needed
         model_path: the path to the model to use
         **kwargs: additional arguments to be passed to `download_from_url`
     """
 
-    def __init__(self, batch_size: int, url:
+    def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None:
         self.batch_size = batch_size
         self.session = self._init_model(url, model_path, **kwargs)
 
-        self._inputs:
-        self._results:
+        self._inputs: list[np.ndarray] = []
+        self._results: list[Any] = []
 
-    def _init_model(self, url:
+    def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any:
         """
         Download the model from the given url if needed
 
         Args:
-        ----
             url: the url to use
             model_path: the path to the model to use
             **kwargs: additional arguments to be passed to `download_from_url`
 
         Returns:
-        -------
             Any: the ONNX loaded model
         """
         if not url and not model_path:
@@ -54,40 +51,34 @@ class _BasePredictor:
         Preprocess the input image
 
         Args:
-        ----
             img: the input image to preprocess
 
         Returns:
-        -------
             np.ndarray: the preprocessed image
         """
         raise NotImplementedError
 
-    def postprocess(self, output:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
         """
         Postprocess the model output
 
         Args:
-        ----
             output: the model output to postprocess
             input_images: the input images used to generate the output
 
         Returns:
-        -------
             Any: the postprocessed output
         """
         raise NotImplementedError
 
-    def __call__(self, inputs:
+    def __call__(self, inputs: list[np.ndarray]) -> Any:
         """
         Call the model on the given inputs
 
         Args:
-        ----
             inputs: the inputs to use
 
         Returns:
-        -------
             Any: the postprocessed output
         """
         self._inputs = inputs
````
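The hunks above define the contract a concrete predictor has to fulfil: preprocess one image, let `__call__` run the ONNX session in batches, postprocess the raw outputs. A minimal sketch of a subclass, assuming a local `.onnx` model file (the class name and model path are hypothetical):

```python
# Minimal _BasePredictor subclass following the signatures shown in the diff.
from typing import Any

import numpy as np

from onnxtr.contrib.base import _BasePredictor


class DummyPredictor(_BasePredictor):  # hypothetical example class
    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # scale pixel values to [0, 1] as a stand-in for real preprocessing
        return img.astype(np.float32) / 255.0

    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
        # convert raw session outputs into plain Python structures
        return [out.tolist() for out in output]


# _init_model loads the ONNX session from the given path (or downloads from a url)
predictor = DummyPredictor(batch_size=2, model_path="path/to/model.onnx")
```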
````diff
--- onnxtr-0.5.1/onnxtr/file_utils.py
+++ onnxtr-0.6.0/onnxtr/file_utils.py
@@ -6,7 +6,6 @@
 import importlib.metadata
 import importlib.util
 import logging
-from typing import Optional
 
 __all__ = ["requires_package"]
 
@@ -14,12 +13,11 @@ ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
 ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
 
 
-def requires_package(name: str, extra_message:
+def requires_package(name: str, extra_message: str | None = None) -> None:  # pragma: no cover
     """
     package requirement helper
 
     Args:
-    ----
         name: name of the package
         extra_message: additional message to display if the package is not found
     """
````
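This helper keeps optional dependencies lazy: callers assert the package at use time rather than import time, as `artefacts.py` does for matplotlib elsewhere in this diff. A sketch (the surrounding function is hypothetical):

```python
# Guard an optional dependency at call time instead of import time.
from onnxtr.file_utils import requires_package


def show_results(**kwargs):  # hypothetical caller
    # fails with a helpful message if matplotlib is missing
    requires_package("matplotlib", "`.show()` requires matplotlib installed")
    import matplotlib.pyplot as plt  # safe to import after the check
    ...
```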