onnxtr 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {onnxtr-0.5.0 → onnxtr-0.6.0}/PKG-INFO +53 -23
- {onnxtr-0.5.0 → onnxtr-0.6.0}/README.md +38 -14
- onnxtr-0.6.0/onnxtr/contrib/__init__.py +1 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/contrib/artefacts.py +6 -8
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/contrib/base.py +7 -16
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/file_utils.py +1 -3
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/elements.py +54 -60
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/html.py +0 -2
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/image.py +1 -4
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/pdf.py +3 -5
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/reader.py +4 -10
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/_utils.py +10 -17
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/builder.py +17 -30
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/models/mobilenet.py +7 -12
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/predictor/base.py +6 -7
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/zoo.py +25 -11
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/_utils/base.py +3 -7
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/core.py +2 -8
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/models/differentiable_binarization.py +10 -17
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/models/fast.py +10 -17
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/models/linknet.py +10 -17
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/postprocessor/base.py +3 -9
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/predictor/base.py +4 -5
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/zoo.py +20 -6
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/engine.py +9 -9
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/factory/hub.py +3 -7
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/predictor/base.py +29 -30
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/predictor/predictor.py +4 -5
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/preprocessor/base.py +8 -12
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/core.py +0 -1
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/crnn.py +11 -23
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/master.py +9 -15
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/parseq.py +8 -12
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/sar.py +8 -12
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/vitstr.py +9 -15
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/_utils.py +6 -9
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/base.py +3 -3
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/utils.py +2 -7
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/zoo.py +19 -7
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/zoo.py +7 -9
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/transforms/base.py +17 -6
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/common_types.py +7 -8
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/data.py +7 -11
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/fonts.py +1 -6
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/geometry.py +18 -49
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/multithreading.py +3 -5
- onnxtr-0.6.0/onnxtr/utils/reconstitution.py +171 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/repr.py +1 -2
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/visualization.py +12 -21
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/vocabs.py +1 -2
- onnxtr-0.6.0/onnxtr/version.py +1 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/PKG-INFO +53 -23
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/requires.txt +13 -5
- {onnxtr-0.5.0 → onnxtr-0.6.0}/pyproject.toml +20 -11
- {onnxtr-0.5.0 → onnxtr-0.6.0}/setup.py +1 -1
- onnxtr-0.5.0/onnxtr/models/detection/postprocessor/__init__.py +0 -0
- onnxtr-0.5.0/onnxtr/utils/reconstitution.py +0 -70
- onnxtr-0.5.0/onnxtr/version.py +0 -1
- {onnxtr-0.5.0 → onnxtr-0.6.0}/LICENSE +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/models/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/predictor/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/_utils/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/models/__init__.py +0 -0
- {onnxtr-0.5.0/onnxtr/contrib → onnxtr-0.6.0/onnxtr/models/detection/postprocessor}/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/predictor/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/factory/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/predictor/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/preprocessor/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/py.typed +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/transforms/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/__init__.py +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/SOURCES.txt +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/dependency_links.txt +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/top_level.txt +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/zip-safe +0 -0
- {onnxtr-0.5.0 → onnxtr-0.6.0}/setup.cfg +0 -0
{onnxtr-0.5.0 → onnxtr-0.6.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: onnxtr
-Version: 0.5.0
+Version: 0.6.0
 Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
 Author-email: Felix Dittrich <felixdittrich92@gmail.com>
 Maintainer: Felix Dittrich
@@ -209,7 +209,7 @@ License: Apache License
 Project-URL: repository, https://github.com/felixdittrich92/OnnxTR
 Project-URL: tracker, https://github.com/felixdittrich92/OnnxTR/issues
 Project-URL: changelog, https://github.com/felixdittrich92/OnnxTR/releases
-Keywords: OCR,deep learning,computer vision,onnx,text detection,text recognition,docTR,document analysis,document processing
+Keywords: OCR,deep learning,computer vision,onnx,text detection,text recognition,docTR,document analysis,document processing,document AI
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Education
@@ -218,11 +218,11 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Natural Language :: English
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: <4,>=3.9.0
+Requires-Python: <4,>=3.10.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy<3.0.0,>=1.16.0
@@ -238,17 +238,23 @@ Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
 Requires-Dist: tqdm>=4.30.0
 Provides-Extra: cpu
-Requires-Dist: onnxruntime>=1.
+Requires-Dist: onnxruntime>=1.18.0; extra == "cpu"
 Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "cpu"
 Provides-Extra: gpu
-Requires-Dist: onnxruntime-gpu>=1.
+Requires-Dist: onnxruntime-gpu>=1.18.0; extra == "gpu"
 Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "gpu"
+Provides-Extra: openvino
+Requires-Dist: onnxruntime-openvino>=1.18.0; extra == "openvino"
+Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "openvino"
 Provides-Extra: cpu-headless
-Requires-Dist: onnxruntime>=1.
+Requires-Dist: onnxruntime>=1.18.0; extra == "cpu-headless"
 Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "cpu-headless"
 Provides-Extra: gpu-headless
-Requires-Dist: onnxruntime-gpu>=1.
+Requires-Dist: onnxruntime-gpu>=1.18.0; extra == "gpu-headless"
 Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "gpu-headless"
+Provides-Extra: openvino-headless
+Requires-Dist: onnxruntime-openvino>=1.18.0; extra == "openvino-headless"
+Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "openvino-headless"
 Provides-Extra: html
 Requires-Dist: weasyprint>=55.0; extra == "html"
 Provides-Extra: viz
@@ -263,7 +269,7 @@ Requires-Dist: ruff>=0.1.5; extra == "quality"
 Requires-Dist: mypy>=0.812; extra == "quality"
 Requires-Dist: pre-commit>=2.17.0; extra == "quality"
 Provides-Extra: dev
-Requires-Dist: onnxruntime>=1.
+Requires-Dist: onnxruntime>=1.18.0; extra == "dev"
 Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "dev"
 Requires-Dist: weasyprint>=55.0; extra == "dev"
 Requires-Dist: matplotlib>=3.1.0; extra == "dev"
@@ -284,7 +290,9 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
 [](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[](https://pypi.org/project/OnnxTR/)
+[](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
+[](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)

 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide an Onnx pipeline for docTR. For feature requests which are not directly related to the Onnx pipeline, please refer to the base project.
@@ -303,7 +311,7 @@ What you can expect from this repository:

 ### Prerequisites

-Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
+Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.

 ### Latest release
@@ -311,16 +319,22 @@ You can then install the latest release of the package using [pypi](https://pypi

 **NOTE:**

-
+Currently supported execution providers by default are: CPU, CUDA (NVIDIA GPU), OpenVINO (Intel CPU | GPU).
+
+For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started).

 - **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).

 ```shell
+# standard cpu support
 pip install "onnxtr[cpu]"
 pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
 # with gpu support
 pip install "onnxtr[gpu]"
 pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
+# OpenVINO cpu | gpu support for Intel CPUs | GPUs
+pip install "onnxtr[openvino]"
+pip install "onnxtr[openvino-headless]" # same as openvino but with opencv-headless
 # with HTML support
 pip install "onnxtr[html]"
 # with support for visualization
@@ -329,6 +343,18 @@ pip install "onnxtr[viz]"
 pip install "onnxtr[html, gpu, viz]"
 ```

+**Recommendation:**
+
+If you have:
+
+- an NVIDIA GPU, use one of the `gpu` variants
+- an Intel CPU or GPU, use one of the `openvino` variants
+- otherwise, use one of the `cpu` variants
+
+**OpenVINO:**
+
+By default, OnnxTR running with the OpenVINO execution provider backend uses the `CPU` device with `FP32` precision. To change the device or for further configuration, please refer to the [ONNX Runtime OpenVINO documentation](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options).
+
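A quick way to sanity-check which of these variants ended up active: ONNX Runtime can report the execution providers its installed build exposes. A minimal sketch, assuming only that one of the `onnxruntime*` packages above is installed:

```python
import onnxruntime as ort

# Lists the execution providers compiled into this onnxruntime build, e.g.
# ['CUDAExecutionProvider', 'CPUExecutionProvider'] for the gpu variants or
# ['OpenVINOExecutionProvider', 'CPUExecutionProvider'] for the openvino variants.
print(ort.get_available_providers())
```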
 ### Reading files

 Documents can be interpreted from PDF / Images / Webpages / Multiple page images using the following code snippet:
@@ -358,8 +384,10 @@ model = ocr_predictor(
     reco_arch='vitstr_base', # recognition architecture
     det_bs=2, # detection batch size
     reco_bs=512, # recognition batch size
+    # Document related parameters
     assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
     straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
+    export_as_straight_boxes=False, # set to `True` if the boxes should be exported as if the pages were straight (default: False)
     # Preprocessing related parameters
     preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
     symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
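For orientation, a minimal end-to-end sketch of how a predictor configured with these options is typically used; the file path is a placeholder, and the API mirrors docTR's, as the snippets in this diff suggest:

```python
from onnxtr.io import DocumentFile
from onnxtr.models import ocr_predictor

# Load a document (placeholder path; from_images exists for image inputs).
doc = DocumentFile.from_pdf("path/to/document.pdf")

# Two-stage pipeline (detection + recognition), including the new 0.6.0
# export_as_straight_boxes option shown in the hunk above.
model = ocr_predictor(
    det_arch="fast_base",
    reco_arch="crnn_vgg16_bn",
    export_as_straight_boxes=False,
)

result = model(doc)
print(result.render())  # plain-text reconstruction of the recognized pages
```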
@@ -595,19 +623,20 @@ Benchmarking performed on the FUNSD dataset and CORD dataset.

 docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.

-The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision
+The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision** on CPU.

 - CPU benchmarks:

-|Library
-|
-|docTR (CPU) - v0.8.1
-|**OnnxTR (CPU)** - v0.
-|**OnnxTR (CPU) 8-bit** - v0.
-|
-|
-|
-|
+|Library                             |FUNSD (199 pages)              |CORD (900 pages)               |
+|------------------------------------|-------------------------------|-------------------------------|
+|docTR (CPU) - v0.8.1                | ~1.29s / Page                 | ~0.60s / Page                 |
+|**OnnxTR (CPU)** - v0.6.0           | ~0.57s / Page                 | **~0.25s / Page**             |
+|**OnnxTR (CPU) 8-bit** - v0.6.0     | **~0.38s / Page**             | **~0.14s / Page**             |
+|**OnnxTR (CPU-OpenVINO)** - v0.6.0  | **~0.15s / Page**             | **~0.14s / Page**             |
+|EasyOCR (CPU) - v1.7.1              | ~1.96s / Page                 | ~1.75s / Page                 |
+|**PyTesseract (CPU)** - v0.3.10     | **~0.50s / Page**             | ~0.52s / Page                 |
+|Surya (line) (CPU) - v0.4.4         | ~48.76s / Page                | ~35.49s / Page                |
+|PaddleOCR (CPU) - no cls - v2.7.3   | ~1.27s / Page                 | ~0.38s / Page                 |

 - GPU benchmarks:
@@ -615,7 +644,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
 |-------------------------------------|-------------------------------|-------------------------------|
 |docTR (GPU) - v0.8.1                 | ~0.07s / Page                 | ~0.05s / Page                 |
 |**docTR (GPU) float16** - v0.8.1     | **~0.06s / Page**             | **~0.03s / Page**             |
-|OnnxTR (GPU) - v0.
+|OnnxTR (GPU) - v0.6.0                | **~0.06s / Page**             | ~0.04s / Page                 |
+|**OnnxTR (GPU) float16 - v0.6.0**    | **~0.05s / Page**             | **~0.03s / Page**             |
 |EasyOCR (GPU) - v1.7.1               | ~0.31s / Page                 | ~0.19s / Page                 |
 |Surya (GPU) float16 - v0.4.4         | ~3.70s / Page                 | ~2.81s / Page                 |
 |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page                 | **~0.03s / Page**             |
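The tables report average wall-clock seconds per page. A rough sketch of how such a per-page figure can be measured (the page filenames are placeholders; this is not the actual benchmark harness):

```python
import time

from onnxtr.io import DocumentFile
from onnxtr.models import ocr_predictor

model = ocr_predictor(det_arch="fast_base", reco_arch="crnn_vgg16_bn")
pages = DocumentFile.from_images(["page_0.png", "page_1.png"])  # placeholders

# Time one full pass over the document and normalize by page count.
start = time.perf_counter()
_ = model(pages)
elapsed = time.perf_counter() - start
print(f"~{elapsed / len(pages):.2f}s / Page")
```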
{onnxtr-0.5.0 → onnxtr-0.6.0}/README.md

The README.md hunks (badges @@ -7,7 +7,9 @@, prerequisites @@ -26,7 +28,7 @@, install commands @@ -34,16 +36,22 @@, recommendation and OpenVINO notes @@ -52,6 +60,18 @@, predictor parameters @@ -81,8 +101,10 @@, CPU benchmarks @@ -318,19 +340,20 @@, GPU benchmarks @@ -338,7 +361,8 @@) repeat, content for content, the README portion of the PKG-INFO diff above, since PKG-INFO embeds the README as the package's long description.
onnxtr-0.6.0/onnxtr/contrib/__init__.py (new file)

@@ -0,0 +1 @@
+from .artefacts import ArtefactDetector
{onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/contrib/artefacts.py

@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

-from typing import Any
+from typing import Any

@@ -14,7 +14,7 @@ from .base import _BasePredictor

 __all__ = ["ArtefactDetector"]

-default_cfgs:
+default_cfgs: dict[str, dict[str, Any]] = {
     "yolov8_artefact": {
         "input_shape": (3, 1024, 1024),
         "labels": ["bar_code", "qr_code", "logo", "photo"],

@@ -34,7 +34,6 @@ class ArtefactDetector(_BasePredictor):
     >>> results = detector(doc)

     Args:
-    ----
     arch: the architecture to use
     batch_size: the batch size to use
     model_path: the path to the model to use

@@ -50,9 +49,9 @@ class ArtefactDetector(_BasePredictor):
         self,
         arch: str = "yolov8_artefact",
         batch_size: int = 2,
-        model_path:
-        labels:
-        input_shape:
+        model_path: str | None = None,
+        labels: list[str] | None = None,
+        input_shape: tuple[int, int, int] | None = None,
         conf_threshold: float = 0.5,
         iou_threshold: float = 0.5,
         **kwargs: Any,

@@ -66,7 +65,7 @@ class ArtefactDetector(_BasePredictor):
     def preprocess(self, img: np.ndarray) -> np.ndarray:
         return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)

-    def postprocess(self, output:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]:
         results = []

         for batch in zip(output, input_images):

@@ -109,7 +108,6 @@ class ArtefactDetector(_BasePredictor):
         Display the results

         Args:
-        ----
         **kwargs: additional keyword arguments to be passed to `plt.show`
         """
         requires_package("matplotlib", "`.show()` requires matplotlib installed")
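Together with the new `onnxtr/contrib/__init__.py` shown above, `ArtefactDetector` becomes importable straight from `onnxtr.contrib`. A minimal usage sketch based on the docstring and constructor signature in these hunks (the image path is a placeholder):

```python
from onnxtr.contrib import ArtefactDetector  # new public export in 0.6.0
from onnxtr.io import DocumentFile

doc = DocumentFile.from_images(["page.jpg"])  # placeholder image
detector = ArtefactDetector(conf_threshold=0.5, iou_threshold=0.5)
results = detector(doc)  # per-page lists of dicts describing detected artefacts
detector.show()  # optional matplotlib visualization of the detections
```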
{onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/contrib/base.py

@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

-from typing import Any
+from typing import Any

 import numpy as np
 import onnxruntime as ort

@@ -16,32 +16,29 @@ class _BasePredictor:
     Base class for all predictors

     Args:
-    ----
     batch_size: the batch size to use
     url: the url to use to download a model if needed
     model_path: the path to the model to use
     **kwargs: additional arguments to be passed to `download_from_url`
     """

-    def __init__(self, batch_size: int, url:
+    def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None:
         self.batch_size = batch_size
         self.session = self._init_model(url, model_path, **kwargs)

-        self._inputs:
-        self._results:
+        self._inputs: list[np.ndarray] = []
+        self._results: list[Any] = []

-    def _init_model(self, url:
+    def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any:
         """
         Download the model from the given url if needed

         Args:
-        ----
         url: the url to use
         model_path: the path to the model to use
         **kwargs: additional arguments to be passed to `download_from_url`

         Returns:
-        -------
         Any: the ONNX loaded model
         """
         if not url and not model_path:

@@ -54,40 +51,34 @@ class _BasePredictor:
         Preprocess the input image

         Args:
-        ----
         img: the input image to preprocess

         Returns:
-        -------
         np.ndarray: the preprocessed image
         """
         raise NotImplementedError

-    def postprocess(self, output:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
         """
         Postprocess the model output

         Args:
-        ----
         output: the model output to postprocess
         input_images: the input images used to generate the output

         Returns:
-        -------
         Any: the postprocessed output
         """
         raise NotImplementedError

-    def __call__(self, inputs:
+    def __call__(self, inputs: list[np.ndarray]) -> Any:
         """
         Call the model on the given inputs

         Args:
-        ----
         inputs: the inputs to use

         Returns:
-        -------
         Any: the postprocessed output
         """
         self._inputs = inputs
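The diff makes the template-method pattern explicit: `__call__` drives the ONNX session, while subclasses only fill in `preprocess` and `postprocess` (both raise `NotImplementedError` here). A hypothetical minimal subclass, purely for illustration and not part of the package:

```python
import numpy as np

from onnxtr.contrib.base import _BasePredictor


class GrayscaleScorer(_BasePredictor):
    """Hypothetical predictor illustrating the two hooks a subclass fills in."""

    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # Real predictors resize/transpose here; we only rescale to [0, 1].
        return img.astype(np.float32) / 255.0

    def postprocess(self, output, input_images):
        # Real predictors decode boxes/labels here; we return raw outputs.
        return output
```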
{onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/file_utils.py

@@ -6,7 +6,6 @@
 import importlib.metadata
 import importlib.util
 import logging
-from typing import Optional

 __all__ = ["requires_package"]

@@ -14,12 +13,11 @@ ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
 ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})


-def requires_package(name: str, extra_message:
+def requires_package(name: str, extra_message: str | None = None) -> None:  # pragma: no cover
     """
     package requirement helper

     Args:
-    ----
     name: name of the package
     extra_message: additional message to display if the package is not found
     """
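The dropped `Optional` import reflects the pattern across the whole release: with the floor raised to Python 3.10, `Optional[X]` annotations become PEP 604 `X | None` unions everywhere. The helper's body is not shown in this hunk; a sketch of the usual shape of such a check, under the assumption that it probes installed distribution metadata:

```python
import importlib.metadata


def _requires_package_sketch(name: str, extra_message: str | None = None) -> None:
    # Sketch only, not the package's actual body: importlib.metadata.version
    # raises PackageNotFoundError when the distribution is absent.
    try:
        importlib.metadata.version(name)
    except importlib.metadata.PackageNotFoundError:
        hint = f" {extra_message}" if extra_message else ""
        raise ImportError(f"Missing required package '{name}'.{hint}")
```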