onnxtr 0.5.1__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {onnxtr-0.5.1 → onnxtr-0.6.0}/PKG-INFO +52 -23
  2. {onnxtr-0.5.1 → onnxtr-0.6.0}/README.md +37 -14
  3. onnxtr-0.6.0/onnxtr/contrib/__init__.py +1 -0
  4. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/contrib/artefacts.py +6 -8
  5. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/contrib/base.py +7 -16
  6. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/file_utils.py +1 -3
  7. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/elements.py +45 -59
  8. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/html.py +0 -2
  9. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/image.py +1 -4
  10. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/pdf.py +3 -5
  11. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/reader.py +4 -10
  12. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/_utils.py +10 -17
  13. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/builder.py +17 -30
  14. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/models/mobilenet.py +7 -12
  15. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/predictor/base.py +6 -7
  16. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/zoo.py +25 -11
  17. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/_utils/base.py +3 -7
  18. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/core.py +2 -8
  19. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/models/differentiable_binarization.py +10 -17
  20. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/models/fast.py +10 -17
  21. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/models/linknet.py +10 -17
  22. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/postprocessor/base.py +3 -9
  23. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/predictor/base.py +4 -5
  24. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/zoo.py +20 -6
  25. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/engine.py +9 -9
  26. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/factory/hub.py +3 -7
  27. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/predictor/base.py +29 -30
  28. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/predictor/predictor.py +4 -5
  29. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/preprocessor/base.py +8 -12
  30. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/core.py +0 -1
  31. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/crnn.py +11 -23
  32. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/master.py +9 -15
  33. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/parseq.py +8 -12
  34. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/sar.py +8 -12
  35. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/vitstr.py +9 -15
  36. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/_utils.py +6 -9
  37. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/base.py +3 -3
  38. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/utils.py +2 -7
  39. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/zoo.py +19 -7
  40. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/zoo.py +7 -9
  41. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/transforms/base.py +17 -6
  42. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/common_types.py +7 -8
  43. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/data.py +7 -11
  44. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/fonts.py +1 -6
  45. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/geometry.py +18 -49
  46. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/multithreading.py +3 -5
  47. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/reconstitution.py +6 -8
  48. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/repr.py +1 -2
  49. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/visualization.py +12 -21
  50. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/vocabs.py +1 -2
  51. onnxtr-0.6.0/onnxtr/version.py +1 -0
  52. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/PKG-INFO +52 -23
  53. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/requires.txt +13 -5
  54. {onnxtr-0.5.1 → onnxtr-0.6.0}/pyproject.toml +18 -10
  55. {onnxtr-0.5.1 → onnxtr-0.6.0}/setup.py +1 -1
  56. onnxtr-0.5.1/onnxtr/models/detection/postprocessor/__init__.py +0 -0
  57. onnxtr-0.5.1/onnxtr/version.py +0 -1
  58. {onnxtr-0.5.1 → onnxtr-0.6.0}/LICENSE +0 -0
  59. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/__init__.py +0 -0
  60. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/io/__init__.py +0 -0
  61. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/__init__.py +0 -0
  62. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/__init__.py +0 -0
  63. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/models/__init__.py +0 -0
  64. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/classification/predictor/__init__.py +0 -0
  65. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/__init__.py +0 -0
  66. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/_utils/__init__.py +0 -0
  67. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/models/__init__.py +0 -0
  68. {onnxtr-0.5.1/onnxtr/contrib → onnxtr-0.6.0/onnxtr/models/detection/postprocessor}/__init__.py +0 -0
  69. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/detection/predictor/__init__.py +0 -0
  70. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/factory/__init__.py +0 -0
  71. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/predictor/__init__.py +0 -0
  72. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/preprocessor/__init__.py +0 -0
  73. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/__init__.py +0 -0
  74. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/models/__init__.py +0 -0
  75. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/__init__.py +0 -0
  76. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/py.typed +0 -0
  77. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/transforms/__init__.py +0 -0
  78. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/utils/__init__.py +0 -0
  79. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/SOURCES.txt +0 -0
  80. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/dependency_links.txt +0 -0
  81. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/top_level.txt +0 -0
  82. {onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr.egg-info/zip-safe +0 -0
  83. {onnxtr-0.5.1 → onnxtr-0.6.0}/setup.cfg +0 -0
{onnxtr-0.5.1 → onnxtr-0.6.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: onnxtr
-Version: 0.5.1
+Version: 0.6.0
 Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
 Author-email: Felix Dittrich <felixdittrich92@gmail.com>
 Maintainer: Felix Dittrich
@@ -209,7 +209,7 @@ License: Apache License
 Project-URL: repository, https://github.com/felixdittrich92/OnnxTR
 Project-URL: tracker, https://github.com/felixdittrich92/OnnxTR/issues
 Project-URL: changelog, https://github.com/felixdittrich92/OnnxTR/releases
-Keywords: OCR,deep learning,computer vision,onnx,text detection,text recognition,docTR,document analysis,document processing
+Keywords: OCR,deep learning,computer vision,onnx,text detection,text recognition,docTR,document analysis,document processing,document AI
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Education
@@ -218,11 +218,11 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Natural Language :: English
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: <4,>=3.9.0
+Requires-Python: <4,>=3.10.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy<3.0.0,>=1.16.0
@@ -238,17 +238,23 @@ Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
 Requires-Dist: tqdm>=4.30.0
 Provides-Extra: cpu
-Requires-Dist: onnxruntime>=1.11.0; extra == "cpu"
+Requires-Dist: onnxruntime>=1.18.0; extra == "cpu"
 Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "cpu"
 Provides-Extra: gpu
-Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu"
+Requires-Dist: onnxruntime-gpu>=1.18.0; extra == "gpu"
 Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "gpu"
+Provides-Extra: openvino
+Requires-Dist: onnxruntime-openvino>=1.18.0; extra == "openvino"
+Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "openvino"
 Provides-Extra: cpu-headless
-Requires-Dist: onnxruntime>=1.11.0; extra == "cpu-headless"
+Requires-Dist: onnxruntime>=1.18.0; extra == "cpu-headless"
 Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "cpu-headless"
 Provides-Extra: gpu-headless
-Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu-headless"
+Requires-Dist: onnxruntime-gpu>=1.18.0; extra == "gpu-headless"
 Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "gpu-headless"
+Provides-Extra: openvino-headless
+Requires-Dist: onnxruntime-openvino>=1.18.0; extra == "openvino-headless"
+Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "openvino-headless"
 Provides-Extra: html
 Requires-Dist: weasyprint>=55.0; extra == "html"
 Provides-Extra: viz
@@ -263,7 +269,7 @@ Requires-Dist: ruff>=0.1.5; extra == "quality"
 Requires-Dist: mypy>=0.812; extra == "quality"
 Requires-Dist: pre-commit>=2.17.0; extra == "quality"
 Provides-Extra: dev
-Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
+Requires-Dist: onnxruntime>=1.18.0; extra == "dev"
 Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "dev"
 Requires-Dist: weasyprint>=55.0; extra == "dev"
 Requires-Dist: matplotlib>=3.1.0; extra == "dev"
@@ -284,7 +290,8 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
 [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Pypi](https://img.shields.io/badge/pypi-v0.6.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
 [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
 
 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
@@ -304,7 +311,7 @@ What you can expect from this repository:
 
 ### Prerequisites
 
-Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
+Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
 
 ### Latest release
 
@@ -312,16 +319,22 @@ You can then install the latest release of the package using [pypi](https://pypi
 
 **NOTE:**
 
-For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
+Currently supported execution providers by default are: CPU, CUDA (NVIDIA GPU), OpenVINO (Intel CPU | GPU).
+
+For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started).
 
 - **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).
 
 ```shell
+# standard cpu support
 pip install "onnxtr[cpu]"
 pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
 # with gpu support
 pip install "onnxtr[gpu]"
 pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
+# OpenVINO cpu | gpu support for Intel CPUs | GPUs
+pip install "onnxtr[openvino]"
+pip install "onnxtr[openvino-headless]" # same as openvino but with opencv-headless
 # with HTML support
 pip install "onnxtr[html]"
 # with support for visualization
@@ -330,6 +343,18 @@ pip install "onnxtr[viz]"
 pip install "onnxtr[html, gpu, viz]"
 ```
 
+**Recommendation:**
+
+If you have:
+
+- a NVIDIA GPU, use one of the `gpu` variants
+- an Intel CPU or GPU, use one of the `openvino` variants
+- otherwise, use one of the `cpu` variants
+
+**OpenVINO:**
+
+By default OnnxTR running with the OpenVINO execution provider backend uses the `CPU` device with `FP32` precision, to change the device or for further configuaration please refer to the [ONNX Runtime OpenVINO documentation](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options).
+
 ### Reading files
 
 Documents can be interpreted from PDF / Images / Webpages / Multiple page images using the following code snippet:
@@ -359,8 +384,10 @@ model = ocr_predictor(
     reco_arch='vitstr_base', # recognition architecture
     det_bs=2, # detection batch size
     reco_bs=512, # recognition batch size
+    # Document related parameters
     assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
     straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
+    export_as_straight_boxes=False, # set to `True` if the boxes should be exported as if the pages were straight (default: False)
     # Preprocessing related parameters
     preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
     symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
@@ -596,19 +623,20 @@ Benchmarking performed on the FUNSD dataset and CORD dataset.
 
 docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.
 
-The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision**.
+The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision** on CPU.
 
 - CPU benchmarks:
 
-|Library |FUNSD (199 pages) |CORD (900 pages) |
-|---------------------------------|-------------------------------|-------------------------------|
-|docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
-|**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
-|**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
-|EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
-|**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
-|Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
-|PaddleOCR (CPU) - no cls - v2.7.3| ~1.27s / Page | ~0.38s / Page |
+|Library |FUNSD (199 pages) |CORD (900 pages) |
+|------------------------------------|-------------------------------|-------------------------------|
+|docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
+|**OnnxTR (CPU)** - v0.6.0 | ~0.57s / Page | **~0.25s / Page** |
+|**OnnxTR (CPU) 8-bit** - v0.6.0 | **~0.38s / Page** | **~0.14s / Page** |
+|**OnnxTR (CPU-OpenVINO)** - v0.6.0 | **~0.15s / Page** | **~0.14s / Page** |
+|EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
+|**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
+|Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
+|PaddleOCR (CPU) - no cls - v2.7.3 | ~1.27s / Page | ~0.38s / Page |
 
 - GPU benchmarks:
 
@@ -616,7 +644,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
 |-------------------------------------|-------------------------------|-------------------------------|
 |docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
 |**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
-|OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
+|OnnxTR (GPU) - v0.6.0 | **~0.06s / Page** | ~0.04s / Page |
+|**OnnxTR (GPU) float16 - v0.6.0** | **~0.05s / Page** | **~0.03s / Page** |
 |EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
 |Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
 |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
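
The dependency floor moves from `onnxruntime>=1.11.0` to `onnxruntime>=1.18.0`, and two new extras (`openvino`, `openvino-headless`) pull in `onnxruntime-openvino`. A minimal sketch, assuming nothing beyond ONNX Runtime's public API (this snippet is not part of the package), to verify which execution providers an environment actually exposes after installing one of the extras:

```python
import onnxruntime as ort

# OnnxTR 0.6.0 expects onnxruntime >= 1.18.0 in every extra.
print(ort.__version__)

# Lists the execution providers this onnxruntime build was compiled with,
# e.g. ["OpenVINOExecutionProvider", "CPUExecutionProvider"] after
# `pip install "onnxtr[openvino]"`.
print(ort.get_available_providers())
```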
{onnxtr-0.5.1 → onnxtr-0.6.0}/README.md

@@ -7,7 +7,8 @@
 [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Pypi](https://img.shields.io/badge/pypi-v0.6.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
 [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
 
 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
@@ -27,7 +28,7 @@ What you can expect from this repository:
 
 ### Prerequisites
 
-Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
+Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
 
 ### Latest release
 
@@ -35,16 +36,22 @@ You can then install the latest release of the package using [pypi](https://pypi
 
 **NOTE:**
 
-For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
+Currently supported execution providers by default are: CPU, CUDA (NVIDIA GPU), OpenVINO (Intel CPU | GPU).
+
+For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started).
 
 - **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).
 
 ```shell
+# standard cpu support
 pip install "onnxtr[cpu]"
 pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
 # with gpu support
 pip install "onnxtr[gpu]"
 pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
+# OpenVINO cpu | gpu support for Intel CPUs | GPUs
+pip install "onnxtr[openvino]"
+pip install "onnxtr[openvino-headless]" # same as openvino but with opencv-headless
 # with HTML support
 pip install "onnxtr[html]"
 # with support for visualization
@@ -53,6 +60,18 @@ pip install "onnxtr[viz]"
 pip install "onnxtr[html, gpu, viz]"
 ```
 
+**Recommendation:**
+
+If you have:
+
+- a NVIDIA GPU, use one of the `gpu` variants
+- an Intel CPU or GPU, use one of the `openvino` variants
+- otherwise, use one of the `cpu` variants
+
+**OpenVINO:**
+
+By default OnnxTR running with the OpenVINO execution provider backend uses the `CPU` device with `FP32` precision, to change the device or for further configuaration please refer to the [ONNX Runtime OpenVINO documentation](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options).
+
 ### Reading files
 
 Documents can be interpreted from PDF / Images / Webpages / Multiple page images using the following code snippet:
@@ -82,8 +101,10 @@ model = ocr_predictor(
     reco_arch='vitstr_base', # recognition architecture
     det_bs=2, # detection batch size
     reco_bs=512, # recognition batch size
+    # Document related parameters
    assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
     straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
+    export_as_straight_boxes=False, # set to `True` if the boxes should be exported as if the pages were straight (default: False)
     # Preprocessing related parameters
     preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
     symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
@@ -319,19 +340,20 @@ Benchmarking performed on the FUNSD dataset and CORD dataset.
 
 docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.
 
-The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision**.
+The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision** on CPU.
 
 - CPU benchmarks:
 
-|Library |FUNSD (199 pages) |CORD (900 pages) |
-|---------------------------------|-------------------------------|-------------------------------|
-|docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
-|**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
-|**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
-|EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
-|**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
-|Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
-|PaddleOCR (CPU) - no cls - v2.7.3| ~1.27s / Page | ~0.38s / Page |
+|Library |FUNSD (199 pages) |CORD (900 pages) |
+|------------------------------------|-------------------------------|-------------------------------|
+|docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
+|**OnnxTR (CPU)** - v0.6.0 | ~0.57s / Page | **~0.25s / Page** |
+|**OnnxTR (CPU) 8-bit** - v0.6.0 | **~0.38s / Page** | **~0.14s / Page** |
+|**OnnxTR (CPU-OpenVINO)** - v0.6.0 | **~0.15s / Page** | **~0.14s / Page** |
+|EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
+|**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
+|Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
+|PaddleOCR (CPU) - no cls - v2.7.3 | ~1.27s / Page | ~0.38s / Page |
 
 - GPU benchmarks:
 
@@ -339,7 +361,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
 |-------------------------------------|-------------------------------|-------------------------------|
 |docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
 |**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
-|OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
+|OnnxTR (GPU) - v0.6.0 | **~0.06s / Page** | ~0.04s / Page |
+|**OnnxTR (GPU) float16 - v0.6.0** | **~0.05s / Page** | **~0.03s / Page** |
 |EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
 |Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
 |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
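
Besides the version and benchmark updates, the README diff introduces the `export_as_straight_boxes` parameter alongside the existing page-handling flags. A hedged usage sketch of the 0.6.0 predictor call shown above (the document path is illustrative; defaults are taken from the comments in the diff):

```python
from onnxtr.io import DocumentFile
from onnxtr.models import ocr_predictor

doc = DocumentFile.from_pdf("path/to/document.pdf")  # illustrative path

model = ocr_predictor(
    assume_straight_pages=False,    # pages may be rotated / in perspective
    export_as_straight_boxes=True,  # new in 0.6.0: still export axis-aligned boxes
)
result = model(doc)
print(result.render())  # plain-text rendering of the OCR result
```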
onnxtr-0.6.0/onnxtr/contrib/__init__.py (new file)

@@ -0,0 +1 @@
+from .artefacts import ArtefactDetector
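
The new file only re-exports `ArtefactDetector`, which shortens the import path; a sketch of both import forms that should work in 0.6.0:

```python
# New in 0.6.0: re-exported at package level by this __init__.py
from onnxtr.contrib import ArtefactDetector

# Still available via the full module path, as in 0.5.1
from onnxtr.contrib.artefacts import ArtefactDetector
```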
{onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/contrib/artefacts.py

@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any
 
 import cv2
 import numpy as np
@@ -14,7 +14,7 @@ from .base import _BasePredictor
 
 __all__ = ["ArtefactDetector"]
 
-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "yolov8_artefact": {
         "input_shape": (3, 1024, 1024),
         "labels": ["bar_code", "qr_code", "logo", "photo"],
@@ -34,7 +34,6 @@ class ArtefactDetector(_BasePredictor):
     >>> results = detector(doc)
 
     Args:
-    ----
         arch: the architecture to use
         batch_size: the batch size to use
         model_path: the path to the model to use
@@ -50,9 +49,9 @@ class ArtefactDetector(_BasePredictor):
         self,
         arch: str = "yolov8_artefact",
         batch_size: int = 2,
-        model_path: Optional[str] = None,
-        labels: Optional[List[str]] = None,
-        input_shape: Optional[Tuple[int, int, int]] = None,
+        model_path: str | None = None,
+        labels: list[str] | None = None,
+        input_shape: tuple[int, int, int] | None = None,
         conf_threshold: float = 0.5,
         iou_threshold: float = 0.5,
         **kwargs: Any,
@@ -66,7 +65,7 @@ class ArtefactDetector(_BasePredictor):
     def preprocess(self, img: np.ndarray) -> np.ndarray:
         return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)
 
-    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> List[List[Dict[str, Any]]]:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]:
         results = []
 
         for batch in zip(output, input_images):
@@ -109,7 +108,6 @@ class ArtefactDetector(_BasePredictor):
         Display the results
 
         Args:
-        ----
             **kwargs: additional keyword arguments to be passed to `plt.show`
         """
         requires_package("matplotlib", "`.show()` requires matplotlib installed")
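
The changes here are typing modernization plus docstring cleanup; behavior is unchanged. A usage sketch assembled from the signature and docstring fragments visible in these hunks (the image path is illustrative):

```python
from onnxtr.contrib import ArtefactDetector
from onnxtr.io import DocumentFile

doc = DocumentFile.from_images(["path/to/page.jpg"])  # illustrative path

detector = ArtefactDetector(
    arch="yolov8_artefact",  # detects bar_code, qr_code, logo, photo
    batch_size=2,
    conf_threshold=0.5,
    iou_threshold=0.5,
)
results = detector(doc)  # one list of detections per input page
detector.show()          # visualization; requires matplotlib
```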
{onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/contrib/base.py

@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, List, Optional
+from typing import Any
 
 import numpy as np
 import onnxruntime as ort
@@ -16,32 +16,29 @@ class _BasePredictor:
     Base class for all predictors
 
     Args:
-    ----
         batch_size: the batch size to use
         url: the url to use to download a model if needed
         model_path: the path to the model to use
         **kwargs: additional arguments to be passed to `download_from_url`
     """
 
-    def __init__(self, batch_size: int, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs) -> None:
+    def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None:
         self.batch_size = batch_size
         self.session = self._init_model(url, model_path, **kwargs)
 
-        self._inputs: List[np.ndarray] = []
-        self._results: List[Any] = []
+        self._inputs: list[np.ndarray] = []
+        self._results: list[Any] = []
 
-    def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs: Any) -> Any:
+    def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any:
         """
         Download the model from the given url if needed
 
         Args:
-        ----
             url: the url to use
             model_path: the path to the model to use
             **kwargs: additional arguments to be passed to `download_from_url`
 
         Returns:
-        -------
             Any: the ONNX loaded model
         """
         if not url and not model_path:
@@ -54,40 +51,34 @@ class _BasePredictor:
         Preprocess the input image
 
         Args:
-        ----
             img: the input image to preprocess
 
         Returns:
-        -------
             np.ndarray: the preprocessed image
         """
         raise NotImplementedError
 
-    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
         """
         Postprocess the model output
 
         Args:
-        ----
             output: the model output to postprocess
             input_images: the input images used to generate the output
 
         Returns:
-        -------
             Any: the postprocessed output
         """
         raise NotImplementedError
 
-    def __call__(self, inputs: List[np.ndarray]) -> Any:
+    def __call__(self, inputs: list[np.ndarray]) -> Any:
         """
         Call the model on the given inputs
 
         Args:
-        ----
             inputs: the inputs to use
 
         Returns:
-        -------
             Any: the postprocessed output
         """
         self._inputs = inputs
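
The `_BasePredictor` contract after this change, as a minimal hypothetical subclass; only the signatures visible in the hunks above are assumed, and the model path is illustrative:

```python
from typing import Any

import numpy as np

from onnxtr.contrib.base import _BasePredictor


class ToyPredictor(_BasePredictor):  # hypothetical example subclass
    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # scale uint8 images into [0, 1] for the model
        return img.astype(np.float32) / 255.0

    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
        # pass the raw model outputs through unchanged
        return output


predictor = ToyPredictor(batch_size=2, model_path="path/to/model.onnx")
```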
{onnxtr-0.5.1 → onnxtr-0.6.0}/onnxtr/file_utils.py

@@ -6,7 +6,6 @@
 import importlib.metadata
 import importlib.util
 import logging
-from typing import Optional
 
 __all__ = ["requires_package"]
 
@@ -14,12 +13,11 @@ ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
 ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
 
 
-def requires_package(name: str, extra_message: Optional[str] = None) -> None:  # pragma: no cover
+def requires_package(name: str, extra_message: str | None = None) -> None:  # pragma: no cover
     """
     package requirement helper
 
     Args:
-    ----
         name: name of the package
         extra_message: additional message to display if the package is not found
     """