onnxtr 0.5.0__tar.gz → 0.6.0__tar.gz

This diff shows the content changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Files changed (84)
  1. {onnxtr-0.5.0 → onnxtr-0.6.0}/PKG-INFO +53 -23
  2. {onnxtr-0.5.0 → onnxtr-0.6.0}/README.md +38 -14
  3. onnxtr-0.6.0/onnxtr/contrib/__init__.py +1 -0
  4. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/contrib/artefacts.py +6 -8
  5. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/contrib/base.py +7 -16
  6. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/file_utils.py +1 -3
  7. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/elements.py +54 -60
  8. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/html.py +0 -2
  9. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/image.py +1 -4
  10. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/pdf.py +3 -5
  11. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/reader.py +4 -10
  12. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/_utils.py +10 -17
  13. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/builder.py +17 -30
  14. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/models/mobilenet.py +7 -12
  15. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/predictor/base.py +6 -7
  16. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/zoo.py +25 -11
  17. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/_utils/base.py +3 -7
  18. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/core.py +2 -8
  19. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/models/differentiable_binarization.py +10 -17
  20. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/models/fast.py +10 -17
  21. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/models/linknet.py +10 -17
  22. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/postprocessor/base.py +3 -9
  23. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/predictor/base.py +4 -5
  24. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/zoo.py +20 -6
  25. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/engine.py +9 -9
  26. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/factory/hub.py +3 -7
  27. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/predictor/base.py +29 -30
  28. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/predictor/predictor.py +4 -5
  29. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/preprocessor/base.py +8 -12
  30. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/core.py +0 -1
  31. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/crnn.py +11 -23
  32. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/master.py +9 -15
  33. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/parseq.py +8 -12
  34. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/sar.py +8 -12
  35. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/vitstr.py +9 -15
  36. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/_utils.py +6 -9
  37. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/base.py +3 -3
  38. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/utils.py +2 -7
  39. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/zoo.py +19 -7
  40. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/zoo.py +7 -9
  41. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/transforms/base.py +17 -6
  42. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/common_types.py +7 -8
  43. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/data.py +7 -11
  44. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/fonts.py +1 -6
  45. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/geometry.py +18 -49
  46. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/multithreading.py +3 -5
  47. onnxtr-0.6.0/onnxtr/utils/reconstitution.py +171 -0
  48. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/repr.py +1 -2
  49. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/visualization.py +12 -21
  50. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/vocabs.py +1 -2
  51. onnxtr-0.6.0/onnxtr/version.py +1 -0
  52. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/PKG-INFO +53 -23
  53. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/requires.txt +13 -5
  54. {onnxtr-0.5.0 → onnxtr-0.6.0}/pyproject.toml +20 -11
  55. {onnxtr-0.5.0 → onnxtr-0.6.0}/setup.py +1 -1
  56. onnxtr-0.5.0/onnxtr/models/detection/postprocessor/__init__.py +0 -0
  57. onnxtr-0.5.0/onnxtr/utils/reconstitution.py +0 -70
  58. onnxtr-0.5.0/onnxtr/version.py +0 -1
  59. {onnxtr-0.5.0 → onnxtr-0.6.0}/LICENSE +0 -0
  60. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/__init__.py +0 -0
  61. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/io/__init__.py +0 -0
  62. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/__init__.py +0 -0
  63. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/__init__.py +0 -0
  64. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/models/__init__.py +0 -0
  65. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/classification/predictor/__init__.py +0 -0
  66. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/__init__.py +0 -0
  67. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/_utils/__init__.py +0 -0
  68. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/models/__init__.py +0 -0
  69. {onnxtr-0.5.0/onnxtr/contrib → onnxtr-0.6.0/onnxtr/models/detection/postprocessor}/__init__.py +0 -0
  70. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/detection/predictor/__init__.py +0 -0
  71. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/factory/__init__.py +0 -0
  72. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/predictor/__init__.py +0 -0
  73. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/preprocessor/__init__.py +0 -0
  74. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/__init__.py +0 -0
  75. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/models/__init__.py +0 -0
  76. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/models/recognition/predictor/__init__.py +0 -0
  77. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/py.typed +0 -0
  78. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/transforms/__init__.py +0 -0
  79. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr/utils/__init__.py +0 -0
  80. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/SOURCES.txt +0 -0
  81. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/dependency_links.txt +0 -0
  82. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/top_level.txt +0 -0
  83. {onnxtr-0.5.0 → onnxtr-0.6.0}/onnxtr.egg-info/zip-safe +0 -0
  84. {onnxtr-0.5.0 → onnxtr-0.6.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: onnxtr
- Version: 0.5.0
+ Version: 0.6.0
  Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
  Author-email: Felix Dittrich <felixdittrich92@gmail.com>
  Maintainer: Felix Dittrich
@@ -209,7 +209,7 @@ License: Apache License
  Project-URL: repository, https://github.com/felixdittrich92/OnnxTR
  Project-URL: tracker, https://github.com/felixdittrich92/OnnxTR/issues
  Project-URL: changelog, https://github.com/felixdittrich92/OnnxTR/releases
- Keywords: OCR,deep learning,computer vision,onnx,text detection,text recognition,docTR,document analysis,document processing
+ Keywords: OCR,deep learning,computer vision,onnx,text detection,text recognition,docTR,document analysis,document processing,document AI
  Classifier: Development Status :: 4 - Beta
  Classifier: Intended Audience :: Developers
  Classifier: Intended Audience :: Education
@@ -218,11 +218,11 @@ Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Natural Language :: English
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Requires-Python: <4,>=3.9.0
+ Requires-Python: <4,>=3.10.0
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: numpy<3.0.0,>=1.16.0
@@ -238,17 +238,23 @@ Requires-Dist: defusedxml>=0.7.0
  Requires-Dist: anyascii>=0.3.2
  Requires-Dist: tqdm>=4.30.0
  Provides-Extra: cpu
- Requires-Dist: onnxruntime>=1.11.0; extra == "cpu"
+ Requires-Dist: onnxruntime>=1.18.0; extra == "cpu"
  Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "cpu"
  Provides-Extra: gpu
- Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu"
+ Requires-Dist: onnxruntime-gpu>=1.18.0; extra == "gpu"
  Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "gpu"
+ Provides-Extra: openvino
+ Requires-Dist: onnxruntime-openvino>=1.18.0; extra == "openvino"
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "openvino"
  Provides-Extra: cpu-headless
- Requires-Dist: onnxruntime>=1.11.0; extra == "cpu-headless"
+ Requires-Dist: onnxruntime>=1.18.0; extra == "cpu-headless"
  Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "cpu-headless"
  Provides-Extra: gpu-headless
- Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu-headless"
+ Requires-Dist: onnxruntime-gpu>=1.18.0; extra == "gpu-headless"
  Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "gpu-headless"
+ Provides-Extra: openvino-headless
+ Requires-Dist: onnxruntime-openvino>=1.18.0; extra == "openvino-headless"
+ Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "openvino-headless"
  Provides-Extra: html
  Requires-Dist: weasyprint>=55.0; extra == "html"
  Provides-Extra: viz
@@ -263,7 +269,7 @@ Requires-Dist: ruff>=0.1.5; extra == "quality"
  Requires-Dist: mypy>=0.812; extra == "quality"
  Requires-Dist: pre-commit>=2.17.0; extra == "quality"
  Provides-Extra: dev
- Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
+ Requires-Dist: onnxruntime>=1.18.0; extra == "dev"
  Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "dev"
  Requires-Dist: weasyprint>=55.0; extra == "dev"
  Requires-Dist: matplotlib>=3.1.0; extra == "dev"
@@ -284,7 +290,9 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
- [![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+ [![Pypi](https://img.shields.io/badge/pypi-v0.6.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+ [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)

  > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.

@@ -303,7 +311,7 @@ What you can expect from this repository:

  ### Prerequisites

- Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
+ Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.

  ### Latest release

@@ -311,16 +319,22 @@ You can then install the latest release of the package using [pypi](https://pypi

  **NOTE:**

- For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
+ Currently supported execution providers by default are: CPU, CUDA (NVIDIA GPU), OpenVINO (Intel CPU | GPU).
+
+ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started).

  - **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).

  ```shell
+ # standard cpu support
  pip install "onnxtr[cpu]"
  pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
  # with gpu support
  pip install "onnxtr[gpu]"
  pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
+ # OpenVINO cpu | gpu support for Intel CPUs | GPUs
+ pip install "onnxtr[openvino]"
+ pip install "onnxtr[openvino-headless]" # same as openvino but with opencv-headless
  # with HTML support
  pip install "onnxtr[html]"
  # with support for visualization
@@ -329,6 +343,18 @@ pip install "onnxtr[viz]"
  pip install "onnxtr[html, gpu, viz]"
  ```

+ **Recommendation:**
+
+ If you have:
+
+ - an NVIDIA GPU, use one of the `gpu` variants
+ - an Intel CPU or GPU, use one of the `openvino` variants
+ - otherwise, use one of the `cpu` variants
+
+ **OpenVINO:**
+
+ By default, OnnxTR running with the OpenVINO execution provider backend uses the `CPU` device with `FP32` precision. To change the device or for further configuration, please refer to the [ONNX Runtime OpenVINO documentation](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options).
+
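To verify which execution providers an install actually exposes, or to move the OpenVINO EP off its CPU/FP32 default, the generic ONNX Runtime API can be used directly. A minimal sketch, independent of OnnxTR's own wiring, assuming `onnxruntime-openvino` is installed; `model.onnx` is a placeholder path:

```python
# Sketch (not OnnxTR's API): configuring the OpenVINO execution provider
# directly through ONNX Runtime. Assumes `onnxruntime-openvino` is
# installed; "model.onnx" is a placeholder.
import onnxruntime as ort

# list the providers this runtime build exposes
print(ort.get_available_providers())

# request an Intel GPU instead of the default CPU device; unsupported
# ops fall back to the plain CPU provider listed after it
session = ort.InferenceSession(
    "model.onnx",
    providers=[
        ("OpenVINOExecutionProvider", {"device_type": "GPU"}),
        "CPUExecutionProvider",
    ],
)
```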
  ### Reading files

  Documents can be interpreted from PDF / Images / Webpages / Multiple page images using the following code snippet:
@@ -358,8 +384,10 @@ model = ocr_predictor(
      reco_arch='vitstr_base', # recognition architecture
      det_bs=2, # detection batch size
      reco_bs=512, # recognition batch size
+     # Document related parameters
      assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
      straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
+     export_as_straight_boxes=False, # set to `True` if the boxes should be exported as if the pages were straight (default: False)
      # Preprocessing related parameters
      preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
      symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
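A condensed, runnable version of the call in the hunk above, highlighting the newly added `export_as_straight_boxes` flag; the document path is a placeholder:

```python
# Condensed form of the 0.6.0 predictor call shown above;
# "sample.pdf" is a placeholder document path.
from onnxtr.io import DocumentFile
from onnxtr.models import ocr_predictor

doc = DocumentFile.from_pdf("sample.pdf")
model = ocr_predictor(
    det_arch="fast_base",
    reco_arch="vitstr_base",
    assume_straight_pages=False,    # pages may be rotated or in perspective
    export_as_straight_boxes=True,  # new in 0.6.0: emit axis-aligned boxes anyway
)
result = model(doc)
```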
@@ -595,19 +623,20 @@ Benchmarking performed on the FUNSD dataset and CORD dataset.

  docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.

- The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision**.
+ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision** on CPU.

  - CPU benchmarks:

- |Library |FUNSD (199 pages) |CORD (900 pages) |
- |---------------------------------|-------------------------------|-------------------------------|
- |docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
- |**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
- |**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
- |EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
- |**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
- |Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
- |PaddleOCR (CPU) - no cls - v2.7.3| ~1.27s / Page | ~0.38s / Page |
+ |Library |FUNSD (199 pages) |CORD (900 pages) |
+ |------------------------------------|-------------------------------|-------------------------------|
+ |docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
+ |**OnnxTR (CPU)** - v0.6.0 | ~0.57s / Page | **~0.25s / Page** |
+ |**OnnxTR (CPU) 8-bit** - v0.6.0 | **~0.38s / Page** | **~0.14s / Page** |
+ |**OnnxTR (CPU-OpenVINO)** - v0.6.0 | **~0.15s / Page** | **~0.14s / Page** |
+ |EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
+ |**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
+ |Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
+ |PaddleOCR (CPU) - no cls - v2.7.3 | ~1.27s / Page | ~0.38s / Page |

  - GPU benchmarks:

@@ -615,7 +644,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
  |-------------------------------------|-------------------------------|-------------------------------|
  |docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
  |**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
- |OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
+ |OnnxTR (GPU) - v0.6.0 | **~0.06s / Page** | ~0.04s / Page |
+ |**OnnxTR (GPU) float16 - v0.6.0** | **~0.05s / Page** | **~0.03s / Page** |
  |EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
  |Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
  |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
@@ -7,7 +7,9 @@
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
- [![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+ [![Pypi](https://img.shields.io/badge/pypi-v0.6.0-blue.svg)](https://pypi.org/project/OnnxTR/)
+ [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)

  > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.

@@ -26,7 +28,7 @@ What you can expect from this repository:

  ### Prerequisites

- Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
+ Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.

  ### Latest release

@@ -34,16 +36,22 @@ You can then install the latest release of the package using [pypi](https://pypi

  **NOTE:**

- For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started). Currently supported execution providers by default are: CPU, CUDA
+ Currently supported execution providers by default are: CPU, CUDA (NVIDIA GPU), OpenVINO (Intel CPU | GPU).
+
+ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started).

  - **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).

  ```shell
+ # standard cpu support
  pip install "onnxtr[cpu]"
  pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
  # with gpu support
  pip install "onnxtr[gpu]"
  pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
+ # OpenVINO cpu | gpu support for Intel CPUs | GPUs
+ pip install "onnxtr[openvino]"
+ pip install "onnxtr[openvino-headless]" # same as openvino but with opencv-headless
  # with HTML support
  pip install "onnxtr[html]"
  # with support for visualization
@@ -52,6 +60,18 @@ pip install "onnxtr[viz]"
  pip install "onnxtr[html, gpu, viz]"
  ```

+ **Recommendation:**
+
+ If you have:
+
+ - an NVIDIA GPU, use one of the `gpu` variants
+ - an Intel CPU or GPU, use one of the `openvino` variants
+ - otherwise, use one of the `cpu` variants
+
+ **OpenVINO:**
+
+ By default, OnnxTR running with the OpenVINO execution provider backend uses the `CPU` device with `FP32` precision. To change the device or for further configuration, please refer to the [ONNX Runtime OpenVINO documentation](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options).
+

  ### Reading files

  Documents can be interpreted from PDF / Images / Webpages / Multiple page images using the following code snippet:
@@ -81,8 +101,10 @@ model = ocr_predictor(
      reco_arch='vitstr_base', # recognition architecture
      det_bs=2, # detection batch size
      reco_bs=512, # recognition batch size
+     # Document related parameters
      assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
      straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
+     export_as_straight_boxes=False, # set to `True` if the boxes should be exported as if the pages were straight (default: False)
      # Preprocessing related parameters
      preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
      symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
@@ -318,19 +340,20 @@ Benchmarking performed on the FUNSD dataset and CORD dataset.

  docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.

- The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision**.
+ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision** on CPU.

  - CPU benchmarks:

- |Library |FUNSD (199 pages) |CORD (900 pages) |
- |---------------------------------|-------------------------------|-------------------------------|
- |docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
- |**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
- |**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
- |EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
- |**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
- |Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
- |PaddleOCR (CPU) - no cls - v2.7.3| ~1.27s / Page | ~0.38s / Page |
+ |Library |FUNSD (199 pages) |CORD (900 pages) |
+ |------------------------------------|-------------------------------|-------------------------------|
+ |docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
+ |**OnnxTR (CPU)** - v0.6.0 | ~0.57s / Page | **~0.25s / Page** |
+ |**OnnxTR (CPU) 8-bit** - v0.6.0 | **~0.38s / Page** | **~0.14s / Page** |
+ |**OnnxTR (CPU-OpenVINO)** - v0.6.0 | **~0.15s / Page** | **~0.14s / Page** |
+ |EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
+ |**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
+ |Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
+ |PaddleOCR (CPU) - no cls - v2.7.3 | ~1.27s / Page | ~0.38s / Page |

  - GPU benchmarks:

@@ -338,7 +361,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
  |-------------------------------------|-------------------------------|-------------------------------|
  |docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
  |**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
- |OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
+ |OnnxTR (GPU) - v0.6.0 | **~0.06s / Page** | ~0.04s / Page |
+ |**OnnxTR (GPU) float16 - v0.6.0** | **~0.05s / Page** | **~0.03s / Page** |
  |EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
  |Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
  |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
@@ -0,0 +1 @@
+ from .artefacts import ArtefactDetector
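With this re-export, the detector is importable straight from `onnxtr.contrib`. A short usage sketch assembled from the `ArtefactDetector` docstring and `.show()` hunks further down in this diff; the PDF path is a placeholder:

```python
# Sketch based on the ArtefactDetector docstring in this diff; the PDF
# path is a placeholder, and the default "yolov8_artefact" weights are
# fetched on first use.
from onnxtr.contrib import ArtefactDetector
from onnxtr.io import DocumentFile

doc = DocumentFile.from_pdf("sample.pdf")
detector = ArtefactDetector(batch_size=2)
results = detector(doc)  # one list of detections per page
detector.show()          # requires matplotlib (the `viz` extra)
```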
@@ -3,7 +3,7 @@
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

- from typing import Any, Dict, List, Optional, Tuple
+ from typing import Any

  import cv2
  import numpy as np
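This annotation migration recurs in nearly every file of the release: raising the floor to Python 3.10 lets the codebase drop `typing.Dict`/`List`/`Optional`/`Tuple` in favor of builtin generics (PEP 585) and `|` unions (PEP 604). In miniature:

```python
# Before (Python 3.9 compatible):
from typing import Dict, List, Optional

def load_labels_old(path: Optional[str]) -> Dict[str, List[str]]: ...

# After (Python >= 3.10), as applied throughout this release:
def load_labels_new(path: str | None) -> dict[str, list[str]]: ...
```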
@@ -14,7 +14,7 @@ from .base import _BasePredictor

  __all__ = ["ArtefactDetector"]

- default_cfgs: Dict[str, Dict[str, Any]] = {
+ default_cfgs: dict[str, dict[str, Any]] = {
      "yolov8_artefact": {
          "input_shape": (3, 1024, 1024),
          "labels": ["bar_code", "qr_code", "logo", "photo"],
@@ -34,7 +34,6 @@ class ArtefactDetector(_BasePredictor):
      >>> results = detector(doc)

      Args:
-     ----
          arch: the architecture to use
          batch_size: the batch size to use
          model_path: the path to the model to use
@@ -50,9 +49,9 @@ class ArtefactDetector(_BasePredictor):
          self,
          arch: str = "yolov8_artefact",
          batch_size: int = 2,
-         model_path: Optional[str] = None,
-         labels: Optional[List[str]] = None,
-         input_shape: Optional[Tuple[int, int, int]] = None,
+         model_path: str | None = None,
+         labels: list[str] | None = None,
+         input_shape: tuple[int, int, int] | None = None,
          conf_threshold: float = 0.5,
          iou_threshold: float = 0.5,
          **kwargs: Any,
@@ -66,7 +65,7 @@ class ArtefactDetector(_BasePredictor):
      def preprocess(self, img: np.ndarray) -> np.ndarray:
          return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)

-     def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> List[List[Dict[str, Any]]]:
+     def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]:
          results = []

          for batch in zip(output, input_images):
@@ -109,7 +108,6 @@ class ArtefactDetector(_BasePredictor):
          Display the results

          Args:
-         ----
              **kwargs: additional keyword arguments to be passed to `plt.show`
          """
          requires_package("matplotlib", "`.show()` requires matplotlib installed")
@@ -3,7 +3,7 @@
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

- from typing import Any, List, Optional
+ from typing import Any

  import numpy as np
  import onnxruntime as ort
@@ -16,32 +16,29 @@ class _BasePredictor:
      Base class for all predictors

      Args:
-     ----
          batch_size: the batch size to use
          url: the url to use to download a model if needed
          model_path: the path to the model to use
          **kwargs: additional arguments to be passed to `download_from_url`
      """

-     def __init__(self, batch_size: int, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs) -> None:
+     def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None:
          self.batch_size = batch_size
          self.session = self._init_model(url, model_path, **kwargs)

-         self._inputs: List[np.ndarray] = []
-         self._results: List[Any] = []
+         self._inputs: list[np.ndarray] = []
+         self._results: list[Any] = []

-     def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs: Any) -> Any:
+     def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any:
          """
          Download the model from the given url if needed

          Args:
-         ----
              url: the url to use
              model_path: the path to the model to use
              **kwargs: additional arguments to be passed to `download_from_url`

          Returns:
-         -------
              Any: the ONNX loaded model
          """
          if not url and not model_path:
@@ -54,40 +51,34 @@ class _BasePredictor:
          Preprocess the input image

          Args:
-         ----
              img: the input image to preprocess

          Returns:
-         -------
              np.ndarray: the preprocessed image
          """
          raise NotImplementedError

-     def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
+     def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
          """
          Postprocess the model output

          Args:
-         ----
              output: the model output to postprocess
              input_images: the input images used to generate the output

          Returns:
-         -------
              Any: the postprocessed output
          """
          raise NotImplementedError

-     def __call__(self, inputs: List[np.ndarray]) -> Any:
+     def __call__(self, inputs: list[np.ndarray]) -> Any:
          """
          Call the model on the given inputs

          Args:
-         ----
              inputs: the inputs to use

          Returns:
-         -------
              Any: the postprocessed output
          """
          self._inputs = inputs
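Taken together, these hunks spell out the contract for contrib predictors: subclasses override `preprocess` and `postprocess`, while `__call__` drives the ONNX session. A minimal sketch of a custom subclass, assuming only the signatures and module path visible in this diff; the resize shape, model path, and decoding are placeholders:

```python
# Illustrative _BasePredictor subclass, assuming only the signatures
# shown in this diff; shapes and paths are placeholders.
from typing import Any

import cv2
import numpy as np

from onnxtr.contrib.base import _BasePredictor


class MyPredictor(_BasePredictor):
    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # resize to the model input, CHW layout, scaled to [0, 1]
        return np.transpose(cv2.resize(img, (640, 640)), (2, 0, 1)) / np.array(255.0)

    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
        # placeholder decoding: return the raw model outputs per batch
        return list(output)


# a local .onnx file stands in for a hosted model url
predictor = MyPredictor(batch_size=1, model_path="my_model.onnx")
```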
@@ -6,7 +6,6 @@
  import importlib.metadata
  import importlib.util
  import logging
- from typing import Optional

  __all__ = ["requires_package"]

@@ -14,12 +13,11 @@ ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
  ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})


- def requires_package(name: str, extra_message: Optional[str] = None) -> None:  # pragma: no cover
+ def requires_package(name: str, extra_message: str | None = None) -> None:  # pragma: no cover
      """
      package requirement helper

      Args:
-     ----
          name: name of the package
          extra_message: additional message to display if the package is not found
      """