onnxtr 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {onnxtr-0.4.0 → onnxtr-0.5.0}/PKG-INFO +30 -7
  2. {onnxtr-0.4.0 → onnxtr-0.5.0}/README.md +19 -4
  3. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/builder.py +1 -1
  4. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/models/mobilenet.py +1 -0
  5. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/predictor/base.py +8 -4
  6. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/zoo.py +24 -8
  7. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/predictor/base.py +24 -12
  8. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/predictor/predictor.py +3 -0
  9. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/geometry.py +106 -19
  10. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/vocabs.py +4 -0
  11. onnxtr-0.5.0/onnxtr/version.py +1 -0
  12. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/PKG-INFO +30 -7
  13. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/requires.txt +12 -2
  14. {onnxtr-0.4.0 → onnxtr-0.5.0}/pyproject.toml +12 -2
  15. {onnxtr-0.4.0 → onnxtr-0.5.0}/setup.py +1 -1
  16. onnxtr-0.4.0/onnxtr/version.py +0 -1
  17. {onnxtr-0.4.0 → onnxtr-0.5.0}/LICENSE +0 -0
  18. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/__init__.py +0 -0
  19. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/contrib/__init__.py +0 -0
  20. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/contrib/artefacts.py +0 -0
  21. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/contrib/base.py +0 -0
  22. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/file_utils.py +0 -0
  23. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/__init__.py +0 -0
  24. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/elements.py +0 -0
  25. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/html.py +0 -0
  26. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/image.py +0 -0
  27. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/pdf.py +0 -0
  28. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/io/reader.py +0 -0
  29. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/__init__.py +0 -0
  30. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/_utils.py +0 -0
  31. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/__init__.py +0 -0
  32. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/models/__init__.py +0 -0
  33. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/classification/predictor/__init__.py +0 -0
  34. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/__init__.py +0 -0
  35. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/_utils/__init__.py +0 -0
  36. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/_utils/base.py +0 -0
  37. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/core.py +0 -0
  38. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/__init__.py +0 -0
  39. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/differentiable_binarization.py +0 -0
  40. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/fast.py +0 -0
  41. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/models/linknet.py +0 -0
  42. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/postprocessor/__init__.py +0 -0
  43. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/postprocessor/base.py +0 -0
  44. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/predictor/__init__.py +0 -0
  45. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/predictor/base.py +0 -0
  46. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/detection/zoo.py +0 -0
  47. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/engine.py +0 -0
  48. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/factory/__init__.py +0 -0
  49. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/factory/hub.py +0 -0
  50. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/predictor/__init__.py +0 -0
  51. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/preprocessor/__init__.py +0 -0
  52. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/preprocessor/base.py +0 -0
  53. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/__init__.py +0 -0
  54. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/core.py +0 -0
  55. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/__init__.py +0 -0
  56. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/crnn.py +0 -0
  57. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/master.py +0 -0
  58. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/parseq.py +0 -0
  59. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/sar.py +0 -0
  60. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/models/vitstr.py +0 -0
  61. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/predictor/__init__.py +0 -0
  62. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/predictor/_utils.py +0 -0
  63. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/predictor/base.py +0 -0
  64. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/utils.py +0 -0
  65. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/recognition/zoo.py +0 -0
  66. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/models/zoo.py +0 -0
  67. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/py.typed +0 -0
  68. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/transforms/__init__.py +0 -0
  69. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/transforms/base.py +0 -0
  70. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/__init__.py +0 -0
  71. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/common_types.py +0 -0
  72. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/data.py +0 -0
  73. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/fonts.py +0 -0
  74. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/multithreading.py +0 -0
  75. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/reconstitution.py +0 -0
  76. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/repr.py +0 -0
  77. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr/utils/visualization.py +0 -0
  78. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/SOURCES.txt +0 -0
  79. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/dependency_links.txt +0 -0
  80. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/top_level.txt +0 -0
  81. {onnxtr-0.4.0 → onnxtr-0.5.0}/onnxtr.egg-info/zip-safe +0 -0
  82. {onnxtr-0.4.0 → onnxtr-0.5.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: onnxtr
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
5
5
  Author-email: Felix Dittrich <felixdittrich92@gmail.com>
6
6
  Maintainer: Felix Dittrich
@@ -225,9 +225,8 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
225
225
  Requires-Python: <4,>=3.9.0
226
226
  Description-Content-Type: text/markdown
227
227
  License-File: LICENSE
228
- Requires-Dist: numpy<2.0.0,>=1.16.0
228
+ Requires-Dist: numpy<3.0.0,>=1.16.0
229
229
  Requires-Dist: scipy<2.0.0,>=1.4.0
230
- Requires-Dist: opencv-python<5.0.0,>=4.5.0
231
230
  Requires-Dist: pypdfium2<5.0.0,>=4.11.0
232
231
  Requires-Dist: pyclipper<2.0.0,>=1.2.0
233
232
  Requires-Dist: shapely<3.0.0,>=1.6.0
@@ -240,8 +239,16 @@ Requires-Dist: anyascii>=0.3.2
240
239
  Requires-Dist: tqdm>=4.30.0
241
240
  Provides-Extra: cpu
242
241
  Requires-Dist: onnxruntime>=1.11.0; extra == "cpu"
242
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "cpu"
243
243
  Provides-Extra: gpu
244
244
  Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu"
245
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "gpu"
246
+ Provides-Extra: cpu-headless
247
+ Requires-Dist: onnxruntime>=1.11.0; extra == "cpu-headless"
248
+ Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "cpu-headless"
249
+ Provides-Extra: gpu-headless
250
+ Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu-headless"
251
+ Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "gpu-headless"
245
252
  Provides-Extra: html
246
253
  Requires-Dist: weasyprint>=55.0; extra == "html"
247
254
  Provides-Extra: viz
@@ -257,6 +264,7 @@ Requires-Dist: mypy>=0.812; extra == "quality"
257
264
  Requires-Dist: pre-commit>=2.17.0; extra == "quality"
258
265
  Provides-Extra: dev
259
266
  Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
267
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "dev"
260
268
  Requires-Dist: weasyprint>=55.0; extra == "dev"
261
269
  Requires-Dist: matplotlib>=3.1.0; extra == "dev"
262
270
  Requires-Dist: mplcursors>=0.3; extra == "dev"
@@ -276,7 +284,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
276
284
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
277
285
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
278
286
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
279
- [![Pypi](https://img.shields.io/badge/pypi-v0.3.2-blue.svg)](https://pypi.org/project/OnnxTR/)
287
+ [![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
280
288
 
281
289
  > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
282
290
 
@@ -309,8 +317,10 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
309
317
 
310
318
  ```shell
311
319
  pip install "onnxtr[cpu]"
320
+ pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
312
321
  # with gpu support
313
322
  pip install "onnxtr[gpu]"
323
+ pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
314
324
  # with HTML support
315
325
  pip install "onnxtr[html]"
316
326
  # with support for visualization
@@ -356,6 +366,9 @@ model = ocr_predictor(
356
366
  # Additional parameters - meta information
357
367
  detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
358
368
  detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
369
+ # Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
370
+ disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
371
+ disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
359
372
  # DocumentBuilder specific parameters
360
373
  resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
361
374
  resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
@@ -589,8 +602,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
589
602
  |Library |FUNSD (199 pages) |CORD (900 pages) |
590
603
  |---------------------------------|-------------------------------|-------------------------------|
591
604
  |docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
592
- |**OnnxTR (CPU)** - v0.1.2 | ~0.57s / Page | **~0.25s / Page** |
593
- |**OnnxTR (CPU) 8-bit** - v0.1.2 | **~0.38s / Page** | **~0.14s / Page** |
605
+ |**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
606
+ |**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
594
607
  |EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
595
608
  |**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
596
609
  |Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
@@ -602,7 +615,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
602
615
  |-------------------------------------|-------------------------------|-------------------------------|
603
616
  |docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
604
617
  |**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
605
- |OnnxTR (GPU) - v0.1.2 | **~0.06s / Page** | ~0.04s / Page |
618
+ |OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
606
619
  |EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
607
620
  |Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
608
621
  |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
@@ -621,6 +634,16 @@ If you wish to cite please refer to the base project citation, feel free to use
621
634
  }
622
635
  ```
623
636
 
637
+ ```bibtex
638
+ @misc{onnxtr2024,
639
+ title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
640
+ author={Felix Dittrich},
641
+ year={2024},
642
+ publisher = {GitHub},
643
+ howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
644
+ }
645
+ ```
646
+
624
647
  ## License
625
648
 
626
649
  Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
@@ -7,7 +7,7 @@
7
7
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
8
8
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
9
9
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
10
- [![Pypi](https://img.shields.io/badge/pypi-v0.3.2-blue.svg)](https://pypi.org/project/OnnxTR/)
10
+ [![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
11
11
 
12
12
  > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
13
13
 
@@ -40,8 +40,10 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
40
40
 
41
41
  ```shell
42
42
  pip install "onnxtr[cpu]"
43
+ pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
43
44
  # with gpu support
44
45
  pip install "onnxtr[gpu]"
46
+ pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
45
47
  # with HTML support
46
48
  pip install "onnxtr[html]"
47
49
  # with support for visualization
@@ -87,6 +89,9 @@ model = ocr_predictor(
87
89
  # Additional parameters - meta information
88
90
  detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
89
91
  detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
92
+ # Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
93
+ disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
94
+ disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
90
95
  # DocumentBuilder specific parameters
91
96
  resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
92
97
  resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
@@ -320,8 +325,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
320
325
  |Library |FUNSD (199 pages) |CORD (900 pages) |
321
326
  |---------------------------------|-------------------------------|-------------------------------|
322
327
  |docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
323
- |**OnnxTR (CPU)** - v0.1.2 | ~0.57s / Page | **~0.25s / Page** |
324
- |**OnnxTR (CPU) 8-bit** - v0.1.2 | **~0.38s / Page** | **~0.14s / Page** |
328
+ |**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
329
+ |**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
325
330
  |EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
326
331
  |**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
327
332
  |Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
@@ -333,7 +338,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
333
338
  |-------------------------------------|-------------------------------|-------------------------------|
334
339
  |docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
335
340
  |**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
336
- |OnnxTR (GPU) - v0.1.2 | **~0.06s / Page** | ~0.04s / Page |
341
+ |OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
337
342
  |EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
338
343
  |Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
339
344
  |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
@@ -352,6 +357,16 @@ If you wish to cite please refer to the base project citation, feel free to use
352
357
  }
353
358
  ```
354
359
 
360
+ ```bibtex
361
+ @misc{onnxtr2024,
362
+ title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
363
+ author={Felix Dittrich},
364
+ year={2024},
365
+ publisher = {GitHub},
366
+ howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
367
+ }
368
+ ```
369
+
355
370
  ## License
356
371
 
357
372
  Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
@@ -266,7 +266,7 @@ class DocumentBuilder(NestedObject):
266
266
  Line([
267
267
  Word(
268
268
  *word_preds[idx],
269
- tuple([tuple(pt) for pt in boxes[idx].tolist()]), # type: ignore[arg-type]
269
+ tuple(tuple(pt) for pt in boxes[idx].tolist()), # type: ignore[arg-type]
270
270
  float(objectness_scores[idx]),
271
271
  crop_orientations[idx],
272
272
  )
@@ -13,6 +13,7 @@ import numpy as np
13
13
  from ...engine import Engine, EngineConfig
14
14
 
15
15
  __all__ = [
16
+ "MobileNetV3",
16
17
  "mobilenet_v3_small_crop_orientation",
17
18
  "mobilenet_v3_small_page_orientation",
18
19
  ]
@@ -3,7 +3,7 @@
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
- from typing import Any, List, Union
6
+ from typing import Any, List, Optional, Union
7
7
 
8
8
  import numpy as np
9
9
  from scipy.special import softmax
@@ -29,10 +29,10 @@ class OrientationPredictor(NestedObject):
29
29
 
30
30
  def __init__(
31
31
  self,
32
- pre_processor: PreProcessor,
33
- model: Any,
32
+ pre_processor: Optional[PreProcessor],
33
+ model: Optional[Any],
34
34
  ) -> None:
35
- self.pre_processor = pre_processor
35
+ self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None
36
36
  self.model = model
37
37
 
38
38
  def __call__(
@@ -43,6 +43,10 @@ class OrientationPredictor(NestedObject):
43
43
  if any(input.ndim != 3 for input in inputs):
44
44
  raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.")
45
45
 
46
+ if self.model is None or self.pre_processor is None:
47
+ # predictor is disabled
48
+ return [[0] * len(inputs), [0] * len(inputs), [1.0] * len(inputs)]
49
+
46
50
  processed_batches = self.pre_processor(inputs)
47
51
  predicted_batches = [self.model(batch) for batch in processed_batches]
48
52
 
@@ -17,16 +17,30 @@ ORIENTATION_ARCHS: List[str] = ["mobilenet_v3_small_crop_orientation", "mobilene
17
17
 
18
18
 
19
19
  def _orientation_predictor(
20
- arch: str, load_in_8_bit: bool = False, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any
20
+ arch: Any,
21
+ model_type: str,
22
+ load_in_8_bit: bool = False,
23
+ engine_cfg: Optional[EngineConfig] = None,
24
+ disabled: bool = False,
25
+ **kwargs: Any,
21
26
  ) -> OrientationPredictor:
22
- if arch not in ORIENTATION_ARCHS:
23
- raise ValueError(f"unknown architecture '{arch}'")
27
+ if disabled:
28
+ # Case where the orientation predictor is disabled
29
+ return OrientationPredictor(None, None)
30
+
31
+ if isinstance(arch, str):
32
+ if arch not in ORIENTATION_ARCHS:
33
+ raise ValueError(f"unknown architecture '{arch}'")
34
+ # Load directly classifier from backbone
35
+ _model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
36
+ else:
37
+ if not isinstance(arch, classification.MobileNetV3):
38
+ raise ValueError(f"unknown architecture: {type(arch)}")
39
+ _model = arch
24
40
 
25
- # Load directly classifier from backbone
26
- _model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
27
41
  kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
28
42
  kwargs["std"] = kwargs.get("std", _model.cfg["std"])
29
- kwargs["batch_size"] = kwargs.get("batch_size", 512 if "crop" in arch else 2)
43
+ kwargs["batch_size"] = kwargs.get("batch_size", 512 if model_type == "crop" else 2)
30
44
  input_shape = _model.cfg["input_shape"][1:]
31
45
  predictor = OrientationPredictor(
32
46
  PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs),
@@ -60,7 +74,8 @@ def crop_orientation_predictor(
60
74
  -------
61
75
  OrientationPredictor
62
76
  """
63
- return _orientation_predictor(arch, load_in_8_bit, engine_cfg, **kwargs)
77
+ model_type = "crop"
78
+ return _orientation_predictor(arch, model_type, load_in_8_bit, engine_cfg, **kwargs)
64
79
 
65
80
 
66
81
  def page_orientation_predictor(
@@ -88,4 +103,5 @@ def page_orientation_predictor(
88
103
  -------
89
104
  OrientationPredictor
90
105
  """
91
- return _orientation_predictor(arch, load_in_8_bit, engine_cfg, **kwargs)
106
+ model_type = "page"
107
+ return _orientation_predictor(arch, model_type, load_in_8_bit, engine_cfg, **kwargs)
@@ -9,7 +9,7 @@ import numpy as np
9
9
 
10
10
  from onnxtr.models.builder import DocumentBuilder
11
11
  from onnxtr.models.engine import EngineConfig
12
- from onnxtr.utils.geometry import extract_crops, extract_rcrops, rotate_image
12
+ from onnxtr.utils.geometry import extract_crops, extract_rcrops, remove_image_padding, rotate_image
13
13
 
14
14
  from .._utils import estimate_orientation, rectify_crops, rectify_loc_preds
15
15
  from ..classification import crop_orientation_predictor, page_orientation_predictor
@@ -55,13 +55,19 @@ class _OCRPredictor:
55
55
  ) -> None:
56
56
  self.assume_straight_pages = assume_straight_pages
57
57
  self.straighten_pages = straighten_pages
58
+ self._page_orientation_disabled = kwargs.pop("disable_page_orientation", False)
59
+ self._crop_orientation_disabled = kwargs.pop("disable_crop_orientation", False)
58
60
  self.crop_orientation_predictor = (
59
61
  None
60
62
  if assume_straight_pages
61
- else crop_orientation_predictor(load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg)
63
+ else crop_orientation_predictor(
64
+ load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._crop_orientation_disabled
65
+ )
62
66
  )
63
67
  self.page_orientation_predictor = (
64
- page_orientation_predictor(load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg)
68
+ page_orientation_predictor(
69
+ load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._crop_orientation_disabled
70
+ )
65
71
  if detect_orientation or straighten_pages or not assume_straight_pages
66
72
  else None
67
73
  )
@@ -112,8 +118,8 @@ class _OCRPredictor:
112
118
  ]
113
119
  )
114
120
  return [
115
- # We exapnd if the page is wider than tall and the angle is 90 or -90
116
- rotate_image(page, angle, expand=page.shape[1] > page.shape[0] and abs(angle) == 90)
121
+ # expand if height and width are not equal, afterwards remove padding
122
+ remove_image_padding(rotate_image(page, angle, expand=page.shape[0] != page.shape[1]))
117
123
  for page, angle in zip(pages, origin_pages_orientations)
118
124
  ]
119
125
 
@@ -123,13 +129,18 @@ class _OCRPredictor:
123
129
  loc_preds: List[np.ndarray],
124
130
  channels_last: bool,
125
131
  assume_straight_pages: bool = False,
132
+ assume_horizontal: bool = False,
126
133
  ) -> List[List[np.ndarray]]:
127
- extraction_fn = extract_crops if assume_straight_pages else extract_rcrops
128
-
129
- crops = [
130
- extraction_fn(page, _boxes[:, :4], channels_last=channels_last) # type: ignore[operator]
131
- for page, _boxes in zip(pages, loc_preds)
132
- ]
134
+ if assume_straight_pages:
135
+ crops = [
136
+ extract_crops(page, _boxes[:, :4], channels_last=channels_last)
137
+ for page, _boxes in zip(pages, loc_preds)
138
+ ]
139
+ else:
140
+ crops = [
141
+ extract_rcrops(page, _boxes[:, :4], channels_last=channels_last, assume_horizontal=assume_horizontal)
142
+ for page, _boxes in zip(pages, loc_preds)
143
+ ]
133
144
  return crops
134
145
 
135
146
  @staticmethod
@@ -138,8 +149,9 @@ class _OCRPredictor:
138
149
  loc_preds: List[np.ndarray],
139
150
  channels_last: bool,
140
151
  assume_straight_pages: bool = False,
152
+ assume_horizontal: bool = False,
141
153
  ) -> Tuple[List[List[np.ndarray]], List[np.ndarray]]:
142
- crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages)
154
+ crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages, assume_horizontal)
143
155
 
144
156
  # Avoid sending zero-sized crops
145
157
  is_kept = [[all(s > 0 for s in crop.shape) for crop in page_crops] for page_crops in crops]
@@ -100,6 +100,8 @@ class OCRPredictor(NestedObject, _OCRPredictor):
100
100
  origin_pages_orientations = None
101
101
  if self.straighten_pages:
102
102
  pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
103
+ # update page shapes after straightening
104
+ origin_page_shapes = [page.shape[:2] for page in pages]
103
105
 
104
106
  # forward again to get predictions on straight pages
105
107
  loc_preds = self.det_predictor(pages, **kwargs) # type: ignore[assignment]
@@ -117,6 +119,7 @@ class OCRPredictor(NestedObject, _OCRPredictor):
117
119
  loc_preds, # type: ignore[arg-type]
118
120
  channels_last=True,
119
121
  assume_straight_pages=self.assume_straight_pages,
122
+ assume_horizontal=self._page_orientation_disabled,
120
123
  )
121
124
  # Rectify crop orientation and get crop orientation predictions
122
125
  crop_orientations: Any = []
@@ -391,6 +391,26 @@ def rotate_image(
391
391
  return rot_img
392
392
 
393
393
 
394
+ def remove_image_padding(image: np.ndarray) -> np.ndarray:
395
+ """Remove black border padding from an image
396
+
397
+ Args:
398
+ ----
399
+ image: numpy tensor to remove padding from
400
+
401
+ Returns:
402
+ -------
403
+ Image with padding removed
404
+ """
405
+ # Find the bounding box of the non-black region
406
+ rows = np.any(image, axis=1)
407
+ cols = np.any(image, axis=0)
408
+ rmin, rmax = np.where(rows)[0][[0, -1]]
409
+ cmin, cmax = np.where(cols)[0][[0, -1]]
410
+
411
+ return image[rmin : rmax + 1, cmin : cmax + 1]
412
+
413
+
394
414
  def estimate_page_angle(polys: np.ndarray) -> float:
395
415
  """Takes a batch of rotated previously ORIENTED polys (N, 4, 2) (rectified by the classifier) and return the
396
416
  estimated angle ccw in degrees
@@ -471,7 +491,7 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True
471
491
 
472
492
 
473
493
  def extract_rcrops(
474
- img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True
494
+ img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True, assume_horizontal: bool = False
475
495
  ) -> List[np.ndarray]:
476
496
  """Created cropped images from list of rotated bounding boxes
477
497
 
@@ -481,6 +501,7 @@ def extract_rcrops(
481
501
  polys: bounding boxes of shape (N, 4, 2)
482
502
  dtype: target data type of bounding boxes
483
503
  channels_last: whether the channel dimensions is the last one instead of the last one
504
+ assume_horizontal: whether the boxes are assumed to be only horizontally oriented
484
505
 
485
506
  Returns:
486
507
  -------
@@ -498,22 +519,88 @@ def extract_rcrops(
498
519
  _boxes[:, :, 0] *= width
499
520
  _boxes[:, :, 1] *= height
500
521
 
501
- src_pts = _boxes[:, :3].astype(np.float32)
502
- # Preserve size
503
- d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
504
- d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
505
- # (N, 3, 2)
506
- dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
507
- dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
508
- dst_pts[:, 2, 1] = d2 - 1
509
- # Use a warp transformation to extract the crop
510
- crops = [
511
- cv2.warpAffine(
512
- img if channels_last else img.transpose(1, 2, 0),
513
- # Transformation matrix
514
- cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
515
- (int(d1[idx]), int(d2[idx])),
516
- )
517
- for idx in range(_boxes.shape[0])
518
- ]
522
+ src_img = img if channels_last else img.transpose(1, 2, 0)
523
+
524
+ # Handle only horizontal oriented boxes
525
+ if assume_horizontal:
526
+ crops = []
527
+
528
+ for box in _boxes:
529
+ # Calculate the centroid of the quadrilateral
530
+ centroid = np.mean(box, axis=0)
531
+
532
+ # Divide the points into left and right
533
+ left_points = box[box[:, 0] < centroid[0]]
534
+ right_points = box[box[:, 0] >= centroid[0]]
535
+
536
+ # Sort the left points according to the y-axis
537
+ left_points = left_points[np.argsort(left_points[:, 1])]
538
+ top_left_pt = left_points[0]
539
+ bottom_left_pt = left_points[-1]
540
+ # Sort the right points according to the y-axis
541
+ right_points = right_points[np.argsort(right_points[:, 1])]
542
+ top_right_pt = right_points[0]
543
+ bottom_right_pt = right_points[-1]
544
+ box_points = np.array(
545
+ [top_left_pt, bottom_left_pt, top_right_pt, bottom_right_pt],
546
+ dtype=dtype,
547
+ )
548
+
549
+ # Get the width and height of the rectangle that will contain the warped quadrilateral
550
+ width_upper = np.linalg.norm(top_right_pt - top_left_pt)
551
+ width_lower = np.linalg.norm(bottom_right_pt - bottom_left_pt)
552
+ height_left = np.linalg.norm(bottom_left_pt - top_left_pt)
553
+ height_right = np.linalg.norm(bottom_right_pt - top_right_pt)
554
+
555
+ # Get the maximum width and height
556
+ rect_width = max(int(width_upper), int(width_lower))
557
+ rect_height = max(int(height_left), int(height_right))
558
+
559
+ dst_pts = np.array(
560
+ [
561
+ [0, 0], # top-left
562
+ # bottom-left
563
+ [0, rect_height - 1],
564
+ # top-right
565
+ [rect_width - 1, 0],
566
+ # bottom-right
567
+ [rect_width - 1, rect_height - 1],
568
+ ],
569
+ dtype=dtype,
570
+ )
571
+
572
+ # Get the perspective transform matrix using the box points
573
+ affine_mat = cv2.getPerspectiveTransform(box_points, dst_pts)
574
+
575
+ # Perform the perspective warp to get the rectified crop
576
+ crop = cv2.warpPerspective(
577
+ src_img,
578
+ affine_mat,
579
+ (rect_width, rect_height),
580
+ )
581
+
582
+ # Add the crop to the list of crops
583
+ crops.append(crop)
584
+
585
+ # Handle any oriented boxes
586
+ else:
587
+ src_pts = _boxes[:, :3].astype(np.float32)
588
+ # Preserve size
589
+ d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
590
+ d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
591
+ # (N, 3, 2)
592
+ dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
593
+ dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
594
+ dst_pts[:, 2, 1] = d2 - 1
595
+ # Use a warp transformation to extract the crop
596
+ crops = [
597
+ cv2.warpAffine(
598
+ src_img,
599
+ # Transformation matrix
600
+ cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
601
+ (int(d1[idx]), int(d2[idx])),
602
+ )
603
+ for idx in range(_boxes.shape[0])
604
+ ]
605
+
519
606
  return crops # type: ignore[return-value]
@@ -25,6 +25,7 @@ VOCABS: Dict[str, str] = {
25
25
  "hindi_punctuation": "।,?!:्ॐ॰॥॰",
26
26
  "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
27
27
  "bangla_digits": "০১২৩৪৫৬৭৮৯",
28
+ "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
28
29
  }
29
30
 
30
31
  VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
@@ -59,6 +60,9 @@ VOCABS["vietnamese"] = (
59
60
  VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
60
61
  VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
61
62
  VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
63
+ VOCABS["ukrainian"] = (
64
+ VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"
65
+ )
62
66
  VOCABS["multilingual"] = "".join(
63
67
  dict.fromkeys(
64
68
  VOCABS["french"]
@@ -0,0 +1 @@
1
+ __version__ = 'v0.5.0'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: onnxtr
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
5
5
  Author-email: Felix Dittrich <felixdittrich92@gmail.com>
6
6
  Maintainer: Felix Dittrich
@@ -225,9 +225,8 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
225
225
  Requires-Python: <4,>=3.9.0
226
226
  Description-Content-Type: text/markdown
227
227
  License-File: LICENSE
228
- Requires-Dist: numpy<2.0.0,>=1.16.0
228
+ Requires-Dist: numpy<3.0.0,>=1.16.0
229
229
  Requires-Dist: scipy<2.0.0,>=1.4.0
230
- Requires-Dist: opencv-python<5.0.0,>=4.5.0
231
230
  Requires-Dist: pypdfium2<5.0.0,>=4.11.0
232
231
  Requires-Dist: pyclipper<2.0.0,>=1.2.0
233
232
  Requires-Dist: shapely<3.0.0,>=1.6.0
@@ -240,8 +239,16 @@ Requires-Dist: anyascii>=0.3.2
240
239
  Requires-Dist: tqdm>=4.30.0
241
240
  Provides-Extra: cpu
242
241
  Requires-Dist: onnxruntime>=1.11.0; extra == "cpu"
242
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "cpu"
243
243
  Provides-Extra: gpu
244
244
  Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu"
245
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "gpu"
246
+ Provides-Extra: cpu-headless
247
+ Requires-Dist: onnxruntime>=1.11.0; extra == "cpu-headless"
248
+ Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "cpu-headless"
249
+ Provides-Extra: gpu-headless
250
+ Requires-Dist: onnxruntime-gpu>=1.11.0; extra == "gpu-headless"
251
+ Requires-Dist: opencv-python-headless<5.0.0,>=4.5.0; extra == "gpu-headless"
245
252
  Provides-Extra: html
246
253
  Requires-Dist: weasyprint>=55.0; extra == "html"
247
254
  Provides-Extra: viz
@@ -257,6 +264,7 @@ Requires-Dist: mypy>=0.812; extra == "quality"
257
264
  Requires-Dist: pre-commit>=2.17.0; extra == "quality"
258
265
  Provides-Extra: dev
259
266
  Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
267
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0; extra == "dev"
260
268
  Requires-Dist: weasyprint>=55.0; extra == "dev"
261
269
  Requires-Dist: matplotlib>=3.1.0; extra == "dev"
262
270
  Requires-Dist: mplcursors>=0.3; extra == "dev"
@@ -276,7 +284,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
276
284
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
277
285
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
278
286
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
279
- [![Pypi](https://img.shields.io/badge/pypi-v0.3.2-blue.svg)](https://pypi.org/project/OnnxTR/)
287
+ [![Pypi](https://img.shields.io/badge/pypi-v0.5.0-blue.svg)](https://pypi.org/project/OnnxTR/)
280
288
 
281
289
  > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
282
290
 
@@ -309,8 +317,10 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
309
317
 
310
318
  ```shell
311
319
  pip install "onnxtr[cpu]"
320
+ pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
312
321
  # with gpu support
313
322
  pip install "onnxtr[gpu]"
323
+ pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
314
324
  # with HTML support
315
325
  pip install "onnxtr[html]"
316
326
  # with support for visualization
@@ -356,6 +366,9 @@ model = ocr_predictor(
356
366
  # Additional parameters - meta information
357
367
  detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
358
368
  detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
369
+ # Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
370
+ disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
371
+ disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
359
372
  # DocumentBuilder specific parameters
360
373
  resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
361
374
  resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
@@ -589,8 +602,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
589
602
  |Library |FUNSD (199 pages) |CORD (900 pages) |
590
603
  |---------------------------------|-------------------------------|-------------------------------|
591
604
  |docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
592
- |**OnnxTR (CPU)** - v0.1.2 | ~0.57s / Page | **~0.25s / Page** |
593
- |**OnnxTR (CPU) 8-bit** - v0.1.2 | **~0.38s / Page** | **~0.14s / Page** |
605
+ |**OnnxTR (CPU)** - v0.4.1 | ~0.57s / Page | **~0.25s / Page** |
606
+ |**OnnxTR (CPU) 8-bit** - v0.4.1 | **~0.38s / Page** | **~0.14s / Page** |
594
607
  |EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
595
608
  |**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
596
609
  |Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
@@ -602,7 +615,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
602
615
  |-------------------------------------|-------------------------------|-------------------------------|
603
616
  |docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
604
617
  |**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
605
- |OnnxTR (GPU) - v0.1.2 | **~0.06s / Page** | ~0.04s / Page |
618
+ |OnnxTR (GPU) - v0.4.1 | **~0.06s / Page** | ~0.04s / Page |
606
619
  |EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
607
620
  |Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
608
621
  |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
@@ -621,6 +634,16 @@ If you wish to cite please refer to the base project citation, feel free to use
621
634
  }
622
635
  ```
623
636
 
637
+ ```bibtex
638
+ @misc{onnxtr2024,
639
+ title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
640
+ author={Felix Dittrich},
641
+ year={2024},
642
+ publisher = {GitHub},
643
+ howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
644
+ }
645
+ ```
646
+
624
647
  ## License
625
648
 
626
649
  Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
@@ -1,6 +1,5 @@
1
- numpy<2.0.0,>=1.16.0
1
+ numpy<3.0.0,>=1.16.0
2
2
  scipy<2.0.0,>=1.4.0
3
- opencv-python<5.0.0,>=4.5.0
4
3
  pypdfium2<5.0.0,>=4.11.0
5
4
  pyclipper<2.0.0,>=1.2.0
6
5
  shapely<3.0.0,>=1.6.0
@@ -14,9 +13,15 @@ tqdm>=4.30.0
14
13
 
15
14
  [cpu]
16
15
  onnxruntime>=1.11.0
16
+ opencv-python<5.0.0,>=4.5.0
17
+
18
+ [cpu-headless]
19
+ onnxruntime>=1.11.0
20
+ opencv-python-headless<5.0.0,>=4.5.0
17
21
 
18
22
  [dev]
19
23
  onnxruntime>=1.11.0
24
+ opencv-python<5.0.0,>=4.5.0
20
25
  weasyprint>=55.0
21
26
  matplotlib>=3.1.0
22
27
  mplcursors>=0.3
@@ -29,6 +34,11 @@ pre-commit>=2.17.0
29
34
 
30
35
  [gpu]
31
36
  onnxruntime-gpu>=1.11.0
37
+ opencv-python<5.0.0,>=4.5.0
38
+
39
+ [gpu-headless]
40
+ onnxruntime-gpu>=1.11.0
41
+ opencv-python-headless<5.0.0,>=4.5.0
32
42
 
33
43
  [html]
34
44
  weasyprint>=55.0
@@ -31,9 +31,8 @@ dynamic = ["version"]
31
31
  dependencies = [
32
32
  # For proper typing, mypy needs numpy>=1.20.0 (cf. https://github.com/numpy/numpy/pull/16515)
33
33
  # Additional typing support is brought by numpy>=1.22.4, but core build sticks to >=1.16.0
34
- "numpy>=1.16.0,<2.0.0",
34
+ "numpy>=1.16.0,<3.0.0",
35
35
  "scipy>=1.4.0,<2.0.0",
36
- "opencv-python>=4.5.0,<5.0.0",
37
36
  "pypdfium2>=4.11.0,<5.0.0",
38
37
  "pyclipper>=1.2.0,<2.0.0",
39
38
  "shapely>=1.6.0,<3.0.0",
@@ -49,9 +48,19 @@ dependencies = [
49
48
  [project.optional-dependencies]
50
49
  cpu = [
51
50
  "onnxruntime>=1.11.0",
51
+ "opencv-python>=4.5.0,<5.0.0",
52
52
  ]
53
53
  gpu = [
54
54
  "onnxruntime-gpu>=1.11.0",
55
+ "opencv-python>=4.5.0,<5.0.0",
56
+ ]
57
+ cpu-headless = [
58
+ "onnxruntime>=1.11.0",
59
+ "opencv-python-headless>=4.5.0,<5.0.0",
60
+ ]
61
+ gpu-headless = [
62
+ "onnxruntime-gpu>=1.11.0",
63
+ "opencv-python-headless>=4.5.0,<5.0.0",
55
64
  ]
56
65
  html = [
57
66
  "weasyprint>=55.0",
@@ -73,6 +82,7 @@ quality = [
73
82
  dev = [
74
83
  # Runtime
75
84
  "onnxruntime>=1.11.0",
85
+ "opencv-python>=4.5.0,<5.0.0",
76
86
  # HTML
77
87
  "weasyprint>=55.0",
78
88
  # Visualization
@@ -9,7 +9,7 @@ from pathlib import Path
9
9
  from setuptools import setup
10
10
 
11
11
  PKG_NAME = "onnxtr"
12
- VERSION = os.getenv("BUILD_VERSION", "0.4.0a0")
12
+ VERSION = os.getenv("BUILD_VERSION", "0.5.0a0")
13
13
 
14
14
 
15
15
  if __name__ == "__main__":
@@ -1 +0,0 @@
1
- __version__ = 'v0.4.0'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes