python-doctr 0.8.0__tar.gz → 0.9.0__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {python-doctr-0.8.0/python_doctr.egg-info → python_doctr-0.9.0}/PKG-INFO +45 -40
- {python-doctr-0.8.0 → python_doctr-0.9.0}/README.md +24 -25
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/__init__.py +1 -1
- python_doctr-0.9.0/doctr/contrib/__init__.py +0 -0
- python_doctr-0.9.0/doctr/contrib/artefacts.py +131 -0
- python_doctr-0.9.0/doctr/contrib/base.py +105 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/datasets/pytorch.py +2 -2
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/generator/base.py +6 -5
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/imgur5k.py +1 -1
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/loader.py +1 -6
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/utils.py +2 -1
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/vocabs.py +9 -2
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/file_utils.py +26 -12
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/elements.py +40 -6
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/html.py +2 -2
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/image/pytorch.py +6 -8
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/image/tensorflow.py +1 -1
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/pdf.py +5 -2
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/reader.py +6 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/__init__.py +0 -1
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/_utils.py +57 -20
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/builder.py +71 -13
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/mobilenet/pytorch.py +45 -9
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/mobilenet/tensorflow.py +38 -7
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/predictor/pytorch.py +18 -11
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/predictor/tensorflow.py +16 -10
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/textnet/pytorch.py +3 -3
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/textnet/tensorflow.py +3 -3
- python_doctr-0.9.0/doctr/models/classification/zoo.py +98 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/__init__.py +1 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/_utils/__init__.py +1 -0
- python_doctr-0.9.0/doctr/models/detection/_utils/base.py +66 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/differentiable_binarization/base.py +4 -3
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/differentiable_binarization/tensorflow.py +14 -18
- python_doctr-0.9.0/doctr/models/detection/fast/base.py +257 -0
- python_doctr-0.9.0/doctr/models/detection/fast/pytorch.py +442 -0
- python_doctr-0.9.0/doctr/models/detection/fast/tensorflow.py +428 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/linknet/base.py +4 -3
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/predictor/pytorch.py +15 -1
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/predictor/tensorflow.py +15 -1
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/zoo.py +21 -4
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/factory/hub.py +3 -12
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/kie_predictor/base.py +9 -3
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/kie_predictor/pytorch.py +41 -20
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/kie_predictor/tensorflow.py +36 -16
- python_doctr-0.9.0/doctr/models/modules/layers/pytorch.py +165 -0
- python_doctr-0.9.0/doctr/models/modules/layers/tensorflow.py +173 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/modules/transformer/pytorch.py +2 -2
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/predictor/base.py +77 -50
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/predictor/pytorch.py +31 -20
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/predictor/tensorflow.py +27 -17
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/preprocessor/pytorch.py +4 -4
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/preprocessor/tensorflow.py +3 -2
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/master/pytorch.py +2 -2
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/parseq/pytorch.py +4 -3
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/parseq/tensorflow.py +4 -3
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/sar/pytorch.py +7 -6
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/sar/tensorflow.py +3 -9
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/vitstr/pytorch.py +1 -1
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/zoo.py +1 -1
- python_doctr-0.9.0/doctr/models/utils/__init__.py +6 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/zoo.py +2 -2
- python_doctr-0.9.0/doctr/py.typed +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/functional/base.py +1 -1
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/functional/pytorch.py +4 -4
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/modules/base.py +37 -15
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/modules/pytorch.py +66 -8
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/modules/tensorflow.py +63 -7
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/fonts.py +7 -5
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/geometry.py +35 -12
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/metrics.py +33 -174
- python_doctr-0.9.0/doctr/utils/reconstitution.py +126 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/visualization.py +5 -118
- python_doctr-0.9.0/doctr/version.py +1 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/pyproject.toml +39 -22
- {python-doctr-0.8.0 → python_doctr-0.9.0/python_doctr.egg-info}/PKG-INFO +45 -40
- {python-doctr-0.8.0 → python_doctr-0.9.0}/python_doctr.egg-info/SOURCES.txt +10 -6
- {python-doctr-0.8.0 → python_doctr-0.9.0}/python_doctr.egg-info/requires.txt +20 -11
- {python-doctr-0.8.0 → python_doctr-0.9.0}/setup.py +1 -1
- python-doctr-0.8.0/doctr/models/artefacts/__init__.py +0 -2
- python-doctr-0.8.0/doctr/models/artefacts/barcode.py +0 -74
- python-doctr-0.8.0/doctr/models/artefacts/face.py +0 -63
- python-doctr-0.8.0/doctr/models/classification/zoo.py +0 -74
- python-doctr-0.8.0/doctr/models/modules/layers/pytorch.py +0 -86
- python-doctr-0.8.0/doctr/models/modules/layers/tensorflow.py +0 -95
- python-doctr-0.8.0/doctr/models/obj_detection/__init__.py +0 -1
- python-doctr-0.8.0/doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
- python-doctr-0.8.0/doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
- python-doctr-0.8.0/doctr/version.py +0 -1
- {python-doctr-0.8.0 → python_doctr-0.9.0}/LICENSE +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/cord.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/datasets/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/datasets/base.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/datasets/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/detection.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/doc_artefacts.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/funsd.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/generator/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/generator/pytorch.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/generator/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/ic03.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/ic13.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/iiit5k.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/iiithws.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/mjsynth.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/ocr.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/orientation.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/recognition.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/sroie.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/svhn.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/svt.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/synthtext.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/wildreceipt.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/image/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/image/base.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/magc_resnet/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/magc_resnet/pytorch.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/magc_resnet/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/mobilenet/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/predictor/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/resnet/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/resnet/pytorch.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/resnet/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/textnet/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vgg/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vgg/pytorch.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vgg/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vit/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vit/pytorch.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vit/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/core.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/_utils/pytorch.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/_utils/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/core.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/differentiable_binarization/__init__.py +0 -0
- {python-doctr-0.8.0/doctr/models/detection/linknet → python_doctr-0.9.0/doctr/models/detection/fast}/__init__.py +0 -0
- {python-doctr-0.8.0/doctr/models/modules/layers → python_doctr-0.9.0/doctr/models/detection/linknet}/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/linknet/pytorch.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/linknet/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/predictor/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/factory/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/kie_predictor/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/modules/__init__.py +0 -0
- {python-doctr-0.8.0/doctr/models/modules/transformer → python_doctr-0.9.0/doctr/models/modules/layers}/__init__.py +0 -0
- {python-doctr-0.8.0/doctr/models/modules/vision_transformer → python_doctr-0.9.0/doctr/models/modules/transformer}/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/modules/transformer/tensorflow.py +0 -0
- {python-doctr-0.8.0/doctr/models/preprocessor → python_doctr-0.9.0/doctr/models/modules/vision_transformer}/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/modules/vision_transformer/pytorch.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/modules/vision_transformer/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/predictor/__init__.py +0 -0
- {python-doctr-0.8.0/doctr/models/recognition/crnn → python_doctr-0.9.0/doctr/models/preprocessor}/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/core.py +0 -0
- {python-doctr-0.8.0/doctr/models/recognition/master → python_doctr-0.9.0/doctr/models/recognition/crnn}/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/crnn/pytorch.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/crnn/tensorflow.py +0 -0
- {python-doctr-0.8.0/doctr/models/recognition/parseq → python_doctr-0.9.0/doctr/models/recognition/master}/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/master/base.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/master/tensorflow.py +0 -0
- {python-doctr-0.8.0/doctr/models/recognition/sar → python_doctr-0.9.0/doctr/models/recognition/parseq}/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/parseq/base.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/predictor/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/predictor/_utils.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/predictor/pytorch.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/predictor/tensorflow.py +0 -0
- {python-doctr-0.8.0/doctr/models/recognition/vitstr → python_doctr-0.9.0/doctr/models/recognition/sar}/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/utils.py +0 -0
- {python-doctr-0.8.0/doctr/models/utils → python_doctr-0.9.0/doctr/models/recognition/vitstr}/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/vitstr/base.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/vitstr/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/utils/pytorch.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/utils/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/functional/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/functional/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/modules/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/__init__.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/common_types.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/data.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/multithreading.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/repr.py +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/python_doctr.egg-info/dependency_links.txt +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/python_doctr.egg-info/top_level.txt +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/python_doctr.egg-info/zip-safe +0 -0
- {python-doctr-0.8.0 → python_doctr-0.9.0}/setup.cfg +0 -0
{python-doctr-0.8.0/python_doctr.egg-info → python_doctr-0.9.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: python-doctr
-Version: 0.8.0
+Version: 0.9.0
 Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
 Author-email: Mindee <contact@mindee.com>
 Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich

@@ -209,7 +209,7 @@ License: Apache License
 Project-URL: documentation, https://mindee.github.io/doctr
 Project-URL: repository, https://github.com/mindee/doctr
 Project-URL: tracker, https://github.com/mindee/doctr/issues
-Project-URL: changelog, https://github.…
+Project-URL: changelog, https://mindee.github.io/doctr/changelog.html
 Keywords: OCR,deep learning,computer vision,tensorflow,pytorch,text detection,text recognition
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers

@@ -219,30 +219,26 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Natural Language :: English
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: <4,>=3.…
+Requires-Python: <4,>=3.9.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: importlib_metadata
 Requires-Dist: numpy<2.0.0,>=1.16.0
 Requires-Dist: scipy<2.0.0,>=1.4.0
 Requires-Dist: h5py<4.0.0,>=3.1.0
 Requires-Dist: opencv-python<5.0.0,>=4.5.0
-Requires-Dist: pypdfium2<5.0.0,>=4.…
+Requires-Dist: pypdfium2<5.0.0,>=4.11.0
 Requires-Dist: pyclipper<2.0.0,>=1.2.0
 Requires-Dist: shapely<3.0.0,>=1.6.0
 Requires-Dist: langdetect<2.0.0,>=1.0.9
 Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
 Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
-Requires-Dist: matplotlib>=3.1.0
-Requires-Dist: weasyprint>=55.0
 Requires-Dist: Pillow>=9.2.0
 Requires-Dist: defusedxml>=0.7.0
-Requires-Dist: …
-Requires-Dist: unidecode>=1.0.0
+Requires-Dist: anyascii>=0.3.2
 Requires-Dist: tqdm>=4.30.0
 Provides-Extra: tf
 Requires-Dist: tensorflow<2.16.0,>=2.11.0; extra == "tf"

@@ -251,6 +247,13 @@ Provides-Extra: torch
 Requires-Dist: torch<3.0.0,>=1.12.0; extra == "torch"
 Requires-Dist: torchvision>=0.13.0; extra == "torch"
 Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
+Provides-Extra: html
+Requires-Dist: weasyprint>=55.0; extra == "html"
+Provides-Extra: viz
+Requires-Dist: matplotlib>=3.1.0; extra == "viz"
+Requires-Dist: mplcursors>=0.3; extra == "viz"
+Provides-Extra: contrib
+Requires-Dist: onnxruntime>=1.11.0; extra == "contrib"
 Provides-Extra: testing
 Requires-Dist: pytest>=5.3.2; extra == "testing"
 Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"

@@ -266,7 +269,7 @@ Provides-Extra: docs
 Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "docs"
 Requires-Dist: sphinxemoji>=0.1.8; extra == "docs"
 Requires-Dist: sphinx-copybutton>=0.3.1; extra == "docs"
-Requires-Dist: docutils<0.…
+Requires-Dist: docutils<0.22; extra == "docs"
 Requires-Dist: recommonmark>=0.7.1; extra == "docs"
 Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"

@@ -277,29 +280,32 @@ Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
 Requires-Dist: torch<3.0.0,>=1.12.0; extra == "dev"
 Requires-Dist: torchvision>=0.13.0; extra == "dev"
 Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "dev"
+Requires-Dist: weasyprint>=55.0; extra == "dev"
+Requires-Dist: matplotlib>=3.1.0; extra == "dev"
+Requires-Dist: mplcursors>=0.3; extra == "dev"
 Requires-Dist: pytest>=5.3.2; extra == "dev"
 Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
 Requires-Dist: hdf5storage>=0.1.18; extra == "dev"
 Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
 Requires-Dist: requests>=2.20.0; extra == "dev"
 Requires-Dist: psutil>=5.9.5; extra == "dev"
-Requires-Dist: ruff>=0.…
-Requires-Dist: mypy>=0…
-Requires-Dist: pre-commit>=…
+Requires-Dist: ruff>=0.3.0; extra == "dev"
+Requires-Dist: mypy>=1.0; extra == "dev"
+Requires-Dist: pre-commit>=3.0.0; extra == "dev"
 Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "dev"
 Requires-Dist: sphinxemoji>=0.1.8; extra == "dev"
 Requires-Dist: sphinx-copybutton>=0.3.1; extra == "dev"
-Requires-Dist: docutils<0.…
+Requires-Dist: docutils<0.22; extra == "dev"
 Requires-Dist: recommonmark>=0.7.1; extra == "dev"
 Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
 Requires-Dist: furo>=2022.3.4; extra == "dev"

 <p align="center">
-  <img src="docs/images/Logo_doctr.gif" width="40%">
+  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>

-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)


 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**

@@ -309,7 +315,7 @@ What you can expect from this repository:
 - efficient ways to parse textual information (localize and identify each word) from your documents
 - guidance on how to integrate this in your current architecture

-![…](…)
+![…](…)

 ## Quick Tour

@@ -334,7 +340,7 @@ from doctr.io import DocumentFile
 pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
 # Image
 single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
-# Webpage
+# Webpage (requires `weasyprint` to be installed)
 webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
 # Multiple page images
 multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])

@@ -372,10 +378,11 @@ If both options are set to False, the predictor will always fit and return rotat
 To interpret your model's predictions, you can visualize them interactively as follows:

 ```python
+# Display the result (requires matplotlib & mplcursors to be installed)
 result.show()
 ```

-![…](…)
+![…](…)

 Or even rebuild the original document from its predictions:

@@ -386,7 +393,7 @@ synthetic_pages = result.synthesize()
 plt.imshow(synthetic_pages[0]); plt.axis('off'); plt.show()
 ```

-![…](…)
+![…](…)

 The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`).
 To get a better understanding of our document model, check our [documentation](https://mindee.github.io/doctr/modules/io.html#document-structure):

@@ -425,23 +432,13 @@ The KIE predictor results per page are in a dictionary format with each key repr

 ### If you are looking for support from the Mindee team

-[![…](…)](https://mindee.com/product/doctr)
+[![…](…)](https://mindee.com/product/doctr)

 ## Installation

 ### Prerequisites

-Python 3.…
-
-Since we use [weasyprint](https://weasyprint.org/), you will need extra dependencies if you are not running Linux.
-
-For MacOS users, you can install them as follows:
-
-```shell
-brew install cairo pango gdk-pixbuf libffi
-```
-
-For Windows users, those dependencies are included in GTK. You can find the latest installer over [here](https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases).
+Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.

 ### Latest release

@@ -460,6 +457,8 @@ We try to keep framework-specific dependencies to a minimum. You can install fra
 pip install "python-doctr[tf]"
 # for PyTorch
 pip install "python-doctr[torch]"
+# optional dependencies for visualization, html, and contrib modules can be installed as follows:
+pip install "python-doctr[torch,viz,html,contib]"
 ```

 For MacBooks with M1 chip, you will need some additional packages or specific versions:

@@ -494,6 +493,7 @@ Credits where it's due: this repository is implementing, among others, architect

 - DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf).
 - LinkNet: [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/pdf/1707.03718.pdf)
+- FAST: [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://arxiv.org/pdf/2111.02394.pdf)

 ### Text Recognition

@@ -513,7 +513,7 @@ The full package documentation is available [here](https://mindee.github.io/doct

 A minimal demo app is provided for you to play with our end-to-end OCR models!

-![…](…)
+![…](…)

 #### Live demo

@@ -553,11 +553,11 @@ USE_TORCH=1 streamlit run demo/app.py
 Instead of having your demo actually running Python, you would prefer to run everything in your web browser?
 Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to get started!

-![…](…)
+![…](…)

 ### Docker container

-[We…
+[We offer Docker container support for easy testing and deployment](https://github.com/mindee/doctr/pkgs/container/doctr).

 #### Using GPU with docTR Docker Images

@@ -646,9 +646,14 @@ Your API should now be running locally on your port 8002. Access your automatica

 ```python
 import requests
+
+params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
+
 with open('/path/to/your/doc.jpg', 'rb') as f:
-    …
-    …
+    files = [  # application/pdf, image/jpeg, image/png supported
+        ("files", ("doc.jpg", f.read(), "image/jpeg")),
+    ]
+print(requests.post("http://localhost:8080/ocr", params=params, files=files).json())
 ```

 ### Example notebooks

@@ -673,8 +678,8 @@ If you wish to cite this project, feel free to use this [BibTeX](http://www.bibt

 If you scrolled down to this section, you most likely appreciate open source. Do you feel like extending the range of our supported characters? Or perhaps submitting a paper implementation? Or contributing in any other way?

-You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](…
+You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](https://mindee.github.io/doctr/contributing/contributing.html)) for you to easily do so!

 ## License

-Distributed under the Apache 2.0 License. See [`LICENSE`](…
+Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/mindee/doctr?tab=Apache-2.0-1-ov-file#readme) for more information.
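The dependency changes above are the headline packaging change in 0.9.0: matplotlib, weasyprint and mplcursors leave the core requirements and return behind the new `viz`, `html` and `dev` extras, and a `contrib` extra brings in onnxruntime. The library enforces this at call time via `requires_package` from `doctr/file_utils.py` (changed +26/-12 in the file list). As a minimal sketch of how such a guard can be written, assuming the real helper differs in its details; only the call signature `requires_package(name, message)` is taken from the code in this diff:

```python
# Hedged sketch of a lazy optional-dependency guard in the style of
# doctr.file_utils.requires_package (the actual implementation may differ).
import importlib.util


def requires_package(name: str, extra_message: str = "") -> None:
    """Raise at call time, not import time, if an optional package is missing."""
    if importlib.util.find_spec(name) is None:
        raise ImportError(f"Missing optional dependency `{name}`. {extra_message}")


# Only users who actually call a viz/html/contrib feature pay the dependency cost:
requires_package("matplotlib", "`.show()` requires matplotlib installed")
```

This is why the README snippets below can annotate `from_url` and `result.show()` with "requires X to be installed" instead of failing at `import doctr`.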
{python-doctr-0.8.0 → python_doctr-0.9.0}/README.md

The README.md hunks (@@ -1,8 +1,8 @@ through @@ -376,8 +375,8 @@) are identical in content to the README portion of the PKG-INFO diff above, only with different offsets: the logo and illustration links move from relative paths to absolute GitHub URLs, the badge row gains PyPI, Hugging Face Spaces and Colab entries, the `from_url` and `result.show()` snippets gain notes about the new `html` and `viz` extras, the weasyprint/GTK prerequisite instructions are replaced by the single Python 3.9 requirement, FAST is added to the detection references, the Docker and contributing/license links are completed, and the API request example is rewritten around `params` and a `files` list posted to `/ocr`.
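Beyond packaging, the README changes advertise the new FAST detection family and the `viz` extra for interactive display. A quick-tour sketch combining them; the `fast_base` architecture name is inferred from the new `doctr/models/detection/fast` modules in the file list and is an assumption, as are the file paths:

```python
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

doc = DocumentFile.from_images(["path/to/page1.jpg"])  # placeholder path
# det_arch="fast_base" is assumed from the new detection/fast modules
model = ocr_predictor(det_arch="fast_base", reco_arch="crnn_vgg16_bn", pretrained=True)
result = model(doc)
print(result.render())  # plain-text export of the detected words
result.show()  # interactive view; needs the `viz` extra (matplotlib + mplcursors)
```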
python_doctr-0.9.0/doctr/contrib/artefacts.py (new file)

@@ -0,0 +1,131 @@
+# Copyright (C) 2021-2024, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any, Dict, List, Optional, Tuple
+
+import cv2
+import numpy as np
+
+from doctr.file_utils import requires_package
+
+from .base import _BasePredictor
+
+__all__ = ["ArtefactDetector"]
+
+default_cfgs: Dict[str, Dict[str, Any]] = {
+    "yolov8_artefact": {
+        "input_shape": (3, 1024, 1024),
+        "labels": ["bar_code", "qr_code", "logo", "photo"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/yolo_artefact-f9d66f14.onnx&src=0",
+    },
+}
+
+
+class ArtefactDetector(_BasePredictor):
+    """
+    A class to detect artefacts in images
+
+    >>> from doctr.io import DocumentFile
+    >>> from doctr.contrib.artefacts import ArtefactDetector
+    >>> doc = DocumentFile.from_images(["path/to/image.jpg"])
+    >>> detector = ArtefactDetector()
+    >>> results = detector(doc)
+
+    Args:
+    ----
+        arch: the architecture to use
+        batch_size: the batch size to use
+        model_path: the path to the model to use
+        labels: the labels to use
+        input_shape: the input shape to use
+        mask_labels: the mask labels to use
+        conf_threshold: the confidence threshold to use
+        iou_threshold: the intersection over union threshold to use
+        **kwargs: additional arguments to be passed to `download_from_url`
+    """
+
+    def __init__(
+        self,
+        arch: str = "yolov8_artefact",
+        batch_size: int = 2,
+        model_path: Optional[str] = None,
+        labels: Optional[List[str]] = None,
+        input_shape: Optional[Tuple[int, int, int]] = None,
+        conf_threshold: float = 0.5,
+        iou_threshold: float = 0.5,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(batch_size=batch_size, url=default_cfgs[arch]["url"], model_path=model_path, **kwargs)
+        self.labels = labels or default_cfgs[arch]["labels"]
+        self.input_shape = input_shape or default_cfgs[arch]["input_shape"]
+        self.conf_threshold = conf_threshold
+        self.iou_threshold = iou_threshold
+
+    def preprocess(self, img: np.ndarray) -> np.ndarray:
+        return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)
+
+    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> List[List[Dict[str, Any]]]:
+        results = []
+
+        for batch in zip(output, input_images):
+            for out, img in zip(batch[0], batch[1]):
+                org_height, org_width = img.shape[:2]
+                width_scale, height_scale = org_width / self.input_shape[2], org_height / self.input_shape[1]
+                for res in out:
+                    sample_results = []
+                    for row in np.transpose(np.squeeze(res)):
+                        classes_scores = row[4:]
+                        max_score = np.amax(classes_scores)
+                        if max_score >= self.conf_threshold:
+                            class_id = np.argmax(classes_scores)
+                            x, y, w, h = row[0], row[1], row[2], row[3]
+                            # to rescaled xmin, ymin, xmax, ymax
+                            xmin = int((x - w / 2) * width_scale)
+                            ymin = int((y - h / 2) * height_scale)
+                            xmax = int((x + w / 2) * width_scale)
+                            ymax = int((y + h / 2) * height_scale)
+
+                            sample_results.append({
+                                "label": self.labels[class_id],
+                                "confidence": float(max_score),
+                                "box": [xmin, ymin, xmax, ymax],
+                            })
+
+                    # Filter out overlapping boxes
+                    boxes = [res["box"] for res in sample_results]
+                    scores = [res["confidence"] for res in sample_results]
+                    keep_indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf_threshold, self.iou_threshold)  # type: ignore[arg-type]
+                    sample_results = [sample_results[i] for i in keep_indices]
+
+                    results.append(sample_results)
+
+        self._results = results
+        return results
+
+    def show(self, **kwargs: Any) -> None:
+        """
+        Display the results
+
+        Args:
+        ----
+            **kwargs: additional keyword arguments to be passed to `plt.show`
+        """
+        requires_package("matplotlib", "`.show()` requires matplotlib installed")
+        import matplotlib.pyplot as plt
+        from matplotlib.patches import Rectangle
+
+        # visualize the results with matplotlib
+        if self._results and self._inputs:
+            for img, res in zip(self._inputs, self._results):
+                plt.figure(figsize=(10, 10))
+                plt.imshow(img)
+                for obj in res:
+                    xmin, ymin, xmax, ymax = obj["box"]
+                    label = obj["label"]
+                    plt.text(xmin, ymin, f"{label} {obj['confidence']:.2f}", color="red")
+                    plt.gca().add_patch(
+                        Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor="red", linewidth=2)
+                    )
+                plt.show(**kwargs)
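Following the class docstring above, a usage sketch for the new detector (the image path is a placeholder; onnxruntime from the `contrib` extra is required, and `.show()` additionally needs matplotlib):

```python
from doctr.io import DocumentFile
from doctr.contrib.artefacts import ArtefactDetector

doc = DocumentFile.from_images(["path/to/image.jpg"])  # placeholder path
detector = ArtefactDetector(conf_threshold=0.5, iou_threshold=0.5)
results = detector(doc)
# One result list per image; each entry carries "label", "confidence" and an
# absolute-pixel "box" [xmin, ymin, xmax, ymax], as built in postprocess() above.
for page in results:
    for obj in page:
        print(obj["label"], obj["confidence"], obj["box"])
detector.show()  # draws the boxes with matplotlib
```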
python_doctr-0.9.0/doctr/contrib/base.py (new file)

@@ -0,0 +1,105 @@
+# Copyright (C) 2021-2024, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any, List, Optional
+
+import numpy as np
+
+from doctr.file_utils import requires_package
+from doctr.utils.data import download_from_url
+
+
+class _BasePredictor:
+    """
+    Base class for all predictors
+
+    Args:
+    ----
+        batch_size: the batch size to use
+        url: the url to use to download a model if needed
+        model_path: the path to the model to use
+        **kwargs: additional arguments to be passed to `download_from_url`
+    """
+
+    def __init__(self, batch_size: int, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs) -> None:
+        self.batch_size = batch_size
+        self.session = self._init_model(url, model_path, **kwargs)
+
+        self._inputs: List[np.ndarray] = []
+        self._results: List[Any] = []
+
+    def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs: Any) -> Any:
+        """
+        Download the model from the given url if needed
+
+        Args:
+        ----
+            url: the url to use
+            model_path: the path to the model to use
+            **kwargs: additional arguments to be passed to `download_from_url`
+
+        Returns:
+        -------
+            Any: the ONNX loaded model
+        """
+        requires_package("onnxruntime", "`.contrib` module requires `onnxruntime` to be installed.")
+        import onnxruntime as ort
+
+        if not url and not model_path:
+            raise ValueError("You must provide either a url or a model_path")
+        onnx_model_path = model_path if model_path else str(download_from_url(url, cache_subdir="models", **kwargs))  # type: ignore[arg-type]
+        return ort.InferenceSession(onnx_model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+
+    def preprocess(self, img: np.ndarray) -> np.ndarray:
+        """
+        Preprocess the input image
+
+        Args:
+        ----
+            img: the input image to preprocess
+
+        Returns:
+        -------
+            np.ndarray: the preprocessed image
+        """
+        raise NotImplementedError
+
+    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
+        """
+        Postprocess the model output
+
+        Args:
+        ----
+            output: the model output to postprocess
+            input_images: the input images used to generate the output
+
+        Returns:
+        -------
+            Any: the postprocessed output
+        """
+        raise NotImplementedError
+
+    def __call__(self, inputs: List[np.ndarray]) -> Any:
+        """
+        Call the model on the given inputs
+
+        Args:
+        ----
+            inputs: the inputs to use
+
+        Returns:
+        -------
+            Any: the postprocessed output
+        """
+        self._inputs = inputs
+        model_inputs = self.session.get_inputs()
+
+        batched_inputs = [inputs[i : i + self.batch_size] for i in range(0, len(inputs), self.batch_size)]
+        processed_batches = [
+            np.array([self.preprocess(img) for img in batch], dtype=np.float32) for batch in batched_inputs
+        ]
+
+        outputs = [self.session.run(None, {model_inputs[0].name: batch}) for batch in processed_batches]
+        return self.postprocess(outputs, batched_inputs)
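Because `preprocess` and `postprocess` are the only abstract hooks, plugging a custom ONNX model into this scaffolding reduces to a small subclass. A sketch under stated assumptions: the model path, input layout and output handling are hypothetical; only the `_BasePredictor` contract above comes from the source:

```python
from typing import Any, List

import numpy as np

from doctr.contrib.base import _BasePredictor


class MyOnnxPredictor(_BasePredictor):
    """Hypothetical predictor wrapping a custom ONNX model."""

    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # HWC uint8 -> CHW float32 in [0, 1]; all images in a batch must share a
        # shape so __call__ can stack them with np.array
        return np.transpose(img, (2, 0, 1)).astype(np.float32) / 255.0

    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
        # Return the raw session outputs, one entry per batch
        return output


predictor = MyOnnxPredictor(batch_size=2, model_path="path/to/model.onnx")  # placeholder path
```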
{python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/datasets/pytorch.py

@@ -50,9 +50,9 @@ class AbstractDataset(_AbstractDataset):
     @staticmethod
     def collate_fn(samples: List[Tuple[torch.Tensor, Any]]) -> Tuple[torch.Tensor, List[Any]]:
         images, targets = zip(*samples)
-        images = torch.stack(images, dim=0)
+        images = torch.stack(images, dim=0)  # type: ignore[assignment]

-        return images, list(targets)
+        return images, list(targets)  # type: ignore[return-value]


 class VisionDataset(AbstractDataset, _VisionDataset):  # noqa: D101