python-doctr 0.9.0__tar.gz → 0.10.0__tar.gz
This diff compares the contents of two package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- {python_doctr-0.9.0 → python_doctr-0.10.0}/PKG-INFO +11 -11
- {python_doctr-0.9.0 → python_doctr-0.10.0}/README.md +1 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/cord.py +10 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/funsd.py +11 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/ic03.py +11 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/ic13.py +10 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/iiit5k.py +26 -16
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/imgur5k.py +10 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/sroie.py +11 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/svhn.py +11 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/svt.py +11 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/synthtext.py +11 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/utils.py +7 -2
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/vocabs.py +6 -2
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/wildreceipt.py +12 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/file_utils.py +19 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/elements.py +12 -4
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/builder.py +2 -2
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/tensorflow.py +13 -6
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/pytorch.py +2 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/tensorflow.py +14 -8
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/predictor/pytorch.py +11 -7
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/predictor/tensorflow.py +10 -6
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/resnet/tensorflow.py +21 -8
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/textnet/tensorflow.py +11 -5
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vgg/tensorflow.py +9 -3
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vit/tensorflow.py +10 -4
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/zoo.py +22 -10
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/tensorflow.py +34 -12
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/fast/tensorflow.py +14 -11
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/linknet/tensorflow.py +23 -11
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/predictor/tensorflow.py +2 -2
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/factory/hub.py +5 -6
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/kie_predictor/base.py +4 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/kie_predictor/pytorch.py +4 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/kie_predictor/tensorflow.py +8 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/transformer/tensorflow.py +0 -2
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/pytorch.py +1 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/tensorflow.py +1 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/predictor/base.py +24 -12
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/predictor/pytorch.py +4 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/predictor/tensorflow.py +8 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/preprocessor/tensorflow.py +1 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/crnn/tensorflow.py +8 -6
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/master/tensorflow.py +9 -4
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/parseq/tensorflow.py +10 -8
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/sar/tensorflow.py +7 -3
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/tensorflow.py +9 -4
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/utils/pytorch.py +1 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/utils/tensorflow.py +15 -15
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/functional/pytorch.py +1 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/modules/pytorch.py +7 -6
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/modules/tensorflow.py +15 -12
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/geometry.py +106 -19
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/metrics.py +1 -1
- python_doctr-0.10.0/doctr/utils/reconstitution.py +212 -0
- python_doctr-0.10.0/doctr/version.py +1 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/pyproject.toml +11 -12
- {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/PKG-INFO +11 -11
- {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/requires.txt +9 -9
- {python_doctr-0.9.0 → python_doctr-0.10.0}/setup.py +1 -1
- python_doctr-0.9.0/doctr/utils/reconstitution.py +0 -126
- python_doctr-0.9.0/doctr/version.py +0 -1
- {python_doctr-0.9.0 → python_doctr-0.10.0}/LICENSE +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/contrib/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/contrib/artefacts.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/contrib/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/datasets/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/datasets/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/datasets/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/datasets/tensorflow.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/detection.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/doc_artefacts.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/generator/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/generator/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/generator/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/generator/tensorflow.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/iiithws.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/loader.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/mjsynth.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/ocr.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/orientation.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/recognition.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/html.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/image/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/image/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/image/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/image/tensorflow.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/pdf.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/reader.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/_utils.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/predictor/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/resnet/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/resnet/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/textnet/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/textnet/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vgg/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vgg/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vit/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vit/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/core.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/_utils/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/_utils/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/_utils/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/_utils/tensorflow.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/core.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/fast/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/fast/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/fast/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/linknet/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/linknet/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/linknet/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/predictor/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/predictor/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/zoo.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/factory/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/kie_predictor/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/layers/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/layers/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/layers/tensorflow.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/transformer/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/transformer/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/predictor/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/preprocessor/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/preprocessor/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/core.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/crnn/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/crnn/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/master/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/master/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/master/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/parseq/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/parseq/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/parseq/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/predictor/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/predictor/_utils.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/predictor/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/predictor/tensorflow.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/sar/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/sar/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/utils.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/pytorch.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/zoo.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/utils/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/zoo.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/py.typed +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/functional/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/functional/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/functional/tensorflow.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/modules/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/modules/base.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/__init__.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/common_types.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/data.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/fonts.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/multithreading.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/repr.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/visualization.py +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/SOURCES.txt +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/dependency_links.txt +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/top_level.txt +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/zip-safe +0 -0
- {python_doctr-0.9.0 → python_doctr-0.10.0}/setup.cfg +0 -0
--- python_doctr-0.9.0/PKG-INFO
+++ python_doctr-0.10.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: python-doctr
-Version: 0.9.0
+Version: 0.10.0
 Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
 Author-email: Mindee <contact@mindee.com>
 Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -226,7 +226,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: <4,>=3.9.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: numpy<
+Requires-Dist: numpy<3.0.0,>=1.16.0
 Requires-Dist: scipy<2.0.0,>=1.4.0
 Requires-Dist: h5py<4.0.0,>=3.1.0
 Requires-Dist: opencv-python<5.0.0,>=4.5.0
@@ -241,11 +241,12 @@ Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
 Requires-Dist: tqdm>=4.30.0
 Provides-Extra: tf
-Requires-Dist: tensorflow<
+Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "tf"
+Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
 Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
 Provides-Extra: torch
-Requires-Dist: torch<3.0.0,>=
-Requires-Dist: torchvision>=0.
+Requires-Dist: torch<3.0.0,>=2.0.0; extra == "torch"
+Requires-Dist: torchvision>=0.15.0; extra == "torch"
 Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
 Provides-Extra: html
 Requires-Dist: weasyprint>=55.0; extra == "html"
@@ -257,7 +258,6 @@ Requires-Dist: onnxruntime>=1.11.0; extra == "contrib"
 Provides-Extra: testing
 Requires-Dist: pytest>=5.3.2; extra == "testing"
 Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
-Requires-Dist: hdf5storage>=0.1.18; extra == "testing"
 Requires-Dist: onnxruntime>=1.11.0; extra == "testing"
 Requires-Dist: requests>=2.20.0; extra == "testing"
 Requires-Dist: psutil>=5.9.5; extra == "testing"
@@ -275,17 +275,17 @@ Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
 Requires-Dist: furo>=2022.3.4; extra == "docs"
 Provides-Extra: dev
-Requires-Dist: tensorflow<
+Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "dev"
+Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
 Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
-Requires-Dist: torch<3.0.0,>=
-Requires-Dist: torchvision>=0.
+Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
+Requires-Dist: torchvision>=0.15.0; extra == "dev"
 Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "dev"
 Requires-Dist: weasyprint>=55.0; extra == "dev"
 Requires-Dist: matplotlib>=3.1.0; extra == "dev"
 Requires-Dist: mplcursors>=0.3; extra == "dev"
 Requires-Dist: pytest>=5.3.2; extra == "dev"
 Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
-Requires-Dist: hdf5storage>=0.1.18; extra == "dev"
 Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
 Requires-Dist: requests>=2.20.0; extra == "dev"
 Requires-Dist: psutil>=5.9.5; extra == "dev"
@@ -464,7 +464,7 @@ pip install "python-doctr[torch,viz,html,contib]"
 For MacBooks with M1 chip, you will need some additional packages or specific versions:

 - TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
-- PyTorch: [version >=
+- PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)

 ### Developer mode
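Taken together, the metadata changes above say: NumPy 2 is now allowed, the `tf` extra gains the `tf-keras` Keras 2 shim, the `torch` extra moves to `torch>=2.0.0` with `torchvision>=0.15.0`, and `hdf5storage` drops out of the testing and dev extras. A minimal sketch for spot-checking what an environment actually resolved after upgrading (the package list is illustrative, not exhaustive):

```python
from importlib.metadata import PackageNotFoundError, version

# Spot-check the pins that changed between 0.9.0 and 0.10.0
for pkg in ("python-doctr", "numpy", "tensorflow", "tf-keras", "torch", "torchvision"):
    try:
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed (expected if it belongs to the other backend)")
```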
--- python_doctr-0.9.0/README.md
+++ python_doctr-0.10.0/README.md
@@ -161,7 +161,7 @@ pip install "python-doctr[torch,viz,html,contib]"
 For MacBooks with M1 chip, you will need some additional packages or specific versions:

 - TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
-- PyTorch: [version >=
+- PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)

 ### Developer mode
--- python_doctr-0.9.0/doctr/datasets/cord.py
+++ python_doctr-0.10.0/doctr/datasets/cord.py
@@ -33,6 +33,7 @@ class CORD(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """

@@ -53,6 +54,7 @@ class CORD(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         url, sha256, name = self.TRAIN if train else self.TEST
@@ -64,10 +66,15 @@ class CORD(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )

         # List images
         tmp_root = os.path.join(self.root, "image")
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         self.train = train
         np_dtype = np.float32
         for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking CORD", total=len(os.listdir(tmp_root))):
@@ -109,6 +116,8 @@ class CORD(VisionDataset):
                 )
                 for crop, label in zip(crops, list(text_targets)):
                     self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
             else:
                 self.data.append((
                     img_path,
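The pattern above repeats in each dataset diff that follows: a `detection_task` flag, a guard against combining it with `recognition_task`, and a widened `self.data` type so a bare box array is a valid target. A minimal sketch of the three modes using CORD (the `download=True` flag follows the usual docTR dataset usage; the exact sample types are read off this diff, not verified output):

```python
from doctr.datasets import CORD

# Default: full targets, i.e. (image, dict(boxes=..., labels=...))
full = CORD(train=True, download=True)

# New in 0.10.0: detection-only targets, i.e. (image, np.ndarray of boxes)
detection = CORD(train=True, download=True, detection_task=True)
img, boxes = detection[0]

# Recognition-only targets: (cropped word image, label string)
recognition = CORD(train=True, download=True, recognition_task=True)

# Requesting both at once now raises the ValueError added above
try:
    CORD(train=True, download=True, recognition_task=True, detection_task=True)
except ValueError as err:
    print(err)
```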
--- python_doctr-0.9.0/doctr/datasets/funsd.py
+++ python_doctr-0.10.0/doctr/datasets/funsd.py
@@ -33,6 +33,7 @@ class FUNSD(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """

@@ -45,6 +46,7 @@ class FUNSD(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
@@ -55,6 +57,12 @@ class FUNSD(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train
         np_dtype = np.float32

@@ -63,7 +71,7 @@ class FUNSD(VisionDataset):

         # # List images
         tmp_root = os.path.join(self.root, subfolder, "images")
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking FUNSD", total=len(os.listdir(tmp_root))):
             # File existence check
             if not os.path.exists(os.path.join(tmp_root, img_path)):
@@ -100,6 +108,8 @@ class FUNSD(VisionDataset):
                     # filter labels with unknown characters
                     if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
             else:
                 self.data.append((
                     img_path,
--- python_doctr-0.9.0/doctr/datasets/ic03.py
+++ python_doctr-0.10.0/doctr/datasets/ic03.py
@@ -32,6 +32,7 @@ class IC03(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """

@@ -51,6 +52,7 @@ class IC03(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         url, sha256, file_name = self.TRAIN if train else self.TEST
@@ -62,8 +64,14 @@ class IC03(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32

         # Load xml data
@@ -117,6 +125,8 @@ class IC03(VisionDataset):
                 for crop, label in zip(crops, labels):
                     if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((name.text, boxes))
             else:
                 self.data.append((name.text, dict(boxes=boxes, labels=labels)))

--- python_doctr-0.9.0/doctr/datasets/ic13.py
+++ python_doctr-0.10.0/doctr/datasets/ic13.py
@@ -38,6 +38,7 @@ class IC13(AbstractDataset):
         label_folder: folder with all annotation files for the images
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `AbstractDataset`.
     """

@@ -47,11 +48,17 @@ class IC13(AbstractDataset):
         label_folder: str,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
             img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )

         # File existence check
         if not os.path.exists(label_folder) or not os.path.exists(img_folder):
@@ -59,7 +66,7 @@ class IC13(AbstractDataset):
                 f"unable to locate {label_folder if not os.path.exists(label_folder) else img_folder}"
             )

-        self.data: List[Tuple[Union[Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32

         img_names = os.listdir(img_folder)
@@ -95,5 +102,7 @@ class IC13(AbstractDataset):
                 crops = crop_bboxes_from_image(img_path=img_path, geoms=box_targets)
                 for crop, label in zip(crops, labels):
                     self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, box_targets))
             else:
                 self.data.append((img_path, dict(boxes=box_targets, labels=labels)))
--- python_doctr-0.9.0/doctr/datasets/iiit5k.py
+++ python_doctr-0.10.0/doctr/datasets/iiit5k.py
@@ -34,6 +34,7 @@ class IIIT5K(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """

@@ -45,6 +46,7 @@ class IIIT5K(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
@@ -55,6 +57,12 @@ class IIIT5K(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train

         # Load mat data
@@ -62,7 +70,7 @@ class IIIT5K(VisionDataset):
         mat_file = "trainCharBound" if self.train else "testCharBound"
         mat_data = sio.loadmat(os.path.join(tmp_root, f"{mat_file}.mat"))[mat_file][0]

-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32

         for img_path, label, box_targets in tqdm(iterable=mat_data, desc="Unpacking IIIT5K", total=len(mat_data)):
@@ -73,24 +81,26 @@ class IIIT5K(VisionDataset):
             if not os.path.exists(os.path.join(tmp_root, _raw_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, _raw_path)}")

+            if use_polygons:
+                # (x, y) coordinates of top left, top right, bottom right, bottom left corners
+                box_targets = [
+                    [
+                        [box[0], box[1]],
+                        [box[0] + box[2], box[1]],
+                        [box[0] + box[2], box[1] + box[3]],
+                        [box[0], box[1] + box[3]],
+                    ]
+                    for box in box_targets
+                ]
+            else:
+                # xmin, ymin, xmax, ymax
+                box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
+
             if recognition_task:
                 self.data.append((_raw_path, _raw_label))
+            elif detection_task:
+                self.data.append((_raw_path, np.asarray(box_targets, dtype=np_dtype)))
             else:
-                if use_polygons:
-                    # (x, y) coordinates of top left, top right, bottom right, bottom left corners
-                    box_targets = [
-                        [
-                            [box[0], box[1]],
-                            [box[0] + box[2], box[1]],
-                            [box[0] + box[2], box[1] + box[3]],
-                            [box[0], box[1] + box[3]],
-                        ]
-                        for box in box_targets
-                    ]
-                else:
-                    # xmin, ymin, xmax, ymax
-                    box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
-
                 # label are casted to list where each char corresponds to the character's bounding box
                 self.data.append((
                     _raw_path,
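IIIT5K gets one extra twist: the raw `.mat` annotations store each character box as `[x, y, w, h]`, and the conversion now runs up front so that the new detection branch and the default branch see the same format. A standalone sketch of the two conversions the moved block performs (values chosen for illustration):

```python
# A raw IIIT5K character box: [x, y, w, h]
box = [10, 20, 30, 40]

# use_polygons=True: four (x, y) corners in top-left, top-right,
# bottom-right, bottom-left order
polygon = [
    [box[0], box[1]],
    [box[0] + box[2], box[1]],
    [box[0] + box[2], box[1] + box[3]],
    [box[0], box[1] + box[3]],
]
assert polygon == [[10, 20], [40, 20], [40, 60], [10, 60]]

# use_polygons=False: a straight (xmin, ymin, xmax, ymax) box
straight = [box[0], box[1], box[0] + box[2], box[1] + box[3]]
assert straight == [10, 20, 40, 60]
```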
--- python_doctr-0.9.0/doctr/datasets/imgur5k.py
+++ python_doctr-0.10.0/doctr/datasets/imgur5k.py
@@ -46,6 +46,7 @@ class IMGUR5K(AbstractDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `AbstractDataset`.
     """

@@ -56,17 +57,23 @@ class IMGUR5K(AbstractDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
             img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )

         # File existence check
         if not os.path.exists(label_path) or not os.path.exists(img_folder):
             raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")

-        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         self.train = train
         np_dtype = np.float32

@@ -132,6 +139,8 @@ class IMGUR5K(AbstractDataset):
                 tmp_img = Image.fromarray(crop)
                 tmp_img.save(os.path.join(reco_folder_path, f"{reco_images_counter}.png"))
                 reco_images_counter += 1
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
             else:
                 self.data.append((img_path, dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=labels)))

--- python_doctr-0.9.0/doctr/datasets/sroie.py
+++ python_doctr-0.10.0/doctr/datasets/sroie.py
@@ -33,6 +33,7 @@ class SROIE(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """

@@ -52,6 +53,7 @@ class SROIE(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         url, sha256, name = self.TRAIN if train else self.TEST
@@ -63,10 +65,16 @@ class SROIE(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train

         tmp_root = os.path.join(self.root, "images")
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32

         for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking SROIE", total=len(os.listdir(tmp_root))):
@@ -94,6 +102,8 @@ class SROIE(VisionDataset):
                 for crop, label in zip(crops, labels):
                     if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, coords))
             else:
                 self.data.append((img_path, dict(boxes=coords, labels=labels)))

--- python_doctr-0.9.0/doctr/datasets/svhn.py
+++ python_doctr-0.10.0/doctr/datasets/svhn.py
@@ -32,6 +32,7 @@ class SVHN(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """

@@ -52,6 +53,7 @@ class SVHN(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         url, sha256, name = self.TRAIN if train else self.TEST
@@ -63,8 +65,14 @@ class SVHN(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32

         tmp_root = os.path.join(self.root, "train" if train else "test")
@@ -122,6 +130,8 @@ class SVHN(VisionDataset):
                 for crop, label in zip(crops, label_targets):
                     if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_name, box_targets))
             else:
                 self.data.append((img_name, dict(boxes=box_targets, labels=label_targets)))

--- python_doctr-0.9.0/doctr/datasets/svt.py
+++ python_doctr-0.10.0/doctr/datasets/svt.py
@@ -32,6 +32,7 @@ class SVT(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """

@@ -43,6 +44,7 @@ class SVT(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
@@ -53,8 +55,14 @@ class SVT(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32

         # Load xml data
@@ -108,6 +116,8 @@ class SVT(VisionDataset):
                 for crop, label in zip(crops, labels):
                     if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((name.text, boxes))
             else:
                 self.data.append((name.text, dict(boxes=boxes, labels=labels)))

--- python_doctr-0.9.0/doctr/datasets/synthtext.py
+++ python_doctr-0.10.0/doctr/datasets/synthtext.py
@@ -35,6 +35,7 @@ class SynthText(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """

@@ -46,6 +47,7 @@ class SynthText(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
@@ -56,8 +58,14 @@ class SynthText(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32

         # Load mat data
@@ -111,6 +119,8 @@ class SynthText(VisionDataset):
                 tmp_img = Image.fromarray(crop)
                 tmp_img.save(os.path.join(reco_folder_path, f"{reco_images_counter}.png"))
                 reco_images_counter += 1
+            elif detection_task:
+                self.data.append((img_path[0], np.asarray(word_boxes, dtype=np_dtype)))
             else:
                 self.data.append((img_path[0], dict(boxes=np.asarray(word_boxes, dtype=np_dtype), labels=labels)))

--- python_doctr-0.9.0/doctr/datasets/utils.py
+++ python_doctr-0.10.0/doctr/datasets/utils.py
@@ -169,8 +169,13 @@ def encode_sequences(
     return encoded_data


-def convert_target_to_relative(img: ImageTensor, target: Dict[str, Any]) -> Tuple[ImageTensor, Dict[str, Any]]:
-    target["boxes"] = convert_to_relative_coords(target["boxes"], get_img_shape(img))
+def convert_target_to_relative(
+    img: ImageTensor, target: Union[np.ndarray, Dict[str, Any]]
+) -> Tuple[ImageTensor, Union[Dict[str, Any], np.ndarray]]:
+    if isinstance(target, np.ndarray):
+        target = convert_to_relative_coords(target, get_img_shape(img))
+    else:
+        target["boxes"] = convert_to_relative_coords(target["boxes"], get_img_shape(img))
     return img, target


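`convert_target_to_relative` is the `pre_transforms` hook wired into the datasets above, so it now has to accept the bare box array produced by `detection_task` as well as the usual target dict. Either way the boxes end up in `convert_to_relative_coords`, which divides absolute pixel coordinates by the image dimensions. A small sketch of that normalization (values chosen for illustration; the `(H, W)` convention is an assumption from how docTR passes image shapes):

```python
import numpy as np

from doctr.utils.geometry import convert_to_relative_coords

# One absolute (xmin, ymin, xmax, ymax) box on an image with H=100, W=200
boxes = np.array([[20, 10, 180, 50]], dtype=np.float32)

# img_shape is (height, width): x is divided by W, y by H
rel = convert_to_relative_coords(boxes, (100, 200))
print(rel)  # [[0.1 0.1 0.9 0.5]]
```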
--- python_doctr-0.9.0/doctr/datasets/vocabs.py
+++ python_doctr-0.10.0/doctr/datasets/vocabs.py
@@ -25,6 +25,7 @@ VOCABS: Dict[str, str] = {
     "hindi_punctuation": "।,?!:्ॐ॰॥॰",
     "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
     "bangla_digits": "০১২৩৪৫৬৭৮৯",
+    "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
 }

 VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
@@ -53,12 +54,15 @@ VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
 VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
 VOCABS["vietnamese"] = (
     VOCABS["english"]
-    + "
-    + "
+    + "áàảạãăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵ"
+    + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
 )
 VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
 VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
 VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
+VOCABS["ukrainian"] = (
+    VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"
+)
 VOCABS["multilingual"] = "".join(
     dict.fromkeys(
         VOCABS["french"]
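The new entries compose like every other vocab: `generic_cyrillic_letters` is a reusable base set, and `ukrainian` layers digits, punctuation, currency symbols, and the Ukrainian-specific letters (including the hryvnia sign) on top of it. A short sketch; passing a vocab string to a recognition model follows the usual docTR pattern:

```python
from doctr.datasets import VOCABS
from doctr.models import crnn_vgg16_bn

# The Ukrainian-specific additions sit on top of the shared Cyrillic base
assert "ґ" in VOCABS["ukrainian"] and "₴" in VOCABS["ukrainian"]

# Any recognition architecture can be built on the new vocab
model = crnn_vgg16_bn(pretrained=False, vocab=VOCABS["ukrainian"])
```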
--- python_doctr-0.9.0/doctr/datasets/wildreceipt.py
+++ python_doctr-0.10.0/doctr/datasets/wildreceipt.py
@@ -40,6 +40,7 @@ class WILDRECEIPT(AbstractDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `AbstractDataset`.
     """

@@ -50,11 +51,19 @@ class WILDRECEIPT(AbstractDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
             img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
         )
+        # Task check
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         # File existence check
         if not os.path.exists(label_path) or not os.path.exists(img_folder):
             raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
@@ -62,7 +71,7 @@ class WILDRECEIPT(AbstractDataset):
         tmp_root = img_folder
         self.train = train
         np_dtype = np.float32
-        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []

         with open(label_path, "r") as file:
             data = file.read()
@@ -100,6 +109,8 @@ class WILDRECEIPT(AbstractDataset):
                 for crop, label in zip(crops, list(text_targets)):
                     if label and " " not in label:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
             else:
                 self.data.append((
                     img_path,
--- python_doctr-0.9.0/doctr/file_utils.py
+++ python_doctr-0.10.0/doctr/file_utils.py
@@ -35,6 +35,20 @@ else: # pragma: no cover
     logging.info("Disabling PyTorch because USE_TF is set")
     _torch_available = False

+# Compatibility fix to make sure tensorflow.keras stays at Keras 2
+if "TF_USE_LEGACY_KERAS" not in os.environ:
+    os.environ["TF_USE_LEGACY_KERAS"] = "1"
+
+elif os.environ["TF_USE_LEGACY_KERAS"] != "1":
+    raise ValueError(
+        "docTR is only compatible with Keras 2, but you have explicitly set `TF_USE_LEGACY_KERAS` to `0`. "
+    )
+
+
+def ensure_keras_v2() -> None:  # pragma: no cover
+    if not os.environ.get("TF_USE_LEGACY_KERAS") == "1":
+        os.environ["TF_USE_LEGACY_KERAS"] = "1"
+

 if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
     _tf_available = importlib.util.find_spec("tensorflow") is not None
@@ -65,6 +79,11 @@ if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VA
         _tf_available = False
     else:
         logging.info(f"TensorFlow version {_tf_version} available.")
+        ensure_keras_v2()
+        import tensorflow as tf
+
+        # Enable eager execution - this is required for some models to work properly
+        tf.config.run_functions_eagerly(True)
 else:  # pragma: no cover
     logging.info("Disabling Tensorflow because USE_TORCH is set")
     _tf_available = False
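The `file_utils.py` change is the runtime side of the new `tf-keras` dependency: with the TensorFlow backend active, docTR pins `tensorflow.keras` to Keras 2 by exporting `TF_USE_LEGACY_KERAS=1` before TensorFlow is imported, fails fast if you explicitly set it to anything else, and switches TensorFlow to eager execution. A sketch of what that means for user code, assuming the TF backend (`USE_TF=1`):

```python
import os

# docTR sets this itself if it is unset; setting it in your entrypoint
# before any TensorFlow import is equivalent and makes the intent explicit
os.environ.setdefault("TF_USE_LEGACY_KERAS", "1")

import doctr  # tensorflow.keras now resolves to Keras 2 (the tf-keras shim)

# Had TF_USE_LEGACY_KERAS been explicitly set to "0" beforehand, the import
# above would raise ValueError: docTR is only compatible with Keras 2 ...
```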