python-doctr 0.11.0__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_doctr-0.11.0 → python_doctr-0.12.0}/PKG-INFO +19 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/README.md +15 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/__init__.py +1 -0
- python_doctr-0.12.0/doctr/datasets/coco_text.py +139 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/cord.py +2 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/funsd.py +2 -2
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ic03.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ic13.py +2 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/iiit5k.py +4 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/imgur5k.py +9 -2
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/loader.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ocr.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/recognition.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/svhn.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/svt.py +2 -2
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/synthtext.py +15 -2
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/utils.py +7 -6
- python_doctr-0.12.0/doctr/datasets/vocabs.py +1140 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/file_utils.py +9 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/elements.py +37 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/_utils.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/pytorch.py +1 -2
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/tensorflow.py +3 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/pytorch.py +15 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/tensorflow.py +11 -2
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/pytorch.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/pytorch.py +26 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/tensorflow.py +25 -4
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/pytorch.py +10 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/tensorflow.py +11 -2
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/pytorch.py +16 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/tensorflow.py +11 -2
- python_doctr-0.12.0/doctr/models/classification/vip/__init__.py +4 -0
- python_doctr-0.12.0/doctr/models/classification/vip/layers/__init__.py +4 -0
- python_doctr-0.12.0/doctr/models/classification/vip/layers/pytorch.py +615 -0
- python_doctr-0.12.0/doctr/models/classification/vip/pytorch.py +505 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vit/pytorch.py +10 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vit/tensorflow.py +9 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/zoo.py +4 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/base.py +3 -4
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/pytorch.py +10 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/tensorflow.py +11 -4
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/fast/base.py +2 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/fast/pytorch.py +13 -4
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/fast/tensorflow.py +10 -2
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/base.py +2 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/pytorch.py +10 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/tensorflow.py +10 -2
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/factory/hub.py +3 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/pytorch.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/tensorflow.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/layers/pytorch.py +49 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/predictor/pytorch.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/predictor/tensorflow.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/pytorch.py +10 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/tensorflow.py +10 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/master/pytorch.py +10 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/master/tensorflow.py +10 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/pytorch.py +23 -5
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/tensorflow.py +13 -5
- python_doctr-0.12.0/doctr/models/recognition/predictor/_utils.py +145 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/pytorch.py +3 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/tensorflow.py +3 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/pytorch.py +10 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/tensorflow.py +10 -3
- python_doctr-0.12.0/doctr/models/recognition/utils.py +93 -0
- python_doctr-0.12.0/doctr/models/recognition/viptr/__init__.py +4 -0
- python_doctr-0.12.0/doctr/models/recognition/viptr/pytorch.py +277 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/pytorch.py +10 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/tensorflow.py +10 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/zoo.py +5 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/utils/pytorch.py +28 -18
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/utils/tensorflow.py +15 -8
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/data.py +1 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/geometry.py +1 -1
- python_doctr-0.12.0/doctr/version.py +1 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/pyproject.toml +1 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/PKG-INFO +19 -3
- {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/SOURCES.txt +7 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/requires.txt +1 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/setup.py +1 -1
- python_doctr-0.11.0/doctr/datasets/vocabs.py +0 -92
- python_doctr-0.11.0/doctr/models/recognition/predictor/_utils.py +0 -83
- python_doctr-0.11.0/doctr/models/recognition/utils.py +0 -84
- python_doctr-0.11.0/doctr/version.py +0 -1
- {python_doctr-0.11.0 → python_doctr-0.12.0}/LICENSE +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/contrib/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/contrib/artefacts.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/contrib/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/datasets/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/datasets/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/datasets/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/datasets/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/detection.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/doc_artefacts.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/generator/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/generator/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/generator/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/generator/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/iiithws.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/mjsynth.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/orientation.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/sroie.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/wildreceipt.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/html.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/image/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/image/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/image/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/image/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/pdf.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/reader.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/builder.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vit/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/core.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/core.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/fast/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/zoo.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/factory/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/layers/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/layers/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/predictor/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/predictor/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/preprocessor/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/preprocessor/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/preprocessor/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/core.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/master/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/master/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/utils/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/zoo.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/py.typed +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/functional/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/functional/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/functional/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/functional/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/modules/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/modules/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/modules/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/modules/tensorflow.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/common_types.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/fonts.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/metrics.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/multithreading.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/reconstitution.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/repr.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/visualization.py +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/dependency_links.txt +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/top_level.txt +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/zip-safe +0 -0
- {python_doctr-0.11.0 → python_doctr-0.12.0}/setup.cfg +0 -0
{python_doctr-0.11.0 → python_doctr-0.12.0}/PKG-INFO

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: python-doctr
-Version: 0.11.0
+Version: 0.12.0
 Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
 Author-email: Mindee <contact@mindee.com>
 Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -239,6 +239,7 @@ Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
 Requires-Dist: Pillow>=9.2.0
 Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
+Requires-Dist: validators>=0.18.0
 Requires-Dist: tqdm>=4.30.0
 Provides-Extra: tf
 Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "tf"
@@ -302,12 +303,13 @@ Requires-Dist: recommonmark>=0.7.1; extra == "dev"
 Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
 Requires-Dist: furo>=2022.3.4; extra == "dev"
+Dynamic: license-file
 
 <p align="center">
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>
 
-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [](https://gurubase.io/g/doctr)
 
 
 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -438,6 +440,19 @@ The KIE predictor results per page are in a dictionary format with each key repr
 
 ## Installation
 
+> [!WARNING]
+> **TensorFlow Backend Deprecation Notice**
+>
+> Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
+> We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
+> Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
+>
+> This decision was made based on several considerations:
+>
+> - Allows better focus on improving the core library
+> - Frees up resources to develop new features faster
+> - Enables more targeted optimizations with PyTorch
+
 ### Prerequisites
 
 Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
@@ -504,6 +519,7 @@ Credits where it's due: this repository is implementing, among others, architect
 - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
 - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
 - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).
 
 ## More goodies
 
{python_doctr-0.11.0 → python_doctr-0.12.0}/README.md

@@ -2,7 +2,7 @@
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>
 
-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [](https://gurubase.io/g/doctr)
 
 
 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -133,6 +133,19 @@ The KIE predictor results per page are in a dictionary format with each key repr
 
 ## Installation
 
+> [!WARNING]
+> **TensorFlow Backend Deprecation Notice**
+>
+> Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
+> We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
+> Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
+>
+> This decision was made based on several considerations:
+>
+> - Allows better focus on improving the core library
+> - Frees up resources to develop new features faster
+> - Enables more targeted optimizations with PyTorch
+
 ### Prerequisites
 
 Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
@@ -199,6 +212,7 @@ Credits where it's due: this repository is implementing, among others, architect
 - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
 - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
 - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).
 
 ## More goodies
 
python_doctr-0.12.0/doctr/datasets/coco_text.py

@@ -0,0 +1,139 @@
+# Copyright (C) 2021-2025, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import AbstractDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["COCOTEXT"]
+
+
+class COCOTEXT(AbstractDataset):
+    """
+    COCO-Text dataset from `"COCO-Text: Dataset and Benchmark for Text Detection and Recognition in Natural Images"
+    <https://arxiv.org/pdf/1601.07140v2>`_ |
+    `"homepage" <https://bgshih.github.io/cocotext/>`_.
+
+    >>> # NOTE: You need to download the dataset first.
+    >>> from doctr.datasets import COCOTEXT
+    >>> train_set = COCOTEXT(train=True, img_folder="/path/to/coco_text/train2014/",
+    >>>                      label_path="/path/to/coco_text/cocotext.v2.json")
+    >>> img, target = train_set[0]
+    >>> test_set = COCOTEXT(train=False, img_folder="/path/to/coco_text/train2014/",
+    >>>                     label_path = "/path/to/coco_text/cocotext.v2.json")
+    >>> img, target = test_set[0]
+
+    Args:
+        img_folder: folder with all the images of the dataset
+        label_path: path to the annotations file of the dataset
+        train: whether the subset should be the training one
+        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
+        recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
+        **kwargs: keyword arguments from `AbstractDataset`.
+    """
+
+    def __init__(
+        self,
+        img_folder: str,
+        label_path: str,
+        train: bool = True,
+        use_polygons: bool = False,
+        recognition_task: bool = False,
+        detection_task: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(
+            img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
+        )
+        # Task check
+        if recognition_task and detection_task:
+            raise ValueError(
+                " 'recognition' and 'detection task' cannot be set to True simultaneously. "
+                + " To get the whole dataset with boxes and labels leave both parameters to False "
+            )
+
+        # File existence check
+        if not os.path.exists(label_path) or not os.path.exists(img_folder):
+            raise FileNotFoundError(f"unable to find {label_path if not os.path.exists(label_path) else img_folder}")
+
+        tmp_root = img_folder
+        self.train = train
+        np_dtype = np.float32
+        self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
+
+        with open(label_path, "r") as file:
+            data = json.load(file)
+
+        # Filter images based on the set
+        img_items = [img for img in data["imgs"].items() if (img[1]["set"] == "train") == train]
+        box: list[float] | np.ndarray
+
+        for img_id, img_info in tqdm(img_items, desc="Preparing and Loading COCOTEXT", total=len(img_items)):
+            img_path = os.path.join(img_folder, img_info["file_name"])
+
+            # File existence check
+            if not os.path.exists(img_path):  # pragma: no cover
+                raise FileNotFoundError(f"Unable to locate {img_path}")
+
+            # Get annotations for the current image (only legible text)
+            annotations = [
+                ann
+                for ann in data["anns"].values()
+                if ann["image_id"] == int(img_id) and ann["legibility"] == "legible"
+            ]
+
+            # Some images have no annotations with readable text
+            if not annotations:  # pragma: no cover
+                continue
+
+            _targets = []
+
+            for annotation in annotations:
+                x, y, w, h = annotation["bbox"]
+                if use_polygons:
+                    # (x, y) coordinates of top left, top right, bottom right, bottom left corners
+                    box = np.array(
+                        [
+                            [x, y],
+                            [x + w, y],
+                            [x + w, y + h],
+                            [x, y + h],
+                        ],
+                        dtype=np_dtype,
+                    )
+                else:
+                    # (xmin, ymin, xmax, ymax) coordinates
+                    box = [x, y, x + w, y + h]
+                _targets.append((annotation["utf8_string"], box))
+            text_targets, box_targets = zip(*_targets)
+
+            if recognition_task:
+                crops = crop_bboxes_from_image(
+                    img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
+                )
+                for crop, label in zip(crops, list(text_targets)):
+                    if label and " " not in label:
+                        self.data.append((crop, label))
+
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
+            else:
+                self.data.append((
+                    img_path,
+                    dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)),
+                ))
+
+        self.root = tmp_root
+
+    def extra_repr(self) -> str:
+        return f"train={self.train}"
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/cord.py

@@ -116,7 +116,8 @@ class CORD(VisionDataset):
                     img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
                 )
                 for crop, label in zip(crops, list(text_targets)):
-                    self.data.append((crop, label))
+                    if " " not in label:
+                        self.data.append((crop, label))
             elif detection_task:
                 self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
             else:
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/funsd.py

@@ -107,8 +107,8 @@ class FUNSD(VisionDataset):
                 )
                 for crop, label in zip(crops, list(text_targets)):
                     # filter labels with unknown characters
-                    if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
-                        self.data.append((crop, label))
+                    if not any(char in label for char in ["☑", "☐", "\u03bf", "\uf703", "\uf702", " "]):
+                        self.data.append((crop, label.replace("–", "-")))
             elif detection_task:
                 self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
             else:
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ic03.py

@@ -122,7 +122,7 @@ class IC03(VisionDataset):
             if recognition_task:
                 crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, name.text), geoms=boxes)
                 for crop, label in zip(crops, labels):
-                    if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
+                    if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label:
                         self.data.append((crop, label))
             elif detection_task:
                 self.data.append((name.text, boxes))
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ic13.py

@@ -100,7 +100,8 @@ class IC13(AbstractDataset):
             if recognition_task:
                 crops = crop_bboxes_from_image(img_path=img_path, geoms=box_targets)
                 for crop, label in zip(crops, labels):
-                    self.data.append((crop, label))
+                    if " " not in label:
+                        self.data.append((crop, label))
             elif detection_task:
                 self.data.append((img_path, box_targets))
             else:
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/iiit5k.py

@@ -8,6 +8,7 @@ from typing import Any
 
 import numpy as np
 import scipy.io as sio
+from PIL import Image
 from tqdm import tqdm
 
 from .datasets import VisionDataset
@@ -98,7 +99,9 @@ class IIIT5K(VisionDataset):
             box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
 
             if recognition_task:
-
+                if " " not in _raw_label:
+                    with Image.open(os.path.join(tmp_root, _raw_path)) as pil_img:
+                        self.data.append((np.array(pil_img.convert("RGB")), _raw_label))
             elif detection_task:
                 self.data.append((_raw_path, np.asarray(box_targets, dtype=np_dtype)))
             else:
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/imgur5k.py

@@ -133,7 +133,13 @@ class IMGUR5K(AbstractDataset):
                     img_path=os.path.join(self.root, img_name), geoms=np.asarray(box_targets, dtype=np_dtype)
                 )
                 for crop, label in zip(crops, labels):
-                    if
+                    if (
+                        crop.shape[0] > 0
+                        and crop.shape[1] > 0
+                        and len(label) > 0
+                        and len(label) < 30
+                        and " " not in label
+                    ):
                         # write data to disk
                         with open(os.path.join(reco_folder_path, f"{reco_images_counter}.txt"), "w") as f:
                             f.write(label)
@@ -152,6 +158,7 @@ class IMGUR5K(AbstractDataset):
         return f"train={self.train}"
 
     def _read_from_folder(self, path: str) -> None:
-
+        img_paths = glob.glob(os.path.join(path, "*.png"))
+        for img_path in tqdm(iterable=img_paths, desc="Preparing and Loading IMGUR5K", total=len(img_paths)):
             with open(os.path.join(path, f"{os.path.basename(img_path)[:-4]}.txt"), "r") as f:
                 self.data.append((img_path, f.read()))
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ocr.py

@@ -40,7 +40,7 @@ class OCRDataset(AbstractDataset):
         super().__init__(img_folder, **kwargs)
 
         # List images
-        self.data: list[tuple[
+        self.data: list[tuple[Path, dict[str, Any]]] = []
         np_dtype = np.float32
         with open(label_file, "rb") as f:
             data = json.load(f)
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/recognition.py

@@ -23,7 +23,7 @@ class RecognitionDataset(AbstractDataset):
 
     Args:
         img_folder: path to the images folder
-        labels_path:
+        labels_path: path to the json file containing all labels (character sequences)
         **kwargs: keyword arguments from `AbstractDataset`.
     """
 
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/svhn.py

@@ -129,7 +129,7 @@ class SVHN(VisionDataset):
             if recognition_task:
                 crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, img_name), geoms=box_targets)
                 for crop, label in zip(crops, label_targets):
-                    if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
+                    if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label:
                         self.data.append((crop, label))
             elif detection_task:
                 self.data.append((img_name, box_targets))
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/svt.py

@@ -35,7 +35,7 @@ class SVT(VisionDataset):
         **kwargs: keyword arguments from `VisionDataset`.
     """
 
-    URL = "http://
+    URL = "http://www.iapr-tc11.org/dataset/SVT/svt.zip"
     SHA256 = "63b3d55e6b6d1e036e2a844a20c034fe3af3c32e4d914d6e0c4a3cd43df3bebf"
 
     def __init__(
@@ -113,7 +113,7 @@ class SVT(VisionDataset):
             if recognition_task:
                 crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, name.text), geoms=boxes)
                 for crop, label in zip(crops, labels):
-                    if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
+                    if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label:
                         self.data.append((crop, label))
             elif detection_task:
                 self.data.append((name.text, boxes))
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/synthtext.py

@@ -41,6 +41,12 @@ class SynthText(VisionDataset):
     URL = "https://thor.robots.ox.ac.uk/~vgg/data/scenetext/SynthText.zip"
     SHA256 = "28ab030485ec8df3ed612c568dd71fb2793b9afbfa3a9d9c6e792aef33265bf1"
 
+    # filter corrupted or missing images
+    BLACKLIST = (
+        "67/fruits_129_",
+        "194/window_19_",
+    )
+
     def __init__(
         self,
         train: bool = True,
@@ -111,7 +117,13 @@ class SynthText(VisionDataset):
             if recognition_task:
                 crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, img_path[0]), geoms=word_boxes)
                 for crop, label in zip(crops, labels):
-                    if
+                    if (
+                        crop.shape[0] > 0
+                        and crop.shape[1] > 0
+                        and len(label) > 0
+                        and len(label) < 30
+                        and " " not in label
+                    ):
                         # write data to disk
                         with open(os.path.join(reco_folder_path, f"{reco_images_counter}.txt"), "w") as f:
                             f.write(label)
@@ -132,6 +144,7 @@ class SynthText(VisionDataset):
         return f"train={self.train}"
 
     def _read_from_folder(self, path: str) -> None:
-
+        img_paths = glob.glob(os.path.join(path, "*.png"))
+        for img_path in tqdm(iterable=img_paths, desc="Preparing and Loading SynthText", total=len(img_paths)):
             with open(os.path.join(path, f"{os.path.basename(img_path)[:-4]}.txt"), "r") as f:
                 self.data.append((img_path, f.read()))
{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/utils.py

@@ -48,7 +48,7 @@ def translate(
         A string translated in a given vocab
     """
     if VOCABS.get(vocab_name) is None:
-        raise KeyError("output vocabulary must be in vocabs
+        raise KeyError("output vocabulary must be in vocabs dictionary")
 
     translated = ""
     for char in input_string:
@@ -81,11 +81,12 @@ def encode_string(
     """
    try:
         return list(map(vocab.index, input_string))
-    except ValueError:
+    except ValueError as e:
+        missing_chars = [char for char in input_string if char not in vocab]
         raise ValueError(
-            f"
-
-        )
+            f"Some characters cannot be found in 'vocab': {set(missing_chars)}.\n"
+            f"Please check the input string `{input_string}` and the vocabulary `{vocab}`"
+        ) from e
 
 
 def decode_sequence(
@@ -199,7 +200,7 @@ def crop_bboxes_from_image(img_path: str | Path, geoms: np.ndarray) -> list[np.n
         a list of cropped images
     """
     with Image.open(img_path) as pil_img:
-        img: np.ndarray = np.
+        img: np.ndarray = np.asarray(pil_img.convert("RGB"))
     # Polygon
     if geoms.ndim == 3 and geoms.shape[1:] == (4, 2):
         return extract_rcrops(img, geoms.astype(dtype=int))