python-doctr 0.11.0__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_doctr-0.11.0/python_doctr.egg-info → python_doctr-1.0.0}/PKG-INFO +22 -63
- {python_doctr-0.11.0 → python_doctr-1.0.0}/README.md +14 -47
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/__init__.py +0 -1
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/__init__.py +1 -5
- python_doctr-1.0.0/doctr/datasets/coco_text.py +139 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/cord.py +2 -1
- python_doctr-1.0.0/doctr/datasets/datasets/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/datasets/pytorch.py +2 -2
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/funsd.py +2 -2
- python_doctr-1.0.0/doctr/datasets/generator/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/ic03.py +1 -1
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/ic13.py +2 -1
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/iiit5k.py +4 -1
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/imgur5k.py +9 -2
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/ocr.py +1 -1
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/recognition.py +1 -1
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/svhn.py +1 -1
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/svt.py +2 -2
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/synthtext.py +15 -2
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/utils.py +7 -6
- python_doctr-1.0.0/doctr/datasets/vocabs.py +1138 -0
- python_doctr-1.0.0/doctr/file_utils.py +30 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/io/elements.py +37 -3
- python_doctr-1.0.0/doctr/io/image/__init__.py +2 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/io/image/pytorch.py +1 -1
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/_utils.py +4 -4
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/classification/__init__.py +1 -0
- python_doctr-1.0.0/doctr/models/classification/magc_resnet/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/classification/magc_resnet/pytorch.py +3 -4
- python_doctr-1.0.0/doctr/models/classification/mobilenet/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/classification/mobilenet/pytorch.py +15 -1
- python_doctr-1.0.0/doctr/models/classification/predictor/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/classification/predictor/pytorch.py +2 -2
- python_doctr-1.0.0/doctr/models/classification/resnet/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/classification/resnet/pytorch.py +26 -3
- python_doctr-1.0.0/doctr/models/classification/textnet/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/classification/textnet/pytorch.py +11 -2
- python_doctr-1.0.0/doctr/models/classification/vgg/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/classification/vgg/pytorch.py +16 -1
- python_doctr-1.0.0/doctr/models/classification/vip/__init__.py +1 -0
- python_doctr-1.0.0/doctr/models/classification/vip/layers/__init__.py +1 -0
- python_doctr-1.0.0/doctr/models/classification/vip/layers/pytorch.py +615 -0
- python_doctr-1.0.0/doctr/models/classification/vip/pytorch.py +505 -0
- python_doctr-1.0.0/doctr/models/classification/vit/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/classification/vit/pytorch.py +12 -3
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/classification/zoo.py +7 -8
- python_doctr-1.0.0/doctr/models/detection/_utils/__init__.py +2 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/core.py +1 -1
- python_doctr-1.0.0/doctr/models/detection/differentiable_binarization/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/differentiable_binarization/base.py +7 -16
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/differentiable_binarization/pytorch.py +13 -4
- python_doctr-1.0.0/doctr/models/detection/fast/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/fast/base.py +6 -17
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/fast/pytorch.py +17 -8
- python_doctr-1.0.0/doctr/models/detection/linknet/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/linknet/base.py +5 -15
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/linknet/pytorch.py +12 -3
- python_doctr-1.0.0/doctr/models/detection/predictor/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/predictor/pytorch.py +1 -1
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/zoo.py +15 -32
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/factory/hub.py +9 -22
- python_doctr-1.0.0/doctr/models/kie_predictor/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/kie_predictor/pytorch.py +3 -7
- python_doctr-1.0.0/doctr/models/modules/layers/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/modules/layers/pytorch.py +52 -4
- python_doctr-1.0.0/doctr/models/modules/transformer/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/modules/transformer/pytorch.py +2 -2
- python_doctr-1.0.0/doctr/models/modules/vision_transformer/__init__.py +1 -0
- python_doctr-1.0.0/doctr/models/predictor/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/predictor/base.py +3 -8
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/predictor/pytorch.py +3 -6
- python_doctr-1.0.0/doctr/models/preprocessor/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/preprocessor/pytorch.py +27 -32
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/__init__.py +1 -0
- python_doctr-1.0.0/doctr/models/recognition/crnn/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/crnn/pytorch.py +16 -7
- python_doctr-1.0.0/doctr/models/recognition/master/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/master/pytorch.py +15 -6
- python_doctr-1.0.0/doctr/models/recognition/parseq/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/parseq/pytorch.py +26 -8
- python_doctr-1.0.0/doctr/models/recognition/predictor/__init__.py +1 -0
- python_doctr-1.0.0/doctr/models/recognition/predictor/_utils.py +136 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/predictor/pytorch.py +4 -5
- python_doctr-1.0.0/doctr/models/recognition/sar/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/sar/pytorch.py +13 -4
- python_doctr-1.0.0/doctr/models/recognition/utils.py +93 -0
- python_doctr-1.0.0/doctr/models/recognition/viptr/__init__.py +1 -0
- python_doctr-1.0.0/doctr/models/recognition/viptr/pytorch.py +277 -0
- python_doctr-1.0.0/doctr/models/recognition/vitstr/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/vitstr/pytorch.py +13 -4
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/zoo.py +13 -8
- python_doctr-1.0.0/doctr/models/utils/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/utils/pytorch.py +29 -19
- python_doctr-1.0.0/doctr/transforms/functional/__init__.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/transforms/functional/pytorch.py +4 -4
- python_doctr-1.0.0/doctr/transforms/modules/__init__.py +2 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/transforms/modules/base.py +26 -92
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/transforms/modules/pytorch.py +28 -26
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/utils/data.py +1 -1
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/utils/geometry.py +7 -11
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/utils/visualization.py +1 -1
- python_doctr-1.0.0/doctr/version.py +1 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/pyproject.toml +6 -23
- {python_doctr-0.11.0 → python_doctr-1.0.0/python_doctr.egg-info}/PKG-INFO +22 -63
- {python_doctr-0.11.0 → python_doctr-1.0.0}/python_doctr.egg-info/SOURCES.txt +7 -31
- {python_doctr-0.11.0 → python_doctr-1.0.0}/python_doctr.egg-info/requires.txt +4 -23
- {python_doctr-0.11.0 → python_doctr-1.0.0}/setup.py +1 -1
- python_doctr-0.11.0/doctr/datasets/datasets/__init__.py +0 -6
- python_doctr-0.11.0/doctr/datasets/datasets/tensorflow.py +0 -59
- python_doctr-0.11.0/doctr/datasets/generator/__init__.py +0 -6
- python_doctr-0.11.0/doctr/datasets/generator/tensorflow.py +0 -58
- python_doctr-0.11.0/doctr/datasets/loader.py +0 -94
- python_doctr-0.11.0/doctr/datasets/vocabs.py +0 -92
- python_doctr-0.11.0/doctr/file_utils.py +0 -120
- python_doctr-0.11.0/doctr/io/image/__init__.py +0 -8
- python_doctr-0.11.0/doctr/io/image/tensorflow.py +0 -101
- python_doctr-0.11.0/doctr/models/classification/magc_resnet/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/classification/magc_resnet/tensorflow.py +0 -196
- python_doctr-0.11.0/doctr/models/classification/mobilenet/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/classification/mobilenet/tensorflow.py +0 -433
- python_doctr-0.11.0/doctr/models/classification/predictor/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/classification/predictor/tensorflow.py +0 -60
- python_doctr-0.11.0/doctr/models/classification/resnet/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/classification/resnet/tensorflow.py +0 -397
- python_doctr-0.11.0/doctr/models/classification/textnet/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/classification/textnet/tensorflow.py +0 -266
- python_doctr-0.11.0/doctr/models/classification/vgg/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/classification/vgg/tensorflow.py +0 -116
- python_doctr-0.11.0/doctr/models/classification/vit/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/classification/vit/tensorflow.py +0 -192
- python_doctr-0.11.0/doctr/models/detection/_utils/__init__.py +0 -7
- python_doctr-0.11.0/doctr/models/detection/_utils/tensorflow.py +0 -34
- python_doctr-0.11.0/doctr/models/detection/differentiable_binarization/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/detection/differentiable_binarization/tensorflow.py +0 -414
- python_doctr-0.11.0/doctr/models/detection/fast/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/detection/fast/tensorflow.py +0 -419
- python_doctr-0.11.0/doctr/models/detection/linknet/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/detection/linknet/tensorflow.py +0 -369
- python_doctr-0.11.0/doctr/models/detection/predictor/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/detection/predictor/tensorflow.py +0 -70
- python_doctr-0.11.0/doctr/models/kie_predictor/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/kie_predictor/tensorflow.py +0 -187
- python_doctr-0.11.0/doctr/models/modules/layers/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/modules/layers/tensorflow.py +0 -171
- python_doctr-0.11.0/doctr/models/modules/transformer/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/modules/transformer/tensorflow.py +0 -235
- python_doctr-0.11.0/doctr/models/modules/vision_transformer/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/modules/vision_transformer/tensorflow.py +0 -100
- python_doctr-0.11.0/doctr/models/predictor/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/predictor/tensorflow.py +0 -155
- python_doctr-0.11.0/doctr/models/preprocessor/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/preprocessor/tensorflow.py +0 -122
- python_doctr-0.11.0/doctr/models/recognition/crnn/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/recognition/crnn/tensorflow.py +0 -308
- python_doctr-0.11.0/doctr/models/recognition/master/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/recognition/master/tensorflow.py +0 -313
- python_doctr-0.11.0/doctr/models/recognition/parseq/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/recognition/parseq/tensorflow.py +0 -508
- python_doctr-0.11.0/doctr/models/recognition/predictor/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/recognition/predictor/_utils.py +0 -83
- python_doctr-0.11.0/doctr/models/recognition/predictor/tensorflow.py +0 -79
- python_doctr-0.11.0/doctr/models/recognition/sar/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/recognition/sar/tensorflow.py +0 -416
- python_doctr-0.11.0/doctr/models/recognition/utils.py +0 -84
- python_doctr-0.11.0/doctr/models/recognition/vitstr/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/recognition/vitstr/tensorflow.py +0 -278
- python_doctr-0.11.0/doctr/models/utils/__init__.py +0 -6
- python_doctr-0.11.0/doctr/models/utils/tensorflow.py +0 -182
- python_doctr-0.11.0/doctr/transforms/functional/__init__.py +0 -6
- python_doctr-0.11.0/doctr/transforms/functional/tensorflow.py +0 -254
- python_doctr-0.11.0/doctr/transforms/modules/__init__.py +0 -8
- python_doctr-0.11.0/doctr/transforms/modules/tensorflow.py +0 -562
- python_doctr-0.11.0/doctr/version.py +0 -1
- {python_doctr-0.11.0 → python_doctr-1.0.0}/LICENSE +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/contrib/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/contrib/artefacts.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/contrib/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/datasets/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/detection.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/doc_artefacts.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/generator/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/generator/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/iiithws.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/mjsynth.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/orientation.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/sroie.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/wildreceipt.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/io/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/io/html.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/io/image/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/io/pdf.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/io/reader.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/builder.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/core.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/_utils/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/detection/_utils/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/factory/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/kie_predictor/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/modules/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/modules/vision_transformer/pytorch.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/core.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/master/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/parseq/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/recognition/vitstr/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/models/zoo.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/py.typed +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/transforms/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/transforms/functional/base.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/utils/__init__.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/utils/common_types.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/utils/fonts.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/utils/metrics.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/utils/multithreading.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/utils/reconstitution.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/utils/repr.py +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/python_doctr.egg-info/dependency_links.txt +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/python_doctr.egg-info/top_level.txt +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/python_doctr.egg-info/zip-safe +0 -0
- {python_doctr-0.11.0 → python_doctr-1.0.0}/setup.cfg +0 -0
{python_doctr-0.11.0/python_doctr.egg-info → python_doctr-1.0.0}/PKG-INFO

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: python-doctr
-Version: 0.11.0
+Version: 1.0.0
 Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
 Author-email: Mindee <contact@mindee.com>
 Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -210,7 +210,7 @@ Project-URL: documentation, https://mindee.github.io/doctr
 Project-URL: repository, https://github.com/mindee/doctr
 Project-URL: tracker, https://github.com/mindee/doctr/issues
 Project-URL: changelog, https://mindee.github.io/doctr/changelog.html
-Keywords: OCR,deep learning,computer vision,
+Keywords: OCR,deep learning,computer vision,pytorch,text detection,text recognition
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Education
@@ -226,6 +226,9 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: <4,>=3.10.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: torch<3.0.0,>=2.0.0
+Requires-Dist: torchvision>=0.15.0
+Requires-Dist: onnx<3.0.0,>=1.12.0
 Requires-Dist: numpy<3.0.0,>=1.16.0
 Requires-Dist: scipy<2.0.0,>=1.4.0
 Requires-Dist: h5py<4.0.0,>=3.1.0
@@ -239,16 +242,8 @@ Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
 Requires-Dist: Pillow>=9.2.0
 Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
+Requires-Dist: validators>=0.18.0
 Requires-Dist: tqdm>=4.30.0
-Provides-Extra: tf
-Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "tf"
-Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "tf"
-Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
-Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
-Provides-Extra: torch
-Requires-Dist: torch<3.0.0,>=2.0.0; extra == "torch"
-Requires-Dist: torchvision>=0.15.0; extra == "torch"
-Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
 Provides-Extra: html
 Requires-Dist: weasyprint>=55.0; extra == "html"
 Provides-Extra: viz
@@ -276,10 +271,6 @@ Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
 Requires-Dist: furo>=2022.3.4; extra == "docs"
 Provides-Extra: dev
-Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "dev"
-Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "dev"
-Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
-Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
 Requires-Dist: torchvision>=0.15.0; extra == "dev"
 Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "dev"
@@ -302,15 +293,16 @@ Requires-Dist: recommonmark>=0.7.1; extra == "dev"
 Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
 Requires-Dist: furo>=2022.3.4; extra == "dev"
+Dynamic: license-file
 
 <p align="center">
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>
 
-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [](https://gurubase.io/g/doctr)
 
 
-**Optical Character Recognition made seamless & accessible to anyone, powered by
+**Optical Character Recognition made seamless & accessible to anyone, powered by PyTorch**
 
 What you can expect from this repository:
 
@@ -450,24 +442,15 @@ You can then install the latest release of the package using [pypi](https://pypi
 pip install python-doctr
 ```
 
-
-
-We try to keep framework-specific dependencies to a minimum. You can install framework-specific builds as follows:
+We try to keep extra dependencies to a minimum. You can install specific builds as follows:
 
 ```shell
-#
-pip install
-# for PyTorch
-pip install "python-doctr[torch]"
+# standard build
+pip install python-doctr
 # optional dependencies for visualization, html, and contrib modules can be installed as follows:
-pip install "python-doctr[
+pip install "python-doctr[viz,html,contrib]"
 ```
 
-For MacBooks with M1 chip, you will need some additional packages or specific versions:
-
-- TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
-- PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)
-
 ### Developer mode
 
 Alternatively, you can install it from source, which will require you to install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git).
@@ -478,13 +461,10 @@ git clone https://github.com/mindee/doctr.git
 pip install -e doctr/.
 ```
 
-Again, if you prefer to avoid the risk of missing dependencies, you can install the
+Again, if you prefer to avoid the risk of missing dependencies, you can install the build:
 
 ```shell
-
-pip install -e doctr/.[tf]
-# for PyTorch
-pip install -e doctr/.[torch]
+pip install -e doctr/.
 ```
 
 ## Models architectures
@@ -504,6 +484,7 @@ Credits where it's due: this repository is implementing, among others, architect
 - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
 - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
 - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).
 
 ## More goodies
 
@@ -526,20 +507,6 @@ Check it out [) that is required.
 
-##### Tensorflow version
-
-```shell
-pip install -r demo/tf-requirements.txt
-```
-
-Then run your app in your default browser with:
-
-```shell
-USE_TF=1 streamlit run demo/app.py
-```
-
-##### PyTorch version
-
 ```shell
 pip install -r demo/pt-requirements.txt
 ```
@@ -547,23 +514,16 @@ pip install -r demo/pt-requirements.txt
 Then run your app in your default browser with:
 
 ```shell
-
+streamlit run demo/app.py
 ```
 
-#### TensorFlow.js
-
-Instead of having your demo actually running Python, you would prefer to run everything in your web browser?
-Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to get started!
-
-
-
 ### Docker container
 
 We offer Docker container support for easy testing and deployment. [Here are the available docker tags.](https://github.com/mindee/doctr/pkgs/container/doctr).
 
 #### Using GPU with docTR Docker Images
 
-The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch
+The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch won't be able to initialize the GPU.
 Please ensure that Docker is configured to use your GPU.
 
 To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
@@ -578,7 +538,7 @@ docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
 
 The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:
 
-- `<deps>`: `
+- `<deps>`: `torch`, `torch-viz-html-contrib`.
 - `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
 - `<doctr_version>`: a tag >= `v0.11.0`
 - `<YYYY-MM>`: e.g. `2014-10`
@@ -587,7 +547,6 @@ Here are examples of different image tags:
 
 | Tag | Description |
 |----------------------------|---------------------------------------------------|
-| `tf-py3.10.13-v0.11.0` | TensorFlow version `3.10.13` with docTR `v0.11.0`. |
 | `torch-viz-html-contrib-py3.11.8-2024-10` | Torch with extra dependencies version `3.11.8` from latest commit on `main` in `2024-10`. |
 | `torch-py3.11.8-2024-10`| PyTorch version `3.11.8` from latest commit on `main` in `2024-10`. |
 
@@ -599,10 +558,10 @@ You can also build docTR Docker images locally on your computer.
 docker build -t doctr .
 ```
 
-You can specify custom Python versions and docTR versions using build arguments. For example, to build a docTR image with
+You can specify custom Python versions and docTR versions using build arguments. For example, to build a docTR image with PyTorch, Python version `3.9.10`, and docTR version `v0.7.0`, run the following command:
 
 ```shell
-docker build -t doctr --build-arg FRAMEWORK=
+docker build -t doctr --build-arg FRAMEWORK=torch --build-arg PYTHON_VERSION=3.9.10 --build-arg DOCTR_VERSION=v0.7.0 .
 ```
 
 ### Example script
{python_doctr-0.11.0 → python_doctr-1.0.0}/README.md

@@ -2,10 +2,10 @@
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>
 
-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [](https://gurubase.io/g/doctr)
 
 
-**Optical Character Recognition made seamless & accessible to anyone, powered by
+**Optical Character Recognition made seamless & accessible to anyone, powered by PyTorch**
 
 What you can expect from this repository:
 
@@ -145,24 +145,15 @@ You can then install the latest release of the package using [pypi](https://pypi
 pip install python-doctr
 ```
 
-
-
-We try to keep framework-specific dependencies to a minimum. You can install framework-specific builds as follows:
+We try to keep extra dependencies to a minimum. You can install specific builds as follows:
 
 ```shell
-#
-pip install
-# for PyTorch
-pip install "python-doctr[torch]"
+# standard build
+pip install python-doctr
 # optional dependencies for visualization, html, and contrib modules can be installed as follows:
-pip install "python-doctr[
+pip install "python-doctr[viz,html,contrib]"
 ```
 
-For MacBooks with M1 chip, you will need some additional packages or specific versions:
-
-- TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
-- PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)
-
 ### Developer mode
 
 Alternatively, you can install it from source, which will require you to install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git).
@@ -173,13 +164,10 @@ git clone https://github.com/mindee/doctr.git
 pip install -e doctr/.
 ```
 
-Again, if you prefer to avoid the risk of missing dependencies, you can install the
+Again, if you prefer to avoid the risk of missing dependencies, you can install the build:
 
 ```shell
-
-pip install -e doctr/.[tf]
-# for PyTorch
-pip install -e doctr/.[torch]
+pip install -e doctr/.
 ```
 
 ## Models architectures
@@ -199,6 +187,7 @@ Credits where it's due: this repository is implementing, among others, architect
 - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
 - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
 - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).
 
 ## More goodies
 
@@ -221,20 +210,6 @@ Check it out [) that is required.
 
-##### Tensorflow version
-
-```shell
-pip install -r demo/tf-requirements.txt
-```
-
-Then run your app in your default browser with:
-
-```shell
-USE_TF=1 streamlit run demo/app.py
-```
-
-##### PyTorch version
-
 ```shell
 pip install -r demo/pt-requirements.txt
 ```
@@ -242,23 +217,16 @@ pip install -r demo/pt-requirements.txt
 Then run your app in your default browser with:
 
 ```shell
-
+streamlit run demo/app.py
 ```
 
-#### TensorFlow.js
-
-Instead of having your demo actually running Python, you would prefer to run everything in your web browser?
-Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to get started!
-
-
-
 ### Docker container
 
 We offer Docker container support for easy testing and deployment. [Here are the available docker tags.](https://github.com/mindee/doctr/pkgs/container/doctr).
 
 #### Using GPU with docTR Docker Images
 
-The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch
+The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch won't be able to initialize the GPU.
 Please ensure that Docker is configured to use your GPU.
 
 To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
@@ -273,7 +241,7 @@ docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
 
 The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:
 
-- `<deps>`: `
+- `<deps>`: `torch`, `torch-viz-html-contrib`.
 - `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
 - `<doctr_version>`: a tag >= `v0.11.0`
 - `<YYYY-MM>`: e.g. `2014-10`
@@ -282,7 +250,6 @@ Here are examples of different image tags:
 
 | Tag | Description |
 |----------------------------|---------------------------------------------------|
-| `tf-py3.10.13-v0.11.0` | TensorFlow version `3.10.13` with docTR `v0.11.0`. |
 | `torch-viz-html-contrib-py3.11.8-2024-10` | Torch with extra dependencies version `3.11.8` from latest commit on `main` in `2024-10`. |
 | `torch-py3.11.8-2024-10`| PyTorch version `3.11.8` from latest commit on `main` in `2024-10`. |
 
@@ -294,10 +261,10 @@ You can also build docTR Docker images locally on your computer.
 docker build -t doctr .
 ```
 
-You can specify custom Python versions and docTR versions using build arguments. For example, to build a docTR image with
+You can specify custom Python versions and docTR versions using build arguments. For example, to build a docTR image with PyTorch, Python version `3.9.10`, and docTR version `v0.7.0`, run the following command:
 
 ```shell
-docker build -t doctr --build-arg FRAMEWORK=
+docker build -t doctr --build-arg FRAMEWORK=torch --build-arg PYTHON_VERSION=3.9.10 --build-arg DOCTR_VERSION=v0.7.0 .
 ```
 
 ### Example script
{python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/__init__.py

@@ -1,6 +1,5 @@
-from doctr.file_utils import is_tf_available
-
 from .generator import *
+from .coco_text import *
 from .cord import *
 from .detection import *
 from .doc_artefacts import *
@@ -21,6 +20,3 @@ from .synthtext import *
 from .utils import *
 from .vocabs import *
 from .wildreceipt import *
-
-if is_tf_available():
-    from .loader import *
python_doctr-1.0.0/doctr/datasets/coco_text.py (new file)

@@ -0,0 +1,139 @@
+# Copyright (C) 2021-2025, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import AbstractDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["COCOTEXT"]
+
+
+class COCOTEXT(AbstractDataset):
+    """
+    COCO-Text dataset from `"COCO-Text: Dataset and Benchmark for Text Detection and Recognition in Natural Images"
+    <https://arxiv.org/pdf/1601.07140v2>`_ |
+    `"homepage" <https://bgshih.github.io/cocotext/>`_.
+
+    >>> # NOTE: You need to download the dataset first.
+    >>> from doctr.datasets import COCOTEXT
+    >>> train_set = COCOTEXT(train=True, img_folder="/path/to/coco_text/train2014/",
+    >>>                      label_path="/path/to/coco_text/cocotext.v2.json")
+    >>> img, target = train_set[0]
+    >>> test_set = COCOTEXT(train=False, img_folder="/path/to/coco_text/train2014/",
+    >>>                     label_path = "/path/to/coco_text/cocotext.v2.json")
+    >>> img, target = test_set[0]
+
+    Args:
+        img_folder: folder with all the images of the dataset
+        label_path: path to the annotations file of the dataset
+        train: whether the subset should be the training one
+        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
+        recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
+        **kwargs: keyword arguments from `AbstractDataset`.
+    """
+
+    def __init__(
+        self,
+        img_folder: str,
+        label_path: str,
+        train: bool = True,
+        use_polygons: bool = False,
+        recognition_task: bool = False,
+        detection_task: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(
+            img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
+        )
+        # Task check
+        if recognition_task and detection_task:
+            raise ValueError(
+                " 'recognition' and 'detection task' cannot be set to True simultaneously. "
+                + " To get the whole dataset with boxes and labels leave both parameters to False "
+            )
+
+        # File existence check
+        if not os.path.exists(label_path) or not os.path.exists(img_folder):
+            raise FileNotFoundError(f"unable to find {label_path if not os.path.exists(label_path) else img_folder}")
+
+        tmp_root = img_folder
+        self.train = train
+        np_dtype = np.float32
+        self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
+
+        with open(label_path, "r") as file:
+            data = json.load(file)
+
+        # Filter images based on the set
+        img_items = [img for img in data["imgs"].items() if (img[1]["set"] == "train") == train]
+        box: list[float] | np.ndarray
+
+        for img_id, img_info in tqdm(img_items, desc="Preparing and Loading COCOTEXT", total=len(img_items)):
+            img_path = os.path.join(img_folder, img_info["file_name"])
+
+            # File existence check
+            if not os.path.exists(img_path):  # pragma: no cover
+                raise FileNotFoundError(f"Unable to locate {img_path}")
+
+            # Get annotations for the current image (only legible text)
+            annotations = [
+                ann
+                for ann in data["anns"].values()
+                if ann["image_id"] == int(img_id) and ann["legibility"] == "legible"
+            ]
+
+            # Some images have no annotations with readable text
+            if not annotations:  # pragma: no cover
+                continue
+
+            _targets = []
+
+            for annotation in annotations:
+                x, y, w, h = annotation["bbox"]
+                if use_polygons:
+                    # (x, y) coordinates of top left, top right, bottom right, bottom left corners
+                    box = np.array(
+                        [
+                            [x, y],
+                            [x + w, y],
+                            [x + w, y + h],
+                            [x, y + h],
+                        ],
+                        dtype=np_dtype,
+                    )
+                else:
+                    # (xmin, ymin, xmax, ymax) coordinates
+                    box = [x, y, x + w, y + h]
+                _targets.append((annotation["utf8_string"], box))
+            text_targets, box_targets = zip(*_targets)
+
+            if recognition_task:
+                crops = crop_bboxes_from_image(
+                    img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
+                )
+                for crop, label in zip(crops, list(text_targets)):
+                    if label and " " not in label:
+                        self.data.append((crop, label))
+
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
+            else:
+                self.data.append((
+                    img_path,
+                    dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)),
+                ))
+
+        self.root = tmp_root
+
+    def extra_repr(self) -> str:
+        return f"train={self.train}"
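For orientation, here is a usage sketch for the new `COCOTEXT` loader, assembled from the class docstring above; the dataset must be downloaded separately, and the paths below are placeholders:

```python
from doctr.datasets import COCOTEXT

# Full dataset: each sample pairs an image with its boxes and transcriptions
train_set = COCOTEXT(
    train=True,
    img_folder="/path/to/coco_text/train2014/",
    label_path="/path/to/coco_text/cocotext.v2.json",
)
img, target = train_set[0]  # target: dict with "boxes" (np.ndarray) and "labels" (list[str])

# Recognition variant: pre-cropped word images, space-free labels only
reco_set = COCOTEXT(
    train=True,
    img_folder="/path/to/coco_text/train2014/",
    label_path="/path/to/coco_text/cocotext.v2.json",
    recognition_task=True,
)
crop, word = reco_set[0]
```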
{python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/cord.py

@@ -116,7 +116,8 @@ class CORD(VisionDataset):
                 img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
             )
             for crop, label in zip(crops, list(text_targets)):
-
+                if " " not in label:
+                    self.data.append((crop, label))
         elif detection_task:
             self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
         else:
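This CORD change is one instance of a filter applied consistently across the loaders in this release (see the FUNSD, IC03, IC13, IIIT5K, and IMGUR5K hunks below, and the new COCO-Text loader above): crops whose label contains a space no longer become recognition samples. The exact predicate varies slightly per dataset (some also require non-empty labels or a length bound); a minimal restatement for illustration only:

```python
def keep_for_recognition(label: str) -> bool:
    # A crop becomes a recognition sample only when its label is a single,
    # non-empty, space-free token; multi-word labels are skipped.
    return bool(label) and " " not in label

assert keep_for_recognition("TOTAL")
assert not keep_for_recognition("TOTAL DUE")
```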
python_doctr-1.0.0/doctr/datasets/datasets/__init__.py (new file)

@@ -0,0 +1 @@
+from .pytorch import *
{python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/datasets/pytorch.py

@@ -50,9 +50,9 @@ class AbstractDataset(_AbstractDataset):
     @staticmethod
     def collate_fn(samples: list[tuple[torch.Tensor, Any]]) -> tuple[torch.Tensor, list[Any]]:
         images, targets = zip(*samples)
-        images = torch.stack(images, dim=0)
+        images = torch.stack(images, dim=0)  # type: ignore[assignment]
 
-        return images, list(targets)
+        return images, list(targets)  # type: ignore[return-value]
 
 
 class VisionDataset(AbstractDataset, _VisionDataset):  # noqa: D101
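The two added `# type: ignore` comments only quiet mypy about reassigning the `images` tuple to a stacked tensor; runtime behavior is unchanged. A self-contained sketch of what this `collate_fn` does (the sample data is illustrative, not from the diff):

```python
from typing import Any

import torch

def collate_fn(samples: list[tuple[torch.Tensor, Any]]) -> tuple[torch.Tensor, list[Any]]:
    images, targets = zip(*samples)
    # Stack the per-sample (C, H, W) tensors into one (N, C, H, W) batch;
    # targets stay a plain list since their structure varies per dataset.
    return torch.stack(images, dim=0), list(targets)

batch_imgs, batch_targets = collate_fn([(torch.zeros(3, 32, 32), "a"), (torch.ones(3, 32, 32), "b")])
print(batch_imgs.shape, batch_targets)  # torch.Size([2, 3, 32, 32]) ['a', 'b']
```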
{python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/funsd.py

@@ -107,8 +107,8 @@ class FUNSD(VisionDataset):
             )
             for crop, label in zip(crops, list(text_targets)):
                 # filter labels with unknown characters
-                if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
-                    self.data.append((crop, label))
+                if not any(char in label for char in ["☑", "☐", "\u03bf", "\uf703", "\uf702", " "]):
+                    self.data.append((crop, label.replace("–", "-")))
         elif detection_task:
             self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
         else:
python_doctr-1.0.0/doctr/datasets/generator/__init__.py (new file)

@@ -0,0 +1 @@
+from .pytorch import *
{python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/ic03.py

@@ -122,7 +122,7 @@ class IC03(VisionDataset):
         if recognition_task:
             crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, name.text), geoms=boxes)
             for crop, label in zip(crops, labels):
-                if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
+                if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label:
                     self.data.append((crop, label))
         elif detection_task:
             self.data.append((name.text, boxes))
{python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/ic13.py

@@ -100,7 +100,8 @@ class IC13(AbstractDataset):
         if recognition_task:
             crops = crop_bboxes_from_image(img_path=img_path, geoms=box_targets)
             for crop, label in zip(crops, labels):
-
+                if " " not in label:
+                    self.data.append((crop, label))
         elif detection_task:
             self.data.append((img_path, box_targets))
         else:
{python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/iiit5k.py

@@ -8,6 +8,7 @@ from typing import Any
 
 import numpy as np
 import scipy.io as sio
+from PIL import Image
 from tqdm import tqdm
 
 from .datasets import VisionDataset
@@ -98,7 +99,9 @@ class IIIT5K(VisionDataset):
             box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
 
             if recognition_task:
-
+                if " " not in _raw_label:
+                    with Image.open(os.path.join(tmp_root, _raw_path)) as pil_img:
+                        self.data.append((np.array(pil_img.convert("RGB")), _raw_label))
             elif detection_task:
                 self.data.append((_raw_path, np.asarray(box_targets, dtype=np_dtype)))
             else:
{python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/imgur5k.py

@@ -133,7 +133,13 @@ class IMGUR5K(AbstractDataset):
                 img_path=os.path.join(self.root, img_name), geoms=np.asarray(box_targets, dtype=np_dtype)
             )
             for crop, label in zip(crops, labels):
-                if
+                if (
+                    crop.shape[0] > 0
+                    and crop.shape[1] > 0
+                    and len(label) > 0
+                    and len(label) < 30
+                    and " " not in label
+                ):
                     # write data to disk
                     with open(os.path.join(reco_folder_path, f"{reco_images_counter}.txt"), "w") as f:
                         f.write(label)
@@ -152,6 +158,7 @@ class IMGUR5K(AbstractDataset):
         return f"train={self.train}"
 
     def _read_from_folder(self, path: str) -> None:
-
+        img_paths = glob.glob(os.path.join(path, "*.png"))
+        for img_path in tqdm(iterable=img_paths, desc="Preparing and Loading IMGUR5K", total=len(img_paths)):
             with open(os.path.join(path, f"{os.path.basename(img_path)[:-4]}.txt"), "r") as f:
                 self.data.append((img_path, f.read()))
{python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/ocr.py

@@ -40,7 +40,7 @@ class OCRDataset(AbstractDataset):
         super().__init__(img_folder, **kwargs)
 
         # List images
-        self.data: list[tuple[
+        self.data: list[tuple[Path, dict[str, Any]]] = []
         np_dtype = np.float32
         with open(label_file, "rb") as f:
             data = json.load(f)
{python_doctr-0.11.0 → python_doctr-1.0.0}/doctr/datasets/recognition.py

@@ -23,7 +23,7 @@ class RecognitionDataset(AbstractDataset):
 
     Args:
         img_folder: path to the images folder
-        labels_path:
+        labels_path: path to the json file containing all labels (character sequences)
         **kwargs: keyword arguments from `AbstractDataset`.
     """
 