python-doctr 0.10.0__tar.gz → 0.12.0__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {python_doctr-0.10.0 → python_doctr-0.12.0}/PKG-INFO +38 -20
- {python_doctr-0.10.0 → python_doctr-0.12.0}/README.md +28 -14
- python_doctr-0.12.0/doctr/contrib/__init__.py +1 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/contrib/artefacts.py +7 -9
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/contrib/base.py +8 -17
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/__init__.py +1 -0
- python_doctr-0.12.0/doctr/datasets/coco_text.py +139 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/cord.py +10 -8
- python_doctr-0.12.0/doctr/datasets/datasets/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/datasets/base.py +16 -16
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/datasets/pytorch.py +12 -12
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/datasets/tensorflow.py +10 -10
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/detection.py +6 -9
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/doc_artefacts.py +3 -4
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/funsd.py +9 -8
- python_doctr-0.12.0/doctr/datasets/generator/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/generator/base.py +16 -17
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/generator/pytorch.py +1 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/generator/tensorflow.py +1 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/ic03.py +5 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/ic13.py +6 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/iiit5k.py +10 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/iiithws.py +4 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/imgur5k.py +15 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/loader.py +4 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/mjsynth.py +6 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/ocr.py +3 -4
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/orientation.py +3 -4
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/recognition.py +4 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/sroie.py +6 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/svhn.py +7 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/svt.py +6 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/synthtext.py +19 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/utils.py +41 -35
- python_doctr-0.12.0/doctr/datasets/vocabs.py +1140 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/wildreceipt.py +14 -10
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/file_utils.py +11 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/elements.py +96 -82
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/html.py +1 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/__init__.py +3 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/base.py +2 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/pytorch.py +3 -12
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/tensorflow.py +2 -11
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/pdf.py +5 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/reader.py +5 -11
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/_utils.py +15 -23
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/builder.py +30 -48
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/__init__.py +1 -0
- {python_doctr-0.10.0/doctr/models/classification/mobilenet → python_doctr-0.12.0/doctr/models/classification/magc_resnet}/__init__.py +3 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/pytorch.py +11 -15
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/tensorflow.py +11 -14
- {python_doctr-0.10.0/doctr/models/classification/vgg → python_doctr-0.12.0/doctr/models/classification/mobilenet}/__init__.py +3 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/pytorch.py +20 -18
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/tensorflow.py +19 -23
- python_doctr-0.12.0/doctr/models/classification/predictor/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/pytorch.py +7 -9
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/tensorflow.py +6 -8
- python_doctr-0.12.0/doctr/models/classification/resnet/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/pytorch.py +47 -34
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/tensorflow.py +45 -35
- {python_doctr-0.10.0/doctr/transforms/functional → python_doctr-0.12.0/doctr/models/classification/textnet}/__init__.py +3 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/pytorch.py +20 -18
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/tensorflow.py +19 -17
- python_doctr-0.12.0/doctr/models/classification/vgg/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/pytorch.py +21 -8
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/tensorflow.py +20 -14
- python_doctr-0.12.0/doctr/models/classification/vip/__init__.py +4 -0
- python_doctr-0.12.0/doctr/models/classification/vip/layers/__init__.py +4 -0
- python_doctr-0.12.0/doctr/models/classification/vip/layers/pytorch.py +615 -0
- python_doctr-0.12.0/doctr/models/classification/vip/pytorch.py +505 -0
- python_doctr-0.12.0/doctr/models/classification/vit/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vit/pytorch.py +18 -15
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vit/tensorflow.py +15 -12
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/zoo.py +23 -14
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/core.py +3 -3
- python_doctr-0.12.0/doctr/models/detection/_utils/__init__.py +7 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/base.py +4 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/pytorch.py +1 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/tensorflow.py +1 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/core.py +2 -8
- python_doctr-0.12.0/doctr/models/detection/differentiable_binarization/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/base.py +10 -21
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
- python_doctr-0.12.0/doctr/models/detection/fast/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/fast/base.py +8 -17
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/fast/pytorch.py +37 -35
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/fast/tensorflow.py +24 -28
- python_doctr-0.12.0/doctr/models/detection/linknet/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/base.py +8 -18
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/pytorch.py +34 -28
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/tensorflow.py +24 -25
- python_doctr-0.12.0/doctr/models/detection/predictor/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/pytorch.py +6 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/tensorflow.py +5 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/zoo.py +27 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/factory/hub.py +6 -10
- python_doctr-0.12.0/doctr/models/kie_predictor/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/base.py +4 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/pytorch.py +19 -20
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/tensorflow.py +14 -15
- python_doctr-0.12.0/doctr/models/modules/layers/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/layers/pytorch.py +55 -10
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/layers/tensorflow.py +5 -7
- python_doctr-0.12.0/doctr/models/modules/transformer/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/pytorch.py +12 -13
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/tensorflow.py +9 -10
- python_doctr-0.12.0/doctr/models/modules/vision_transformer/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/pytorch.py +2 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/tensorflow.py +3 -3
- python_doctr-0.12.0/doctr/models/predictor/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/predictor/base.py +28 -29
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/predictor/pytorch.py +13 -14
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/predictor/tensorflow.py +9 -10
- python_doctr-0.12.0/doctr/models/preprocessor/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/preprocessor/pytorch.py +13 -17
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/preprocessor/tensorflow.py +10 -14
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/__init__.py +1 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/core.py +3 -7
- python_doctr-0.12.0/doctr/models/recognition/crnn/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/pytorch.py +30 -29
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/tensorflow.py +21 -24
- python_doctr-0.12.0/doctr/models/recognition/master/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/master/base.py +3 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/master/pytorch.py +32 -25
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/master/tensorflow.py +22 -25
- python_doctr-0.12.0/doctr/models/recognition/parseq/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/base.py +3 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/pytorch.py +47 -29
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/tensorflow.py +29 -27
- python_doctr-0.12.0/doctr/models/recognition/predictor/__init__.py +6 -0
- python_doctr-0.12.0/doctr/models/recognition/predictor/_utils.py +145 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/pytorch.py +9 -9
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/tensorflow.py +8 -9
- python_doctr-0.12.0/doctr/models/recognition/sar/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/pytorch.py +30 -22
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/tensorflow.py +22 -24
- python_doctr-0.12.0/doctr/models/recognition/utils.py +93 -0
- python_doctr-0.12.0/doctr/models/recognition/viptr/__init__.py +4 -0
- python_doctr-0.12.0/doctr/models/recognition/viptr/pytorch.py +277 -0
- python_doctr-0.12.0/doctr/models/recognition/vitstr/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/base.py +3 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/pytorch.py +28 -21
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/tensorflow.py +22 -23
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/zoo.py +27 -11
- python_doctr-0.12.0/doctr/models/utils/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/utils/pytorch.py +41 -34
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/utils/tensorflow.py +31 -23
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/zoo.py +1 -5
- python_doctr-0.12.0/doctr/transforms/functional/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/functional/base.py +4 -11
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/functional/pytorch.py +20 -28
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/functional/tensorflow.py +10 -22
- python_doctr-0.12.0/doctr/transforms/modules/__init__.py +8 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/modules/base.py +48 -55
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/modules/pytorch.py +58 -22
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/modules/tensorflow.py +18 -32
- python_doctr-0.12.0/doctr/utils/common_types.py +17 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/data.py +9 -13
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/fonts.py +2 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/geometry.py +17 -48
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/metrics.py +17 -37
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/multithreading.py +4 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/reconstitution.py +9 -13
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/repr.py +2 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/visualization.py +16 -29
- python_doctr-0.12.0/doctr/version.py +1 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/pyproject.toml +9 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/PKG-INFO +38 -20
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/SOURCES.txt +7 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/requires.txt +13 -2
- {python_doctr-0.10.0 → python_doctr-0.12.0}/setup.py +2 -2
- python_doctr-0.10.0/doctr/contrib/__init__.py +0 -0
- python_doctr-0.10.0/doctr/datasets/datasets/__init__.py +0 -6
- python_doctr-0.10.0/doctr/datasets/generator/__init__.py +0 -6
- python_doctr-0.10.0/doctr/datasets/vocabs.py +0 -82
- python_doctr-0.10.0/doctr/models/classification/magc_resnet/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/classification/predictor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/classification/resnet/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/classification/textnet/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/classification/vit/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/detection/_utils/__init__.py +0 -7
- python_doctr-0.10.0/doctr/models/detection/differentiable_binarization/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/detection/fast/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/detection/linknet/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/detection/predictor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/kie_predictor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/modules/layers/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/modules/transformer/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/modules/vision_transformer/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/predictor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/preprocessor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/crnn/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/master/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/parseq/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/predictor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/predictor/_utils.py +0 -86
- python_doctr-0.10.0/doctr/models/recognition/sar/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/utils.py +0 -89
- python_doctr-0.10.0/doctr/models/recognition/vitstr/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/utils/__init__.py +0 -6
- python_doctr-0.10.0/doctr/transforms/modules/__init__.py +0 -8
- python_doctr-0.10.0/doctr/utils/common_types.py +0 -18
- python_doctr-0.10.0/doctr/version.py +0 -1
- {python_doctr-0.10.0 → python_doctr-0.12.0}/LICENSE +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/factory/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/py.typed +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/dependency_links.txt +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/top_level.txt +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/zip-safe +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/setup.cfg +0 -0
{python_doctr-0.10.0 → python_doctr-0.12.0}/PKG-INFO

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: python-doctr
-Version: 0.10.0
+Version: 0.12.0
 Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
 Author-email: Mindee <contact@mindee.com>
 Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -219,11 +219,11 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Natural Language :: English
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: <4,>=3.
+Requires-Python: <4,>=3.10.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy<3.0.0,>=1.16.0
@@ -239,9 +239,11 @@ Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
 Requires-Dist: Pillow>=9.2.0
 Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
+Requires-Dist: validators>=0.18.0
 Requires-Dist: tqdm>=4.30.0
 Provides-Extra: tf
-Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "tf"
+Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "tf"
+Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "tf"
 Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
 Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
 Provides-Extra: torch
@@ -275,7 +277,8 @@ Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
 Requires-Dist: furo>=2022.3.4; extra == "docs"
 Provides-Extra: dev
-Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "dev"
+Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "dev"
+Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "dev"
 Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
 Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
@@ -300,12 +303,13 @@ Requires-Dist: recommonmark>=0.7.1; extra == "dev"
 Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
 Requires-Dist: furo>=2022.3.4; extra == "dev"
+Dynamic: license-file

 <p align="center">
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>

-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [](https://gurubase.io/g/doctr)


 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -436,9 +440,22 @@ The KIE predictor results per page are in a dictionary format with each key repr

 ## Installation

+> [!WARNING]
+> **TensorFlow Backend Deprecation Notice**
+>
+> Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
+> We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
+> Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
+>
+> This decision was made based on several considerations:
+>
+> - Allows better focus on improving the core library
+> - Frees up resources to develop new features faster
+> - Enables more targeted optimizations with PyTorch
+
 ### Prerequisites

-Python 3.
+Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.

 ### Latest release

@@ -502,6 +519,7 @@ Credits where it's due: this repository is implementing, among others, architect
 - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
 - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
 - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).

 ## More goodies

@@ -557,37 +575,37 @@ Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to

 ### Docker container

-
+We offer Docker container support for easy testing and deployment. [Here are the available docker tags.](https://github.com/mindee/doctr/pkgs/container/doctr).

 #### Using GPU with docTR Docker Images

-The docTR Docker images are GPU-ready and based on CUDA `
-
+The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch or TensorFlow won't be able to initialize the GPU.
+Please ensure that Docker is configured to use your GPU.

 To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).

 Once Docker is configured to use GPUs, you can run docTR Docker containers with GPU support:

 ```shell
-docker run -it --gpus all ghcr.io/mindee/doctr:
+docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
 ```

 #### Available Tags

-The Docker images for docTR follow a specific tag nomenclature: `<
+The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:

-- `<
-- `<python_version>`: `3.
-- `<
-- `<
-- `<YYYY-MM>`: e.g. `2023-09`
+- `<deps>`: `tf`, `torch`, `tf-viz-html-contrib` or `torch-viz-html-contrib`.
+- `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
+- `<doctr_version>`: a tag >= `v0.11.0`
+- `<YYYY-MM>`: e.g. `2014-10`

 Here are examples of different image tags:

 | Tag | Description |
 |----------------------------|---------------------------------------------------|
-| `tf-py3.
-| `torch-py3.
+| `tf-py3.10.13-v0.11.0` | TensorFlow version `3.10.13` with docTR `v0.11.0`. |
+| `torch-viz-html-contrib-py3.11.8-2024-10` | Torch with extra dependencies version `3.11.8` from latest commit on `main` in `2024-10`. |
+| `torch-py3.11.8-2024-10`| PyTorch version `3.11.8` from latest commit on `main` in `2024-10`. |

 #### Building Docker Images Locally

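The most consequential metadata change above is the platform-conditional TensorFlow requirement: on Linux the `tf` and `dev` extras now resolve to `tensorflow[and-cuda]`, everywhere else to plain `tensorflow`. A small illustration (not part of the package; it assumes the `packaging` library, which pip uses internally) of how such PEP 508 markers evaluate:

```python
# Sketch only: evaluate the environment markers added in the 0.12.0 metadata.
from packaging.markers import Marker

linux_tf = Marker('sys_platform == "linux" and extra == "tf"')
other_tf = Marker('sys_platform != "linux" and extra == "tf"')

# "extra" is supplied by the installer when the [tf] extra is requested;
# sys_platform comes from the running interpreter.
env = {"extra": "tf"}
print(linux_tf.evaluate(env))  # True on Linux  -> tensorflow[and-cuda] is selected
print(other_tf.evaluate(env))  # True elsewhere -> plain tensorflow is selected
```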
{python_doctr-0.10.0 → python_doctr-0.12.0}/README.md

@@ -2,7 +2,7 @@
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>

-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [](https://gurubase.io/g/doctr)


 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -133,9 +133,22 @@ The KIE predictor results per page are in a dictionary format with each key repr

 ## Installation

+> [!WARNING]
+> **TensorFlow Backend Deprecation Notice**
+>
+> Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
+> We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
+> Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
+>
+> This decision was made based on several considerations:
+>
+> - Allows better focus on improving the core library
+> - Frees up resources to develop new features faster
+> - Enables more targeted optimizations with PyTorch
+
 ### Prerequisites

-Python 3.
+Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.

 ### Latest release

@@ -199,6 +212,7 @@ Credits where it's due: this repository is implementing, among others, architect
 - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
 - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
 - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).

 ## More goodies

@@ -254,37 +268,37 @@ Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to

 ### Docker container

-
+We offer Docker container support for easy testing and deployment. [Here are the available docker tags.](https://github.com/mindee/doctr/pkgs/container/doctr).

 #### Using GPU with docTR Docker Images

-The docTR Docker images are GPU-ready and based on CUDA `
-
+The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch or TensorFlow won't be able to initialize the GPU.
+Please ensure that Docker is configured to use your GPU.

 To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).

 Once Docker is configured to use GPUs, you can run docTR Docker containers with GPU support:

 ```shell
-docker run -it --gpus all ghcr.io/mindee/doctr:
+docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
 ```

 #### Available Tags

-The Docker images for docTR follow a specific tag nomenclature: `<
+The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:

-- `<
-- `<python_version>`: `3.
-- `<
-- `<
-- `<YYYY-MM>`: e.g. `2023-09`
+- `<deps>`: `tf`, `torch`, `tf-viz-html-contrib` or `torch-viz-html-contrib`.
+- `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
+- `<doctr_version>`: a tag >= `v0.11.0`
+- `<YYYY-MM>`: e.g. `2014-10`

 Here are examples of different image tags:

 | Tag | Description |
 |----------------------------|---------------------------------------------------|
-| `tf-py3.
-| `torch-py3.
+| `tf-py3.10.13-v0.11.0` | TensorFlow version `3.10.13` with docTR `v0.11.0`. |
+| `torch-viz-html-contrib-py3.11.8-2024-10` | Torch with extra dependencies version `3.11.8` from latest commit on `main` in `2024-10`. |
+| `torch-py3.11.8-2024-10`| PyTorch version `3.11.8` from latest commit on `main` in `2024-10`. |

 #### Building Docker Images Locally

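Given the TensorFlow deprecation notice added above, the PyTorch backend is the recommended path going forward. A minimal quick-start sketch (the image path is a placeholder; assumes `pip install "python-doctr[torch]"`):

```python
# Minimal OCR run with the PyTorch backend; no TensorFlow required.
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

predictor = ocr_predictor(pretrained=True)             # end-to-end detection + recognition
pages = DocumentFile.from_images(["path/to/page.jpg"]) # also supports from_pdf / from_url
result = predictor(pages)                              # structured Document (pages -> blocks -> lines -> words)
print(result.render())                                 # plain-text rendering
```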
python_doctr-0.12.0/doctr/contrib/__init__.py (new file)

@@ -0,0 +1 @@
+from .artefacts import ArtefactDetector
{python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/contrib/artefacts.py

@@ -1,9 +1,9 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.

 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

-from typing import Any
+from typing import Any

 import cv2
 import numpy as np
@@ -14,7 +14,7 @@ from .base import _BasePredictor

 __all__ = ["ArtefactDetector"]

-default_cfgs:
+default_cfgs: dict[str, dict[str, Any]] = {
     "yolov8_artefact": {
         "input_shape": (3, 1024, 1024),
         "labels": ["bar_code", "qr_code", "logo", "photo"],
@@ -34,7 +34,6 @@ class ArtefactDetector(_BasePredictor):
     >>> results = detector(doc)

     Args:
-    ----
         arch: the architecture to use
         batch_size: the batch size to use
         model_path: the path to the model to use
@@ -50,9 +49,9 @@ class ArtefactDetector(_BasePredictor):
         self,
         arch: str = "yolov8_artefact",
         batch_size: int = 2,
-        model_path:
-        labels:
-        input_shape:
+        model_path: str | None = None,
+        labels: list[str] | None = None,
+        input_shape: tuple[int, int, int] | None = None,
         conf_threshold: float = 0.5,
         iou_threshold: float = 0.5,
         **kwargs: Any,
@@ -66,7 +65,7 @@ class ArtefactDetector(_BasePredictor):
     def preprocess(self, img: np.ndarray) -> np.ndarray:
         return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)

-    def postprocess(self, output:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]:
         results = []

         for batch in zip(output, input_images):
@@ -109,7 +108,6 @@ class ArtefactDetector(_BasePredictor):
         Display the results

         Args:
-        ----
             **kwargs: additional keyword arguments to be passed to `plt.show`
         """
         requires_package("matplotlib", "`.show()` requires matplotlib installed")
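For context, the class edited above is the ONNX-based artefact detector in `doctr.contrib`, now re-exported by the new `doctr/contrib/__init__.py`. A usage sketch based on its docstring; it requires `onnxruntime`, and the image path is a placeholder:

```python
from doctr.io import DocumentFile
from doctr.contrib import ArtefactDetector  # re-exported by the new __init__.py

doc = DocumentFile.from_images(["path/to/page.jpg"])
detector = ArtefactDetector(batch_size=2, conf_threshold=0.5, iou_threshold=0.5)
results = detector(doc)   # per-page detections (bar_code / qr_code / logo / photo)
detector.show()           # optional matplotlib preview of the detected boxes
```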
{python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/contrib/base.py

@@ -1,9 +1,9 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.

 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

-from typing import Any
+from typing import Any

 import numpy as np

@@ -16,32 +16,29 @@ class _BasePredictor:
     Base class for all predictors

     Args:
-    ----
         batch_size: the batch size to use
         url: the url to use to download a model if needed
         model_path: the path to the model to use
         **kwargs: additional arguments to be passed to `download_from_url`
     """

-    def __init__(self, batch_size: int, url:
+    def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None:
         self.batch_size = batch_size
         self.session = self._init_model(url, model_path, **kwargs)

-        self._inputs:
-        self._results:
+        self._inputs: list[np.ndarray] = []
+        self._results: list[Any] = []

-    def _init_model(self, url:
+    def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any:
         """
         Download the model from the given url if needed

         Args:
-        ----
             url: the url to use
             model_path: the path to the model to use
             **kwargs: additional arguments to be passed to `download_from_url`

         Returns:
-        -------
             Any: the ONNX loaded model
         """
         requires_package("onnxruntime", "`.contrib` module requires `onnxruntime` to be installed.")
@@ -57,40 +54,34 @@ class _BasePredictor:
         Preprocess the input image

         Args:
-        ----
             img: the input image to preprocess

         Returns:
-        -------
             np.ndarray: the preprocessed image
         """
         raise NotImplementedError

-    def postprocess(self, output:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
         """
         Postprocess the model output

         Args:
-        ----
             output: the model output to postprocess
             input_images: the input images used to generate the output

         Returns:
-        -------
             Any: the postprocessed output
         """
         raise NotImplementedError

-    def __call__(self, inputs:
+    def __call__(self, inputs: list[np.ndarray]) -> Any:
         """
         Call the model on the given inputs

         Args:
-        ----
             inputs: the inputs to use

         Returns:
-        -------
             Any: the postprocessed output
         """
         self._inputs = inputs
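`_BasePredictor` only changes its type hints and docstring layout here; the contract itself is unchanged. A schematic subclass (hypothetical, not from the package) showing what `preprocess` and `postprocess` are expected to provide under that contract:

```python
from typing import Any

import numpy as np

from doctr.contrib.base import _BasePredictor


class DummyPredictor(_BasePredictor):
    """Hypothetical predictor sketch: normalizes inputs, returns raw outputs as lists."""

    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # scale to [0, 1] and move channels first, as an ONNX session typically expects
        return np.transpose(img.astype(np.float32) / 255.0, (2, 0, 1))

    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
        # map raw session outputs back to one result per input image
        return [out.tolist() for out in output]
```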
python_doctr-0.12.0/doctr/datasets/coco_text.py (new file)

@@ -0,0 +1,139 @@
+# Copyright (C) 2021-2025, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import AbstractDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["COCOTEXT"]
+
+
+class COCOTEXT(AbstractDataset):
+    """
+    COCO-Text dataset from `"COCO-Text: Dataset and Benchmark for Text Detection and Recognition in Natural Images"
+    <https://arxiv.org/pdf/1601.07140v2>`_ |
+    `"homepage" <https://bgshih.github.io/cocotext/>`_.
+
+    >>> # NOTE: You need to download the dataset first.
+    >>> from doctr.datasets import COCOTEXT
+    >>> train_set = COCOTEXT(train=True, img_folder="/path/to/coco_text/train2014/",
+    >>>                      label_path="/path/to/coco_text/cocotext.v2.json")
+    >>> img, target = train_set[0]
+    >>> test_set = COCOTEXT(train=False, img_folder="/path/to/coco_text/train2014/",
+    >>>                     label_path = "/path/to/coco_text/cocotext.v2.json")
+    >>> img, target = test_set[0]
+
+    Args:
+        img_folder: folder with all the images of the dataset
+        label_path: path to the annotations file of the dataset
+        train: whether the subset should be the training one
+        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
+        recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
+        **kwargs: keyword arguments from `AbstractDataset`.
+    """
+
+    def __init__(
+        self,
+        img_folder: str,
+        label_path: str,
+        train: bool = True,
+        use_polygons: bool = False,
+        recognition_task: bool = False,
+        detection_task: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(
+            img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
+        )
+        # Task check
+        if recognition_task and detection_task:
+            raise ValueError(
+                " 'recognition' and 'detection task' cannot be set to True simultaneously. "
+                + " To get the whole dataset with boxes and labels leave both parameters to False "
+            )
+
+        # File existence check
+        if not os.path.exists(label_path) or not os.path.exists(img_folder):
+            raise FileNotFoundError(f"unable to find {label_path if not os.path.exists(label_path) else img_folder}")
+
+        tmp_root = img_folder
+        self.train = train
+        np_dtype = np.float32
+        self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
+
+        with open(label_path, "r") as file:
+            data = json.load(file)
+
+        # Filter images based on the set
+        img_items = [img for img in data["imgs"].items() if (img[1]["set"] == "train") == train]
+        box: list[float] | np.ndarray
+
+        for img_id, img_info in tqdm(img_items, desc="Preparing and Loading COCOTEXT", total=len(img_items)):
+            img_path = os.path.join(img_folder, img_info["file_name"])
+
+            # File existence check
+            if not os.path.exists(img_path):  # pragma: no cover
+                raise FileNotFoundError(f"Unable to locate {img_path}")
+
+            # Get annotations for the current image (only legible text)
+            annotations = [
+                ann
+                for ann in data["anns"].values()
+                if ann["image_id"] == int(img_id) and ann["legibility"] == "legible"
+            ]
+
+            # Some images have no annotations with readable text
+            if not annotations:  # pragma: no cover
+                continue
+
+            _targets = []
+
+            for annotation in annotations:
+                x, y, w, h = annotation["bbox"]
+                if use_polygons:
+                    # (x, y) coordinates of top left, top right, bottom right, bottom left corners
+                    box = np.array(
+                        [
+                            [x, y],
+                            [x + w, y],
+                            [x + w, y + h],
+                            [x, y + h],
+                        ],
+                        dtype=np_dtype,
+                    )
+                else:
+                    # (xmin, ymin, xmax, ymax) coordinates
+                    box = [x, y, x + w, y + h]
+                _targets.append((annotation["utf8_string"], box))
+            text_targets, box_targets = zip(*_targets)
+
+            if recognition_task:
+                crops = crop_bboxes_from_image(
+                    img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
+                )
+                for crop, label in zip(crops, list(text_targets)):
+                    if label and " " not in label:
+                        self.data.append((crop, label))
+
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
+            else:
+                self.data.append((
+                    img_path,
+                    dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)),
+                ))
+
+        self.root = tmp_root
+
+    def extra_repr(self) -> str:
+        return f"train={self.train}"
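The new `COCOTEXT` loader follows the same interface as the other dataset classes. A usage sketch mirroring its docstring (the `train2014` images and the `cocotext.v2.json` annotations must be downloaded separately):

```python
from doctr.datasets import COCOTEXT

train_set = COCOTEXT(
    train=True,
    img_folder="/path/to/coco_text/train2014/",
    label_path="/path/to/coco_text/cocotext.v2.json",
)
img, target = train_set[0]  # target: {"boxes": ..., "labels": ...} by default

# Recognition-only mode yields (crop, word) pairs and skips labels containing spaces.
reco_set = COCOTEXT(
    train=True,
    img_folder="/path/to/coco_text/train2014/",
    label_path="/path/to/coco_text/cocotext.v2.json",
    recognition_task=True,
)
```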
{python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/cord.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.

 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,7 @@
 import json
 import os
 from pathlib import Path
-from typing import Any
+from typing import Any

 import numpy as np
 from tqdm import tqdm
@@ -29,7 +29,6 @@ class CORD(VisionDataset):
     >>> img, target = train_set[0]

     Args:
-    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
@@ -72,12 +71,14 @@ class CORD(VisionDataset):
                 + "To get the whole dataset with boxes and labels leave both parameters to False."
             )

-        #
+        # list images
         tmp_root = os.path.join(self.root, "image")
-        self.data:
+        self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
         self.train = train
         np_dtype = np.float32
-        for img_path in tqdm(
+        for img_path in tqdm(
+            iterable=os.listdir(tmp_root), desc="Preparing and Loading CORD", total=len(os.listdir(tmp_root))
+        ):
             # File existence check
             if not os.path.exists(os.path.join(tmp_root, img_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path)}")
@@ -91,7 +92,7 @@ class CORD(VisionDataset):
                         if len(word["text"]) > 0:
                             x = word["quad"]["x1"], word["quad"]["x2"], word["quad"]["x3"], word["quad"]["x4"]
                             y = word["quad"]["y1"], word["quad"]["y2"], word["quad"]["y3"], word["quad"]["y4"]
-                            box:
+                            box: list[float] | np.ndarray
                             if use_polygons:
                                 # (x, y) coordinates of top left, top right, bottom right, bottom left corners
                                 box = np.array(
@@ -115,7 +116,8 @@ class CORD(VisionDataset):
                     img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
                 )
                 for crop, label in zip(crops, list(text_targets)):
-                    self.data.append((crop, label))
+                    if " " not in label:
+                        self.data.append((crop, label))
             elif detection_task:
                 self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
             else:
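The CORD change mirrors COCO-Text: in `recognition_task` mode, crops whose label contains a space are now skipped. A short sketch; the `download=True` flag is assumed from the upstream docstring and is not shown in this diff:

```python
from doctr.datasets import CORD

reco_set = CORD(train=True, download=True, recognition_task=True)
crop, word = reco_set[0]  # word-level crop and its (space-free) label
```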