python-doctr 0.8.0__tar.gz → 0.8.1__tar.gz
This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
- {python-doctr-0.8.0/python_doctr.egg-info → python-doctr-0.8.1}/PKG-INFO +14 -13
- {python-doctr-0.8.0 → python-doctr-0.8.1}/README.md +12 -11
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/__init__.py +1 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/differentiable_binarization/tensorflow.py +14 -18
- python-doctr-0.8.1/doctr/models/detection/fast/base.py +256 -0
- python-doctr-0.8.1/doctr/models/detection/fast/pytorch.py +442 -0
- python-doctr-0.8.1/doctr/models/detection/fast/tensorflow.py +428 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/zoo.py +14 -2
- python-doctr-0.8.1/doctr/models/modules/layers/pytorch.py +166 -0
- python-doctr-0.8.1/doctr/models/modules/layers/tensorflow.py +175 -0
- python-doctr-0.8.1/doctr/models/utils/__init__.py +6 -0
- python-doctr-0.8.1/doctr/version.py +1 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/pyproject.toml +1 -1
- {python-doctr-0.8.0 → python-doctr-0.8.1/python_doctr.egg-info}/PKG-INFO +14 -13
- {python-doctr-0.8.0 → python-doctr-0.8.1}/python_doctr.egg-info/SOURCES.txt +4 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/setup.py +1 -1
- python-doctr-0.8.0/doctr/models/modules/layers/pytorch.py +0 -86
- python-doctr-0.8.0/doctr/models/modules/layers/tensorflow.py +0 -95
- python-doctr-0.8.0/doctr/version.py +0 -1
- {python-doctr-0.8.0 → python-doctr-0.8.1}/LICENSE +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/cord.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/datasets/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/datasets/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/datasets/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/datasets/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/detection.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/doc_artefacts.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/funsd.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/generator/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/generator/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/generator/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/generator/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/ic03.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/ic13.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/iiit5k.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/iiithws.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/imgur5k.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/loader.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/mjsynth.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/ocr.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/orientation.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/recognition.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/sroie.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/svhn.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/svt.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/synthtext.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/utils.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/vocabs.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/datasets/wildreceipt.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/file_utils.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/io/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/io/elements.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/io/html.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/io/image/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/io/image/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/io/image/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/io/image/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/io/pdf.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/io/reader.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/_utils.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/artefacts/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/artefacts/barcode.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/artefacts/face.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/builder.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/magc_resnet/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/magc_resnet/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/magc_resnet/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/mobilenet/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/mobilenet/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/mobilenet/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/predictor/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/predictor/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/predictor/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/resnet/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/resnet/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/resnet/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/textnet/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/textnet/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/textnet/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/vgg/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/vgg/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/vgg/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/vit/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/vit/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/vit/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/classification/zoo.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/core.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/_utils/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/_utils/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/_utils/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/core.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/differentiable_binarization/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/differentiable_binarization/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/differentiable_binarization/pytorch.py +0 -0
- {python-doctr-0.8.0/doctr/models/detection/linknet → python-doctr-0.8.1/doctr/models/detection/fast}/__init__.py +0 -0
- {python-doctr-0.8.0/doctr/models/modules/layers → python-doctr-0.8.1/doctr/models/detection/linknet}/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/linknet/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/linknet/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/linknet/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/predictor/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/predictor/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/predictor/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/factory/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/factory/hub.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/kie_predictor/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/kie_predictor/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/kie_predictor/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/kie_predictor/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/modules/__init__.py +0 -0
- {python-doctr-0.8.0/doctr/models/modules/transformer → python-doctr-0.8.1/doctr/models/modules/layers}/__init__.py +0 -0
- {python-doctr-0.8.0/doctr/models/modules/vision_transformer → python-doctr-0.8.1/doctr/models/modules/transformer}/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/modules/transformer/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/modules/transformer/tensorflow.py +0 -0
- {python-doctr-0.8.0/doctr/models/preprocessor → python-doctr-0.8.1/doctr/models/modules/vision_transformer}/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/modules/vision_transformer/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/modules/vision_transformer/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/obj_detection/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/obj_detection/faster_rcnn/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/predictor/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/predictor/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/predictor/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/predictor/tensorflow.py +0 -0
- {python-doctr-0.8.0/doctr/models/recognition/crnn → python-doctr-0.8.1/doctr/models/preprocessor}/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/preprocessor/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/preprocessor/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/core.py +0 -0
- {python-doctr-0.8.0/doctr/models/recognition/master → python-doctr-0.8.1/doctr/models/recognition/crnn}/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/crnn/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/crnn/tensorflow.py +0 -0
- {python-doctr-0.8.0/doctr/models/recognition/parseq → python-doctr-0.8.1/doctr/models/recognition/master}/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/master/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/master/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/master/tensorflow.py +0 -0
- {python-doctr-0.8.0/doctr/models/recognition/sar → python-doctr-0.8.1/doctr/models/recognition/parseq}/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/parseq/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/parseq/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/parseq/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/predictor/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/predictor/_utils.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/predictor/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/predictor/tensorflow.py +0 -0
- {python-doctr-0.8.0/doctr/models/recognition/vitstr → python-doctr-0.8.1/doctr/models/recognition/sar}/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/sar/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/sar/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/utils.py +0 -0
- {python-doctr-0.8.0/doctr/models/utils → python-doctr-0.8.1/doctr/models/recognition/vitstr}/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/vitstr/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/vitstr/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/vitstr/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/recognition/zoo.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/utils/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/utils/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/zoo.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/transforms/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/transforms/functional/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/transforms/functional/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/transforms/functional/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/transforms/functional/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/transforms/modules/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/transforms/modules/base.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/transforms/modules/pytorch.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/transforms/modules/tensorflow.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/utils/__init__.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/utils/common_types.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/utils/data.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/utils/fonts.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/utils/geometry.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/utils/metrics.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/utils/multithreading.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/utils/repr.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/utils/visualization.py +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/python_doctr.egg-info/dependency_links.txt +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/python_doctr.egg-info/requires.txt +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/python_doctr.egg-info/top_level.txt +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/python_doctr.egg-info/zip-safe +0 -0
- {python-doctr-0.8.0 → python-doctr-0.8.1}/setup.cfg +0 -0
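
The headline change in this file list is the new FAST text-detection architecture (`doctr/models/detection/fast/` with base, PyTorch, and TensorFlow implementations, plus reworked `modules/layers` and an extended detection zoo). As a quick orientation, here is a minimal sketch of how the new detector would be selected through the zoo; the architecture name `fast_base` and the recognition/pretrained arguments follow docTR's usual zoo conventions and are assumptions, not taken from this diff:

```python
# Minimal sketch (assumptions: the new FAST models are registered in the
# detection zoo under names such as "fast_base", and pretrained weights
# are downloadable).
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

doc = DocumentFile.from_images(["sample_page.jpg"])
predictor = ocr_predictor(det_arch="fast_base", reco_arch="crnn_vgg16_bn", pretrained=True)
result = predictor(doc)
print(result.render())  # plain-text rendering of the detected and recognized words
```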
{python-doctr-0.8.0/python_doctr.egg-info → python-doctr-0.8.1}/PKG-INFO +14 -13

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: python-doctr
-Version: 0.8.0
+Version: 0.8.1
 Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
 Author-email: Mindee <contact@mindee.com>
 Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -209,7 +209,7 @@ License: Apache License
 Project-URL: documentation, https://mindee.github.io/doctr
 Project-URL: repository, https://github.com/mindee/doctr
 Project-URL: tracker, https://github.com/mindee/doctr/issues
-Project-URL: changelog, https://github.
+Project-URL: changelog, https://mindee.github.io/doctr/changelog.html
 Keywords: OCR,deep learning,computer vision,tensorflow,pytorch,text detection,text recognition
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
@@ -296,10 +296,10 @@ Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
 Requires-Dist: furo>=2022.3.4; extra == "dev"
 
 <p align="center">
-  <img src="docs/images/Logo_doctr.gif" width="40%">
+  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>
 
-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
 
 
 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -309,7 +309,7 @@ What you can expect from this repository:
 - efficient ways to parse textual information (localize and identify each word) from your documents
 - guidance on how to integrate this in your current architecture
 
-![…](docs/images/…)
+![…](https://github.com/mindee/doctr/raw/main/docs/images/…)
 
 ## Quick Tour
 
@@ -375,7 +375,7 @@ To interpret your model's predictions, you can visualize them interactively as f
 result.show()
 ```
 
-![…](docs/images/…)
+![…](https://github.com/mindee/doctr/raw/main/docs/images/…)
 
 Or even rebuild the original document from its predictions:
 
@@ -386,7 +386,7 @@ synthetic_pages = result.synthesize()
 plt.imshow(synthetic_pages[0]); plt.axis('off'); plt.show()
 ```
 
-![…](docs/images/…)
+![…](https://github.com/mindee/doctr/raw/main/docs/images/…)
 
 The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`).
 To get a better understanding of our document model, check our [documentation](https://mindee.github.io/doctr/modules/io.html#document-structure):
@@ -425,7 +425,7 @@ The KIE predictor results per page are in a dictionary format with each key repr
 
 ### If you are looking for support from the Mindee team
 
-[![…](docs/images/…)](https://mindee.com/product/doctr)
+[![…](https://github.com/mindee/doctr/raw/main/docs/images/…)](https://mindee.com/product/doctr)
 
 ## Installation
 
@@ -494,6 +494,7 @@ Credits where it's due: this repository is implementing, among others, architect
 
 - DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf).
 - LinkNet: [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/pdf/1707.03718.pdf)
+- FAST: [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://arxiv.org/pdf/2111.02394.pdf)
 
 ### Text Recognition
 
@@ -513,7 +514,7 @@ The full package documentation is available [here](https://mindee.github.io/doct
 
 A minimal demo app is provided for you to play with our end-to-end OCR models!
 
-![…](docs/images/…)
+![…](https://github.com/mindee/doctr/raw/main/docs/images/…)
 
 #### Live demo
 
@@ -553,11 +554,11 @@ USE_TORCH=1 streamlit run demo/app.py
 Instead of having your demo actually running Python, you would prefer to run everything in your web browser?
 Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to get started!
 
-![…](docs/images/…)
+![…](https://github.com/mindee/doctr/raw/main/docs/images/…)
 
 ### Docker container
 
-[We
+[We offer Docker container support for easy testing and deployment](https://github.com/mindee/doctr/pkgs/container/doctr).
 
 #### Using GPU with docTR Docker Images
 
@@ -673,8 +674,8 @@ If you wish to cite this project, feel free to use this [BibTeX](http://www.bibt
 
 If you scrolled down to this section, you most likely appreciate open source. Do you feel like extending the range of our supported characters? Or perhaps submitting a paper implementation? Or contributing in any other way?
 
-You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](
+You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](https://mindee.github.io/doctr/contributing/contributing.html)) for you to easily do so!
 
 ## License
 
-Distributed under the Apache 2.0 License. See [`LICENSE`](
+Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/mindee/doctr?tab=Apache-2.0-1-ov-file#readme) for more information.
````
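
Since PKG-INFO is generated distribution metadata, the version bump and the new changelog URL can be verified on an installed 0.8.1 with the standard library alone; a small sketch:

```python
# Minimal sketch: inspecting the shipped metadata (what PKG-INFO becomes)
# of an installed python-doctr 0.8.1 using only the standard library.
from importlib.metadata import metadata, version

print(version("python-doctr"))  # expected: 0.8.1
for url in metadata("python-doctr").get_all("Project-URL"):
    print(url)  # e.g. "changelog, https://mindee.github.io/doctr/changelog.html"
```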
{python-doctr-0.8.0 → python-doctr-0.8.1}/README.md +12 -11

````diff
@@ -1,8 +1,8 @@
 <p align="center">
-  <img src="docs/images/Logo_doctr.gif" width="40%">
+  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>
 
-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
 
 
 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -12,7 +12,7 @@ What you can expect from this repository:
 - efficient ways to parse textual information (localize and identify each word) from your documents
 - guidance on how to integrate this in your current architecture
 
-![…](docs/images/…)
+![…](https://github.com/mindee/doctr/raw/main/docs/images/…)
 
 ## Quick Tour
 
@@ -78,7 +78,7 @@ To interpret your model's predictions, you can visualize them interactively as f
 result.show()
 ```
 
-![…](docs/images/…)
+![…](https://github.com/mindee/doctr/raw/main/docs/images/…)
 
 Or even rebuild the original document from its predictions:
 
@@ -89,7 +89,7 @@ synthetic_pages = result.synthesize()
 plt.imshow(synthetic_pages[0]); plt.axis('off'); plt.show()
 ```
 
-![…](docs/images/…)
+![…](https://github.com/mindee/doctr/raw/main/docs/images/…)
 
 The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`).
 To get a better understanding of our document model, check our [documentation](https://mindee.github.io/doctr/modules/io.html#document-structure):
@@ -128,7 +128,7 @@ The KIE predictor results per page are in a dictionary format with each key repr
 
 ### If you are looking for support from the Mindee team
 
-[![…](docs/images/…)](https://mindee.com/product/doctr)
+[![…](https://github.com/mindee/doctr/raw/main/docs/images/…)](https://mindee.com/product/doctr)
 
 ## Installation
 
@@ -197,6 +197,7 @@ Credits where it's due: this repository is implementing, among others, architect
 
 - DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf).
 - LinkNet: [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/pdf/1707.03718.pdf)
+- FAST: [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://arxiv.org/pdf/2111.02394.pdf)
 
 ### Text Recognition
 
@@ -216,7 +217,7 @@ The full package documentation is available [here](https://mindee.github.io/doct
 
 A minimal demo app is provided for you to play with our end-to-end OCR models!
 
-![…](docs/images/…)
+![…](https://github.com/mindee/doctr/raw/main/docs/images/…)
 
 #### Live demo
 
@@ -256,11 +257,11 @@ USE_TORCH=1 streamlit run demo/app.py
 Instead of having your demo actually running Python, you would prefer to run everything in your web browser?
 Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to get started!
 
-![…](docs/images/…)
+![…](https://github.com/mindee/doctr/raw/main/docs/images/…)
 
 ### Docker container
 
-[We
+[We offer Docker container support for easy testing and deployment](https://github.com/mindee/doctr/pkgs/container/doctr).
 
 #### Using GPU with docTR Docker Images
 
@@ -376,8 +377,8 @@ If you wish to cite this project, feel free to use this [BibTeX](http://www.bibt
 
 If you scrolled down to this section, you most likely appreciate open source. Do you feel like extending the range of our supported characters? Or perhaps submitting a paper implementation? Or contributing in any other way?
 
-You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](
+You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](https://mindee.github.io/doctr/contributing/contributing.html)) for you to easily do so!
 
 ## License
 
-Distributed under the Apache 2.0 License. See [`LICENSE`](
+Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/mindee/doctr?tab=Apache-2.0-1-ov-file#readme) for more information.
````
{python-doctr-0.8.0 → python-doctr-0.8.1}/doctr/models/detection/differentiable_binarization/tensorflow.py +14 -18

````diff
@@ -147,24 +147,20 @@ class DBNet(_DBNet, keras.Model, NestedObject):
         _inputs = [layers.Input(shape=in_shape[1:]) for in_shape in self.feat_extractor.output_shape]
         output_shape = tuple(self.fpn(_inputs).shape)
 
-        self.probability_head = keras.Sequential(
-            [
-                *conv_sequence(64, "relu", True, kernel_size=3, input_shape=output_shape[1:]),
-                layers.Conv2DTranspose(64, 2, strides=2, use_bias=False, kernel_initializer="he_normal"),
-                layers.BatchNormalization(),
-                layers.Activation("relu"),
-                layers.Conv2DTranspose(num_classes, 2, strides=2, kernel_initializer="he_normal"),
-            ]
-        )
-        self.threshold_head = keras.Sequential(
-            [
-                *conv_sequence(64, "relu", True, kernel_size=3, input_shape=output_shape[1:]),
-                layers.Conv2DTranspose(64, 2, strides=2, use_bias=False, kernel_initializer="he_normal"),
-                layers.BatchNormalization(),
-                layers.Activation("relu"),
-                layers.Conv2DTranspose(num_classes, 2, strides=2, kernel_initializer="he_normal"),
-            ]
-        )
+        self.probability_head = keras.Sequential([
+            *conv_sequence(64, "relu", True, kernel_size=3, input_shape=output_shape[1:]),
+            layers.Conv2DTranspose(64, 2, strides=2, use_bias=False, kernel_initializer="he_normal"),
+            layers.BatchNormalization(),
+            layers.Activation("relu"),
+            layers.Conv2DTranspose(num_classes, 2, strides=2, kernel_initializer="he_normal"),
+        ])
+        self.threshold_head = keras.Sequential([
+            *conv_sequence(64, "relu", True, kernel_size=3, input_shape=output_shape[1:]),
+            layers.Conv2DTranspose(64, 2, strides=2, use_bias=False, kernel_initializer="he_normal"),
+            layers.BatchNormalization(),
+            layers.Activation("relu"),
+            layers.Conv2DTranspose(num_classes, 2, strides=2, kernel_initializer="he_normal"),
+        ])
 
         self.postprocessor = DBPostProcessor(
             assume_straight_pages=assume_straight_pages, bin_thresh=bin_thresh, box_thresh=box_thresh
````
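
The change above is purely structural: both heads keep the same layers (a 3x3 conv block followed by two stride-2 transposed convolutions), just written inline rather than as a nested list. A standalone sketch of the head's 4x upsampling behaviour, with docTR's `conv_sequence` helper approximated by an equivalent Conv2D + BatchNorm + ReLU stack (an assumption made here for self-containment):

```python
# Minimal sketch: the DBNet head upsamples the FPN feature map by 4x
# (two stride-2 Conv2DTranspose layers), so a (H/4, W/4) feature map
# comes back out at full (H, W) resolution. Verified with dummy data.
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

num_classes = 1
head = keras.Sequential([
    # stand-in for conv_sequence(64, "relu", True, kernel_size=3, ...)
    layers.Conv2D(64, 3, padding="same", use_bias=False, kernel_initializer="he_normal"),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.Conv2DTranspose(64, 2, strides=2, use_bias=False, kernel_initializer="he_normal"),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.Conv2DTranspose(num_classes, 2, strides=2, kernel_initializer="he_normal"),
])
print(head(np.zeros((1, 64, 64, 128), dtype=np.float32)).shape)  # (1, 256, 256, 1)
```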
python-doctr-0.8.1/doctr/models/detection/fast/base.py +256 -0 (new file)

````diff
@@ -0,0 +1,256 @@
+# Copyright (C) 2021-2024, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+# Credits: post-processing adapted from https://github.com/xuannianz/DifferentiableBinarization
+
+from typing import Dict, List, Tuple, Union
+
+import cv2
+import numpy as np
+import pyclipper
+from shapely.geometry import Polygon
+
+from doctr.models.core import BaseModel
+
+from ..core import DetectionPostProcessor
+
+__all__ = ["_FAST", "FASTPostProcessor"]
+
+
+class FASTPostProcessor(DetectionPostProcessor):
+    """Implements a post processor for FAST model.
+
+    Args:
+    ----
+        bin_thresh: threshold used to binzarized p_map at inference time
+        box_thresh: minimal objectness score to consider a box
+        assume_straight_pages: whether the inputs were expected to have horizontal text elements
+    """
+
+    def __init__(
+        self,
+        bin_thresh: float = 0.3,
+        box_thresh: float = 0.1,
+        assume_straight_pages: bool = True,
+    ) -> None:
+        super().__init__(box_thresh, bin_thresh, assume_straight_pages)
+        self.unclip_ratio = 1.0
+
+    def polygon_to_box(
+        self,
+        points: np.ndarray,
+    ) -> np.ndarray:
+        """Expand a polygon (points) by a factor unclip_ratio, and returns a polygon
+
+        Args:
+        ----
+            points: The first parameter.
+
+        Returns:
+        -------
+            a box in absolute coordinates (xmin, ymin, xmax, ymax) or (4, 2) array (quadrangle)
+        """
+        if not self.assume_straight_pages:
+            # Compute the rectangle polygon enclosing the raw polygon
+            rect = cv2.minAreaRect(points)
+            points = cv2.boxPoints(rect)
+            # Add 1 pixel to correct cv2 approx
+            area = (rect[1][0] + 1) * (1 + rect[1][1])
+            length = 2 * (rect[1][0] + rect[1][1]) + 2
+        else:
+            poly = Polygon(points)
+            area = poly.area
+            length = poly.length
+        distance = area * self.unclip_ratio / length  # compute distance to expand polygon
+        offset = pyclipper.PyclipperOffset()
+        offset.AddPath(points, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+        _points = offset.Execute(distance)
+        # Take biggest stack of points
+        idx = 0
+        if len(_points) > 1:
+            max_size = 0
+            for _idx, p in enumerate(_points):
+                if len(p) > max_size:
+                    idx = _idx
+                    max_size = len(p)
+            # We ensure that _points can be correctly casted to a ndarray
+            _points = [_points[idx]]
+        expanded_points: np.ndarray = np.asarray(_points)  # expand polygon
+        if len(expanded_points) < 1:
+            return None  # type: ignore[return-value]
+        return (
+            cv2.boundingRect(expanded_points)  # type: ignore[return-value]
+            if self.assume_straight_pages
+            else np.roll(cv2.boxPoints(cv2.minAreaRect(expanded_points)), -1, axis=0)
+        )
+
+    def bitmap_to_boxes(
+        self,
+        pred: np.ndarray,
+        bitmap: np.ndarray,
+    ) -> np.ndarray:
+        """Compute boxes from a bitmap/pred_map: find connected components then filter boxes
+
+        Args:
+        ----
+            pred: Pred map from differentiable linknet output
+            bitmap: Bitmap map computed from pred (binarized)
+            angle_tol: Comparison tolerance of the angle with the median angle across the page
+            ratio_tol: Under this limit aspect ratio, we cannot resolve the direction of the crop
+
+        Returns:
+        -------
+            np tensor boxes for the bitmap, each box is a 6-element list
+                containing x, y, w, h, alpha, score for the box
+        """
+        height, width = bitmap.shape[:2]
+        boxes: List[Union[np.ndarray, List[float]]] = []
+        # get contours from connected components on the bitmap
+        contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        for contour in contours:
+            # Check whether smallest enclosing bounding box is not too small
+            if np.any(contour[:, 0].max(axis=0) - contour[:, 0].min(axis=0) < 2):
+                continue
+            # Compute objectness
+            if self.assume_straight_pages:
+                x, y, w, h = cv2.boundingRect(contour)
+                points: np.ndarray = np.array([[x, y], [x, y + h], [x + w, y + h], [x + w, y]])
+                score = self.box_score(pred, points, assume_straight_pages=True)
+            else:
+                score = self.box_score(pred, contour, assume_straight_pages=False)
+
+            if score < self.box_thresh:  # remove polygons with a weak objectness
+                continue
+
+            if self.assume_straight_pages:
+                _box = self.polygon_to_box(points)
+            else:
+                _box = self.polygon_to_box(np.squeeze(contour))
+
+            if self.assume_straight_pages:
+                # compute relative polygon to get rid of img shape
+                x, y, w, h = _box
+                xmin, ymin, xmax, ymax = x / width, y / height, (x + w) / width, (y + h) / height
+                boxes.append([xmin, ymin, xmax, ymax, score])
+            else:
+                # compute relative box to get rid of img shape
+                _box[:, 0] /= width
+                _box[:, 1] /= height
+                boxes.append(_box)
+
+        if not self.assume_straight_pages:
+            return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 4, 2), dtype=pred.dtype)
+        else:
+            return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 5), dtype=pred.dtype)
+
+
+class _FAST(BaseModel):
+    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
+    <https://arxiv.org/pdf/2111.02394.pdf>`_.
+    """
+
+    min_size_box: int = 3
+    assume_straight_pages: bool = True
+    shrink_ratio = 0.1
+
+    def build_target(
+        self,
+        target: List[Dict[str, np.ndarray]],
+        output_shape: Tuple[int, int, int],
+        channels_last: bool = True,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """Build the target, and it's mask to be used from loss computation.
+
+        Args:
+        ----
+            target: target coming from dataset
+            output_shape: shape of the output of the model without batch_size
+            channels_last: whether channels are last or not
+
+        Returns:
+        -------
+            the new formatted target, mask and shrunken text kernel
+        """
+        if any(t.dtype != np.float32 for tgt in target for t in tgt.values()):
+            raise AssertionError("the expected dtype of target 'boxes' entry is 'np.float32'.")
+        if any(np.any((t[:, :4] > 1) | (t[:, :4] < 0)) for tgt in target for t in tgt.values()):
+            raise ValueError("the 'boxes' entry of the target is expected to take values between 0 & 1.")
+
+        h: int
+        w: int
+        if channels_last:
+            h, w, num_classes = output_shape
+        else:
+            num_classes, h, w = output_shape
+        target_shape = (len(target), num_classes, h, w)
+
+        seg_target: np.ndarray = np.zeros(target_shape, dtype=np.uint8)
+        seg_mask: np.ndarray = np.ones(target_shape, dtype=bool)
+        shrunken_kernel: np.ndarray = np.zeros(target_shape, dtype=np.uint8)
+
+        for idx, tgt in enumerate(target):
+            for class_idx, _tgt in enumerate(tgt.values()):
+                # Draw each polygon on gt
+                if _tgt.shape[0] == 0:
+                    # Empty image, full masked
+                    seg_mask[idx, class_idx] = False
+
+                # Absolute bounding boxes
+                abs_boxes = _tgt.copy()
+
+                if abs_boxes.ndim == 3:
+                    abs_boxes[:, :, 0] *= w
+                    abs_boxes[:, :, 1] *= h
+                    polys = abs_boxes
+                    boxes_size = np.linalg.norm(abs_boxes[:, 2, :] - abs_boxes[:, 0, :], axis=-1)
+                    abs_boxes = np.concatenate((abs_boxes.min(1), abs_boxes.max(1)), -1).round().astype(np.int32)
+                else:
+                    abs_boxes[:, [0, 2]] *= w
+                    abs_boxes[:, [1, 3]] *= h
+                    abs_boxes = abs_boxes.round().astype(np.int32)
+                    polys = np.stack(
+                        [
+                            abs_boxes[:, [0, 1]],
+                            abs_boxes[:, [0, 3]],
+                            abs_boxes[:, [2, 3]],
+                            abs_boxes[:, [2, 1]],
+                        ],
+                        axis=1,
+                    )
+                    boxes_size = np.minimum(abs_boxes[:, 2] - abs_boxes[:, 0], abs_boxes[:, 3] - abs_boxes[:, 1])
+
+                for poly, box, box_size in zip(polys, abs_boxes, boxes_size):
+                    # Mask boxes that are too small
+                    if box_size < self.min_size_box:
+                        seg_mask[idx, class_idx, box[1] : box[3] + 1, box[0] : box[2] + 1] = False
+                        continue
+
+                    # Negative shrink for gt, as described in paper
+                    polygon = Polygon(poly)
+                    distance = polygon.area * (1 - np.power(self.shrink_ratio, 2)) / polygon.length
+                    subject = [tuple(coor) for coor in poly]
+                    padding = pyclipper.PyclipperOffset()
+                    padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+                    shrunken = padding.Execute(-distance)
+
+                    # Draw polygon on gt if it is valid
+                    if len(shrunken) == 0:
+                        seg_mask[idx, class_idx, box[1] : box[3] + 1, box[0] : box[2] + 1] = False
+                        continue
+                    shrunken = np.array(shrunken[0]).reshape(-1, 2)
+                    if shrunken.shape[0] <= 2 or not Polygon(shrunken).is_valid:
+                        seg_mask[idx, class_idx, box[1] : box[3] + 1, box[0] : box[2] + 1] = False
+                        continue
+                    cv2.fillPoly(shrunken_kernel[idx, class_idx], [shrunken.astype(np.int32)], 1.0)  # type: ignore[call-overload]
+                    # draw the original polygon on the segmentation target
+                    cv2.fillPoly(seg_target[idx, class_idx], [poly.astype(np.int32)], 1.0)  # type: ignore[call-overload]
+
+        # Don't forget to switch back to channel last if Tensorflow is used
+        if channels_last:
+            seg_target = seg_target.transpose((0, 2, 3, 1))
+            seg_mask = seg_mask.transpose((0, 2, 3, 1))
+            shrunken_kernel = shrunken_kernel.transpose((0, 2, 3, 1))
+
+        return seg_target, seg_mask, shrunken_kernel
````
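
Both halves of this new file lean on the same pyclipper offsetting trick: `build_target` shrinks each ground-truth polygon into a text kernel (negative offset), while `polygon_to_box` grows predicted regions back out (positive offset), with the offset distance derived as area x ratio / perimeter. A self-contained sketch of that mechanic on a toy rectangle:

```python
# Minimal sketch of the pyclipper offsetting used above: a polygon is
# shrunk (negative distance) to build the text-kernel target and expanded
# (positive distance) at inference, with distance = area * ratio / perimeter.
import numpy as np
import pyclipper
from shapely.geometry import Polygon

poly = np.array([[10, 10], [110, 10], [110, 50], [10, 50]])  # toy 100x40 box
shapely_poly = Polygon(poly)
distance = shapely_poly.area * (1 - 0.1**2) / shapely_poly.length  # shrink_ratio = 0.1

offset = pyclipper.PyclipperOffset()
offset.AddPath([tuple(p) for p in poly], pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
shrunken = np.array(offset.Execute(-distance)[0])  # text kernel (smaller)
expanded = np.array(offset.Execute(distance)[0])   # unclipped region (larger)
print(Polygon(shrunken).area < shapely_poly.area < Polygon(expanded).area)  # True
```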