python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- doctr/datasets/__init__.py +2 -0
- doctr/datasets/cord.py +6 -4
- doctr/datasets/datasets/base.py +3 -2
- doctr/datasets/datasets/pytorch.py +4 -2
- doctr/datasets/datasets/tensorflow.py +4 -2
- doctr/datasets/detection.py +6 -3
- doctr/datasets/doc_artefacts.py +2 -1
- doctr/datasets/funsd.py +7 -8
- doctr/datasets/generator/base.py +3 -2
- doctr/datasets/generator/pytorch.py +3 -1
- doctr/datasets/generator/tensorflow.py +3 -1
- doctr/datasets/ic03.py +3 -2
- doctr/datasets/ic13.py +2 -1
- doctr/datasets/iiit5k.py +6 -4
- doctr/datasets/iiithws.py +2 -1
- doctr/datasets/imgur5k.py +3 -2
- doctr/datasets/loader.py +4 -2
- doctr/datasets/mjsynth.py +2 -1
- doctr/datasets/ocr.py +2 -1
- doctr/datasets/orientation.py +40 -0
- doctr/datasets/recognition.py +3 -2
- doctr/datasets/sroie.py +2 -1
- doctr/datasets/svhn.py +2 -1
- doctr/datasets/svt.py +3 -2
- doctr/datasets/synthtext.py +2 -1
- doctr/datasets/utils.py +27 -11
- doctr/datasets/vocabs.py +26 -1
- doctr/datasets/wildreceipt.py +111 -0
- doctr/file_utils.py +3 -1
- doctr/io/elements.py +52 -35
- doctr/io/html.py +5 -3
- doctr/io/image/base.py +5 -4
- doctr/io/image/pytorch.py +12 -7
- doctr/io/image/tensorflow.py +11 -6
- doctr/io/pdf.py +5 -4
- doctr/io/reader.py +13 -5
- doctr/models/_utils.py +30 -53
- doctr/models/artefacts/barcode.py +4 -3
- doctr/models/artefacts/face.py +4 -2
- doctr/models/builder.py +58 -43
- doctr/models/classification/__init__.py +1 -0
- doctr/models/classification/magc_resnet/pytorch.py +5 -2
- doctr/models/classification/magc_resnet/tensorflow.py +5 -2
- doctr/models/classification/mobilenet/pytorch.py +16 -4
- doctr/models/classification/mobilenet/tensorflow.py +29 -20
- doctr/models/classification/predictor/pytorch.py +3 -2
- doctr/models/classification/predictor/tensorflow.py +2 -1
- doctr/models/classification/resnet/pytorch.py +23 -13
- doctr/models/classification/resnet/tensorflow.py +33 -26
- doctr/models/classification/textnet/__init__.py +6 -0
- doctr/models/classification/textnet/pytorch.py +275 -0
- doctr/models/classification/textnet/tensorflow.py +267 -0
- doctr/models/classification/vgg/pytorch.py +4 -2
- doctr/models/classification/vgg/tensorflow.py +5 -2
- doctr/models/classification/vit/pytorch.py +9 -3
- doctr/models/classification/vit/tensorflow.py +9 -3
- doctr/models/classification/zoo.py +7 -2
- doctr/models/core.py +1 -1
- doctr/models/detection/__init__.py +1 -0
- doctr/models/detection/_utils/pytorch.py +7 -1
- doctr/models/detection/_utils/tensorflow.py +7 -3
- doctr/models/detection/core.py +9 -3
- doctr/models/detection/differentiable_binarization/base.py +37 -25
- doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
- doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
- doctr/models/detection/fast/__init__.py +6 -0
- doctr/models/detection/fast/base.py +256 -0
- doctr/models/detection/fast/pytorch.py +442 -0
- doctr/models/detection/fast/tensorflow.py +428 -0
- doctr/models/detection/linknet/base.py +12 -5
- doctr/models/detection/linknet/pytorch.py +28 -15
- doctr/models/detection/linknet/tensorflow.py +68 -88
- doctr/models/detection/predictor/pytorch.py +16 -6
- doctr/models/detection/predictor/tensorflow.py +13 -5
- doctr/models/detection/zoo.py +19 -16
- doctr/models/factory/hub.py +20 -10
- doctr/models/kie_predictor/base.py +2 -1
- doctr/models/kie_predictor/pytorch.py +28 -36
- doctr/models/kie_predictor/tensorflow.py +27 -27
- doctr/models/modules/__init__.py +1 -0
- doctr/models/modules/layers/__init__.py +6 -0
- doctr/models/modules/layers/pytorch.py +166 -0
- doctr/models/modules/layers/tensorflow.py +175 -0
- doctr/models/modules/transformer/pytorch.py +24 -22
- doctr/models/modules/transformer/tensorflow.py +6 -4
- doctr/models/modules/vision_transformer/pytorch.py +2 -4
- doctr/models/modules/vision_transformer/tensorflow.py +2 -4
- doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
- doctr/models/predictor/base.py +14 -3
- doctr/models/predictor/pytorch.py +26 -29
- doctr/models/predictor/tensorflow.py +25 -22
- doctr/models/preprocessor/pytorch.py +14 -9
- doctr/models/preprocessor/tensorflow.py +10 -5
- doctr/models/recognition/core.py +4 -1
- doctr/models/recognition/crnn/pytorch.py +23 -16
- doctr/models/recognition/crnn/tensorflow.py +25 -17
- doctr/models/recognition/master/base.py +4 -1
- doctr/models/recognition/master/pytorch.py +20 -9
- doctr/models/recognition/master/tensorflow.py +20 -8
- doctr/models/recognition/parseq/base.py +4 -1
- doctr/models/recognition/parseq/pytorch.py +28 -22
- doctr/models/recognition/parseq/tensorflow.py +22 -11
- doctr/models/recognition/predictor/_utils.py +3 -2
- doctr/models/recognition/predictor/pytorch.py +3 -2
- doctr/models/recognition/predictor/tensorflow.py +2 -1
- doctr/models/recognition/sar/pytorch.py +14 -7
- doctr/models/recognition/sar/tensorflow.py +23 -14
- doctr/models/recognition/utils.py +5 -1
- doctr/models/recognition/vitstr/base.py +4 -1
- doctr/models/recognition/vitstr/pytorch.py +22 -13
- doctr/models/recognition/vitstr/tensorflow.py +21 -10
- doctr/models/recognition/zoo.py +4 -2
- doctr/models/utils/pytorch.py +24 -6
- doctr/models/utils/tensorflow.py +22 -3
- doctr/models/zoo.py +21 -3
- doctr/transforms/functional/base.py +8 -3
- doctr/transforms/functional/pytorch.py +23 -6
- doctr/transforms/functional/tensorflow.py +25 -5
- doctr/transforms/modules/base.py +12 -5
- doctr/transforms/modules/pytorch.py +10 -12
- doctr/transforms/modules/tensorflow.py +17 -9
- doctr/utils/common_types.py +1 -1
- doctr/utils/data.py +4 -2
- doctr/utils/fonts.py +3 -2
- doctr/utils/geometry.py +95 -26
- doctr/utils/metrics.py +36 -22
- doctr/utils/multithreading.py +5 -3
- doctr/utils/repr.py +3 -1
- doctr/utils/visualization.py +31 -8
- doctr/version.py +1 -1
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
- python_doctr-0.8.1.dist-info/RECORD +173 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
- python_doctr-0.7.0.dist-info/RECORD +0 -161
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0
doctr/models/classification/textnet/tensorflow.py  ADDED

@@ -0,0 +1,267 @@
+# Copyright (C) 2021-2024, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+
+from copy import deepcopy
+from typing import Any, Dict, List, Optional, Tuple
+
+from tensorflow.keras import Sequential, layers
+
+from doctr.datasets import VOCABS
+
+from ...modules.layers.tensorflow import FASTConvLayer
+from ...utils import conv_sequence, load_pretrained_params
+
+__all__ = ["textnet_tiny", "textnet_small", "textnet_base"]
+
+default_cfgs: Dict[str, Dict[str, Any]] = {
+    "textnet_tiny": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (32, 32, 3),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/textnet_tiny-9e605bd8.zip&src=0",
+    },
+    "textnet_small": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (32, 32, 3),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/textnet_small-4784b292.zip&src=0",
+    },
+    "textnet_base": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (32, 32, 3),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/textnet_base-2c3f3265.zip&src=0",
+    },
+}
+
+
+class TextNet(Sequential):
+    """Implements TextNet architecture from `"FAST: Faster Arbitrarily-Shaped Text Detector with
+    Minimalist Kernel Representation" <https://arxiv.org/abs/2111.02394>`_.
+    Implementation based on the official Pytorch implementation: <https://github.com/czczup/FAST>`_.
+
+    Args:
+    ----
+        stages (List[Dict[str, List[int]]]): List of dictionaries containing the parameters of each stage.
+        include_top (bool, optional): Whether to include the classifier head. Defaults to True.
+        num_classes (int, optional): Number of output classes. Defaults to 1000.
+        cfg (Optional[Dict[str, Any]], optional): Additional configuration. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        stages: List[Dict[str, List[int]]],
+        input_shape: Tuple[int, int, int] = (32, 32, 3),
+        num_classes: int = 1000,
+        include_top: bool = True,
+        cfg: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        _layers = [
+            *conv_sequence(
+                out_channels=64, activation="relu", bn=True, kernel_size=3, strides=2, input_shape=input_shape
+            ),
+            *[
+                Sequential(
+                    [
+                        FASTConvLayer(**params)  # type: ignore[arg-type]
+                        for params in [{key: stage[key][i] for key in stage} for i in range(len(stage["in_channels"]))]
+                    ],
+                    name=f"stage_{i}",
+                )
+                for i, stage in enumerate(stages)
+            ],
+        ]
+
+        if include_top:
+            _layers.append(
+                Sequential(
+                    [
+                        layers.AveragePooling2D(1),
+                        layers.Flatten(),
+                        layers.Dense(num_classes),
+                    ],
+                    name="classifier",
+                )
+            )
+
+        super().__init__(_layers)
+        self.cfg = cfg
+
+
+def _textnet(
+    arch: str,
+    pretrained: bool,
+    **kwargs: Any,
+) -> TextNet:
+    kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
+    kwargs["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
+    kwargs["classes"] = kwargs.get("classes", default_cfgs[arch]["classes"])
+
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["num_classes"] = kwargs["num_classes"]
+    _cfg["input_shape"] = kwargs["input_shape"]
+    _cfg["classes"] = kwargs["classes"]
+    kwargs.pop("classes")
+
+    # Build the model
+    model = TextNet(cfg=_cfg, **kwargs)
+    # Load pretrained parameters
+    if pretrained:
+        load_pretrained_params(model, default_cfgs[arch]["url"])
+
+    return model
+
+
+def textnet_tiny(pretrained: bool = False, **kwargs: Any) -> TextNet:
+    """Implements TextNet architecture from `"FAST: Faster Arbitrarily-Shaped Text Detector with
+    Minimalist Kernel Representation" <https://arxiv.org/abs/2111.02394>`_.
+    Implementation based on the official Pytorch implementation: <https://github.com/czczup/FAST>`_.
+
+    >>> import tensorflow as tf
+    >>> from doctr.models import textnet_tiny
+    >>> model = textnet_tiny(pretrained=False)
+    >>> input_tensor = tf.random.uniform(shape=[1, 32, 32, 3], maxval=1, dtype=tf.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+    ----
+        pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the TextNet architecture
+
+    Returns:
+    -------
+        A textnet tiny model
+    """
+    return _textnet(
+        "textnet_tiny",
+        pretrained,
+        stages=[
+            {"in_channels": [64] * 3, "out_channels": [64] * 3, "kernel_size": [(3, 3)] * 3, "stride": [1, 2, 1]},
+            {
+                "in_channels": [64, 128, 128, 128],
+                "out_channels": [128] * 4,
+                "kernel_size": [(3, 3), (1, 3), (3, 3), (3, 1)],
+                "stride": [2, 1, 1, 1],
+            },
+            {
+                "in_channels": [128, 256, 256, 256],
+                "out_channels": [256] * 4,
+                "kernel_size": [(3, 3), (3, 3), (3, 1), (1, 3)],
+                "stride": [2, 1, 1, 1],
+            },
+            {
+                "in_channels": [256, 512, 512, 512],
+                "out_channels": [512] * 4,
+                "kernel_size": [(3, 3), (3, 1), (1, 3), (3, 3)],
+                "stride": [2, 1, 1, 1],
+            },
+        ],
+        **kwargs,
+    )
+
+
+def textnet_small(pretrained: bool = False, **kwargs: Any) -> TextNet:
+    """Implements TextNet architecture from `"FAST: Faster Arbitrarily-Shaped Text Detector with
+    Minimalist Kernel Representation" <https://arxiv.org/abs/2111.02394>`_.
+    Implementation based on the official Pytorch implementation: <https://github.com/czczup/FAST>`_.
+
+    >>> import tensorflow as tf
+    >>> from doctr.models import textnet_small
+    >>> model = textnet_small(pretrained=False)
+    >>> input_tensor = tf.random.uniform(shape=[1, 32, 32, 3], maxval=1, dtype=tf.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+    ----
+        pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the TextNet architecture
+
+    Returns:
+    -------
+        A TextNet small model
+    """
+    return _textnet(
+        "textnet_small",
+        pretrained,
+        stages=[
+            {"in_channels": [64] * 2, "out_channels": [64] * 2, "kernel_size": [(3, 3)] * 2, "stride": [1, 2]},
+            {
+                "in_channels": [64, 128, 128, 128, 128, 128, 128, 128],
+                "out_channels": [128] * 8,
+                "kernel_size": [(3, 3), (1, 3), (3, 3), (3, 1), (3, 3), (3, 1), (1, 3), (3, 3)],
+                "stride": [2, 1, 1, 1, 1, 1, 1, 1],
+            },
+            {
+                "in_channels": [128, 256, 256, 256, 256, 256, 256, 256],
+                "out_channels": [256] * 8,
+                "kernel_size": [(3, 3), (3, 3), (1, 3), (3, 1), (3, 3), (1, 3), (3, 1), (3, 3)],
+                "stride": [2, 1, 1, 1, 1, 1, 1, 1],
+            },
+            {
+                "in_channels": [256, 512, 512, 512, 512],
+                "out_channels": [512] * 5,
+                "kernel_size": [(3, 3), (3, 1), (1, 3), (1, 3), (3, 1)],
+                "stride": [2, 1, 1, 1, 1],
+            },
+        ],
+        **kwargs,
+    )
+
+
+def textnet_base(pretrained: bool = False, **kwargs: Any) -> TextNet:
+    """Implements TextNet architecture from `"FAST: Faster Arbitrarily-Shaped Text Detector with
+    Minimalist Kernel Representation" <https://arxiv.org/abs/2111.02394>`_.
+    Implementation based on the official Pytorch implementation: <https://github.com/czczup/FAST>`_.
+
+    >>> import tensorflow as tf
+    >>> from doctr.models import textnet_base
+    >>> model = textnet_base(pretrained=False)
+    >>> input_tensor = tf.random.uniform(shape=[1, 32, 32, 3], maxval=1, dtype=tf.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+    ----
+        pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the TextNet architecture
+
+    Returns:
+    -------
+        A TextNet base model
+    """
+    return _textnet(
+        "textnet_base",
+        pretrained,
+        stages=[
+            {
+                "in_channels": [64] * 10,
+                "out_channels": [64] * 10,
+                "kernel_size": [(3, 3), (3, 3), (3, 1), (3, 3), (3, 1), (3, 3), (3, 3), (1, 3), (3, 3), (3, 3)],
+                "stride": [1, 2, 1, 1, 1, 1, 1, 1, 1, 1],
+            },
+            {
+                "in_channels": [64, 128, 128, 128, 128, 128, 128, 128, 128, 128],
+                "out_channels": [128] * 10,
+                "kernel_size": [(3, 3), (1, 3), (3, 3), (3, 1), (3, 3), (3, 3), (3, 1), (3, 1), (3, 3), (3, 3)],
+                "stride": [2, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+            },
+            {
+                "in_channels": [128, 256, 256, 256, 256, 256, 256, 256],
+                "out_channels": [256] * 8,
+                "kernel_size": [(3, 3), (3, 3), (3, 3), (1, 3), (3, 3), (3, 1), (3, 3), (3, 1)],
+                "stride": [2, 1, 1, 1, 1, 1, 1, 1],
+            },
+            {
+                "in_channels": [256, 512, 512, 512, 512],
+                "out_channels": [512] * 5,
+                "kernel_size": [(3, 3), (1, 3), (3, 1), (3, 1), (1, 3)],
+                "stride": [2, 1, 1, 1, 1],
+            },
+        ],
+        **kwargs,
+    )
doctr/models/classification/vgg/pytorch.py  CHANGED

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -77,12 +77,14 @@ def vgg16_bn_r(pretrained: bool = False, **kwargs: Any) -> tv_vgg.VGG:
     >>> out = model(input_tensor)
 
     Args:
+    ----
         pretrained (bool): If True, returns a model pre-trained on ImageNet
+        **kwargs: keyword arguments of the VGG architecture
 
     Returns:
+    -------
         VGG feature extractor
     """
-
     return _vgg(
         "vgg16_bn_r",
         pretrained,
doctr/models/classification/vgg/tensorflow.py  CHANGED

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -32,6 +32,7 @@ class VGG(Sequential):
     <https://arxiv.org/pdf/1409.1556.pdf>`_.
 
     Args:
+    ----
         num_blocks: number of convolutional block in each stage
         planes: number of output channels in each stage
         rect_pools: whether pooling square kernels should be replace with rectangular ones
@@ -99,12 +100,14 @@ def vgg16_bn_r(pretrained: bool = False, **kwargs: Any) -> VGG:
     >>> out = model(input_tensor)
 
     Args:
+    ----
         pretrained (bool): If True, returns a model pre-trained on ImageNet
+        **kwargs: keyword arguments of the VGG architecture
 
     Returns:
+    -------
         VGG feature extractor
     """
-
     return _vgg(
         "vgg16_bn_r", pretrained, [2, 2, 3, 3, 3], [64, 128, 256, 512, 512], [False, False, True, True, True], **kwargs
     )
doctr/models/classification/vit/pytorch.py  CHANGED

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -40,6 +40,7 @@ class ClassifierHead(nn.Module):
     """Classifier head for Vision Transformer
 
     Args:
+    ----
         in_channels: number of input channels
         num_classes: number of output classes
     """
@@ -64,6 +65,7 @@ class VisionTransformer(nn.Sequential):
     <https://arxiv.org/pdf/2010.11929.pdf>`_.
 
     Args:
+    ----
         d_model: dimension of the transformer layers
         num_layers: number of transformer layers
         num_heads: number of attention heads
@@ -141,12 +143,14 @@ def vit_s(pretrained: bool = False, **kwargs: Any) -> VisionTransformer:
     >>> out = model(input_tensor)
 
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the VisionTransformer architecture
 
     Returns:
+    -------
         A feature extractor model
     """
-
     return _vit(
         "vit_s",
         pretrained,
@@ -171,12 +175,14 @@ def vit_b(pretrained: bool = False, **kwargs: Any) -> VisionTransformer:
     >>> out = model(input_tensor)
 
    Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the VisionTransformer architecture
 
     Returns:
+    -------
         A feature extractor model
     """
-
     return _vit(
         "vit_b",
         pretrained,
doctr/models/classification/vit/tensorflow.py  CHANGED

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -41,6 +41,7 @@ class ClassifierHead(layers.Layer, NestedObject):
     """Classifier head for Vision Transformer
 
     Args:
+    ----
         num_classes: number of output classes
     """
 
@@ -60,6 +61,7 @@ class VisionTransformer(Sequential):
     <https://arxiv.org/pdf/2010.11929.pdf>`_.
 
    Args:
+    ----
         d_model: dimension of the transformer layers
         num_layers: number of transformer layers
         num_heads: number of attention heads
@@ -140,12 +142,14 @@ def vit_s(pretrained: bool = False, **kwargs: Any) -> VisionTransformer:
     >>> out = model(input_tensor)
 
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the VisionTransformer architecture
 
     Returns:
+    -------
         A feature extractor model
     """
-
     return _vit(
         "vit_s",
         pretrained,
@@ -169,12 +173,14 @@ def vit_b(pretrained: bool = False, **kwargs: Any) -> VisionTransformer:
     >>> out = model(input_tensor)
 
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the VisionTransformer architecture
 
     Returns:
+    -------
         A feature extractor model
     """
-
     return _vit(
         "vit_b",
         pretrained,
doctr/models/classification/zoo.py  CHANGED

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -24,6 +24,9 @@ ARCHS: List[str] = [
     "resnet34",
     "resnet50",
     "resnet34_wide",
+    "textnet_tiny",
+    "textnet_small",
+    "textnet_base",
     "vgg16_bn_r",
     "vit_s",
     "vit_b",
@@ -59,11 +62,13 @@ def crop_orientation_predictor(
     >>> out = model([input_crop])
 
     Args:
+    ----
         arch: name of the architecture to use (e.g. 'mobilenet_v3_small')
         pretrained: If True, returns a model pre-trained on our recognition crops dataset
+        **kwargs: keyword arguments to be passed to the CropOrientationPredictor
 
     Returns:
+    -------
         CropOrientationPredictor
     """
-
     return _crop_orientation_predictor(arch, pretrained, **kwargs)
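Note: the doctest above leaves input_crop abstract; a concrete call could look like the following sketch (the dummy crop shape and values are assumptions, not from the package):

import numpy as np

from doctr.models import crop_orientation_predictor

predictor = crop_orientation_predictor(pretrained=True)  # weights are downloaded on first use
crop = (np.random.rand(64, 128, 3) * 255).astype(np.uint8)  # dummy (H, W, C) text crop
out = predictor([crop])  # one orientation prediction per input crop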
doctr/models/core.py  CHANGED

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
doctr/models/detection/_utils/pytorch.py  CHANGED

@@ -13,9 +13,12 @@ def erode(x: Tensor, kernel_size: int) -> Tensor:
    """Performs erosion on a given tensor
 
    Args:
+    ----
        x: boolean tensor of shape (N, C, H, W)
        kernel_size: the size of the kernel to use for erosion
+
    Returns:
+    -------
        the eroded tensor
    """
    _pad = (kernel_size - 1) // 2
@@ -27,9 +30,12 @@ def dilate(x: Tensor, kernel_size: int) -> Tensor:
    """Performs dilation on a given tensor
 
    Args:
+    ----
        x: boolean tensor of shape (N, C, H, W)
        kernel_size: the size of the kernel to use for dilation
+
    Returns:
+    -------
        the dilated tensor
    """
    _pad = (kernel_size - 1) // 2
doctr/models/detection/_utils/tensorflow.py  CHANGED

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -12,12 +12,14 @@ def erode(x: tf.Tensor, kernel_size: int) -> tf.Tensor:
    """Performs erosion on a given tensor
 
    Args:
+    ----
        x: boolean tensor of shape (N, H, W, C)
        kernel_size: the size of the kernel to use for erosion
+
    Returns:
+    -------
        the eroded tensor
    """
-
    return 1 - tf.nn.max_pool2d(1 - x, kernel_size, strides=1, padding="SAME")
 
 
@@ -25,10 +27,12 @@ def dilate(x: tf.Tensor, kernel_size: int) -> tf.Tensor:
    """Performs dilation on a given tensor
 
    Args:
+    ----
        x: boolean tensor of shape (N, H, W, C)
        kernel_size: the size of the kernel to use for dilation
+
    Returns:
+    -------
        the dilated tensor
    """
-
    return tf.nn.max_pool2d(x, kernel_size, strides=1, padding="SAME")
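Note: the erode/dilate helpers above reduce binary morphology to max-pooling: dilation is a plain max-pool, and erosion is dilation of the complement. A small standalone sketch on a toy map (illustration only, not from the package):

import numpy as np
import tensorflow as tf

# Toy binary map with a single active pixel, channels-last (N, H, W, C)
x = np.zeros((1, 5, 5, 1), dtype=np.float32)
x[0, 2, 2, 0] = 1.0
x = tf.convert_to_tensor(x)

dilated = tf.nn.max_pool2d(x, 3, strides=1, padding="SAME")  # foreground grows
eroded = 1 - tf.nn.max_pool2d(1 - x, 3, strides=1, padding="SAME")  # foreground shrinks

print(tf.reduce_sum(dilated).numpy())  # 9.0: the pixel grew into a 3x3 block
print(tf.reduce_sum(eroded).numpy())  # 0.0: a lone pixel is eroded away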
doctr/models/detection/core.py  CHANGED

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -17,6 +17,7 @@ class DetectionPostProcessor(NestedObject):
    """Abstract class to postprocess the raw output of the model
 
    Args:
+    ----
        box_thresh (float): minimal objectness score to consider a box
        bin_thresh (float): threshold to apply to segmentation raw heatmap
        assume straight_pages (bool): if True, fit straight boxes only
@@ -36,9 +37,13 @@ class DetectionPostProcessor(NestedObject):
        """Compute the confidence score for a polygon : mean of the p values on the polygon
 
        Args:
+        ----
            pred (np.ndarray): p map returned by the model
+            points: coordinates of the polygon
+            assume_straight_pages: if True, fit straight boxes only
 
        Returns:
+        -------
            polygon objectness
        """
        h, w = pred.shape[:2]
@@ -52,7 +57,7 @@ class DetectionPostProcessor(NestedObject):
 
        else:
            mask: np.ndarray = np.zeros((h, w), np.int32)
-            cv2.fillPoly(mask, [points.astype(np.int32)], 1.0)
+            cv2.fillPoly(mask, [points.astype(np.int32)], 1.0)  # type: ignore[call-overload]
            product = pred * mask
            return np.sum(product) / np.count_nonzero(product)
 
@@ -70,13 +75,14 @@ class DetectionPostProcessor(NestedObject):
        """Performs postprocessing for a list of model outputs
 
        Args:
+        ----
            proba_map: probability map of shape (N, H, W, C)
 
        Returns:
+        -------
            list of N class predictions (for each input sample), where each class predictions is a list of C tensors
            of shape (*, 5) or (*, 6)
        """
-
        if proba_map.ndim != 4:
            raise AssertionError(f"arg `proba_map` is expected to be 4-dimensional, got {proba_map.ndim}.")
 
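Note: the box_score change above only adds a typing suppression for the cv2.fillPoly overload; the computation itself is a masked mean over the probability map. A standalone sketch of that computation with hypothetical values:

import cv2
import numpy as np

pred = np.random.rand(32, 32).astype(np.float32)  # p map returned by the model
points = np.array([[4, 4], [28, 6], [26, 27], [5, 25]])  # polygon vertices as (x, y)

mask = np.zeros(pred.shape[:2], np.int32)
cv2.fillPoly(mask, [points.astype(np.int32)], 1.0)  # rasterize the polygon
product = pred * mask
score = np.sum(product) / np.count_nonzero(product)  # mean probability inside the polygon
print(score)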