python-doctr 0.11.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. doctr/__init__.py +0 -1
  2. doctr/datasets/__init__.py +1 -5
  3. doctr/datasets/coco_text.py +139 -0
  4. doctr/datasets/cord.py +2 -1
  5. doctr/datasets/datasets/__init__.py +1 -6
  6. doctr/datasets/datasets/pytorch.py +2 -2
  7. doctr/datasets/funsd.py +2 -2
  8. doctr/datasets/generator/__init__.py +1 -6
  9. doctr/datasets/ic03.py +1 -1
  10. doctr/datasets/ic13.py +2 -1
  11. doctr/datasets/iiit5k.py +4 -1
  12. doctr/datasets/imgur5k.py +9 -2
  13. doctr/datasets/ocr.py +1 -1
  14. doctr/datasets/recognition.py +1 -1
  15. doctr/datasets/svhn.py +1 -1
  16. doctr/datasets/svt.py +2 -2
  17. doctr/datasets/synthtext.py +15 -2
  18. doctr/datasets/utils.py +7 -6
  19. doctr/datasets/vocabs.py +1100 -54
  20. doctr/file_utils.py +2 -92
  21. doctr/io/elements.py +37 -3
  22. doctr/io/image/__init__.py +1 -7
  23. doctr/io/image/pytorch.py +1 -1
  24. doctr/models/_utils.py +4 -4
  25. doctr/models/classification/__init__.py +1 -0
  26. doctr/models/classification/magc_resnet/__init__.py +1 -6
  27. doctr/models/classification/magc_resnet/pytorch.py +3 -4
  28. doctr/models/classification/mobilenet/__init__.py +1 -6
  29. doctr/models/classification/mobilenet/pytorch.py +15 -1
  30. doctr/models/classification/predictor/__init__.py +1 -6
  31. doctr/models/classification/predictor/pytorch.py +2 -2
  32. doctr/models/classification/resnet/__init__.py +1 -6
  33. doctr/models/classification/resnet/pytorch.py +26 -3
  34. doctr/models/classification/textnet/__init__.py +1 -6
  35. doctr/models/classification/textnet/pytorch.py +11 -2
  36. doctr/models/classification/vgg/__init__.py +1 -6
  37. doctr/models/classification/vgg/pytorch.py +16 -1
  38. doctr/models/classification/vip/__init__.py +1 -0
  39. doctr/models/classification/vip/layers/__init__.py +1 -0
  40. doctr/models/classification/vip/layers/pytorch.py +615 -0
  41. doctr/models/classification/vip/pytorch.py +505 -0
  42. doctr/models/classification/vit/__init__.py +1 -6
  43. doctr/models/classification/vit/pytorch.py +12 -3
  44. doctr/models/classification/zoo.py +7 -8
  45. doctr/models/detection/_utils/__init__.py +1 -6
  46. doctr/models/detection/core.py +1 -1
  47. doctr/models/detection/differentiable_binarization/__init__.py +1 -6
  48. doctr/models/detection/differentiable_binarization/base.py +7 -16
  49. doctr/models/detection/differentiable_binarization/pytorch.py +13 -4
  50. doctr/models/detection/fast/__init__.py +1 -6
  51. doctr/models/detection/fast/base.py +6 -17
  52. doctr/models/detection/fast/pytorch.py +17 -8
  53. doctr/models/detection/linknet/__init__.py +1 -6
  54. doctr/models/detection/linknet/base.py +5 -15
  55. doctr/models/detection/linknet/pytorch.py +12 -3
  56. doctr/models/detection/predictor/__init__.py +1 -6
  57. doctr/models/detection/predictor/pytorch.py +1 -1
  58. doctr/models/detection/zoo.py +15 -32
  59. doctr/models/factory/hub.py +9 -22
  60. doctr/models/kie_predictor/__init__.py +1 -6
  61. doctr/models/kie_predictor/pytorch.py +3 -7
  62. doctr/models/modules/layers/__init__.py +1 -6
  63. doctr/models/modules/layers/pytorch.py +52 -4
  64. doctr/models/modules/transformer/__init__.py +1 -6
  65. doctr/models/modules/transformer/pytorch.py +2 -2
  66. doctr/models/modules/vision_transformer/__init__.py +1 -6
  67. doctr/models/predictor/__init__.py +1 -6
  68. doctr/models/predictor/base.py +3 -8
  69. doctr/models/predictor/pytorch.py +3 -6
  70. doctr/models/preprocessor/__init__.py +1 -6
  71. doctr/models/preprocessor/pytorch.py +27 -32
  72. doctr/models/recognition/__init__.py +1 -0
  73. doctr/models/recognition/crnn/__init__.py +1 -6
  74. doctr/models/recognition/crnn/pytorch.py +16 -7
  75. doctr/models/recognition/master/__init__.py +1 -6
  76. doctr/models/recognition/master/pytorch.py +15 -6
  77. doctr/models/recognition/parseq/__init__.py +1 -6
  78. doctr/models/recognition/parseq/pytorch.py +26 -8
  79. doctr/models/recognition/predictor/__init__.py +1 -6
  80. doctr/models/recognition/predictor/_utils.py +100 -47
  81. doctr/models/recognition/predictor/pytorch.py +4 -5
  82. doctr/models/recognition/sar/__init__.py +1 -6
  83. doctr/models/recognition/sar/pytorch.py +13 -4
  84. doctr/models/recognition/utils.py +56 -47
  85. doctr/models/recognition/viptr/__init__.py +1 -0
  86. doctr/models/recognition/viptr/pytorch.py +277 -0
  87. doctr/models/recognition/vitstr/__init__.py +1 -6
  88. doctr/models/recognition/vitstr/pytorch.py +13 -4
  89. doctr/models/recognition/zoo.py +13 -8
  90. doctr/models/utils/__init__.py +1 -6
  91. doctr/models/utils/pytorch.py +29 -19
  92. doctr/transforms/functional/__init__.py +1 -6
  93. doctr/transforms/functional/pytorch.py +4 -4
  94. doctr/transforms/modules/__init__.py +1 -7
  95. doctr/transforms/modules/base.py +26 -92
  96. doctr/transforms/modules/pytorch.py +28 -26
  97. doctr/utils/data.py +1 -1
  98. doctr/utils/geometry.py +7 -11
  99. doctr/utils/visualization.py +1 -1
  100. doctr/version.py +1 -1
  101. {python_doctr-0.11.0.dist-info → python_doctr-1.0.0.dist-info}/METADATA +22 -63
  102. python_doctr-1.0.0.dist-info/RECORD +149 -0
  103. {python_doctr-0.11.0.dist-info → python_doctr-1.0.0.dist-info}/WHEEL +1 -1
  104. doctr/datasets/datasets/tensorflow.py +0 -59
  105. doctr/datasets/generator/tensorflow.py +0 -58
  106. doctr/datasets/loader.py +0 -94
  107. doctr/io/image/tensorflow.py +0 -101
  108. doctr/models/classification/magc_resnet/tensorflow.py +0 -196
  109. doctr/models/classification/mobilenet/tensorflow.py +0 -433
  110. doctr/models/classification/predictor/tensorflow.py +0 -60
  111. doctr/models/classification/resnet/tensorflow.py +0 -397
  112. doctr/models/classification/textnet/tensorflow.py +0 -266
  113. doctr/models/classification/vgg/tensorflow.py +0 -116
  114. doctr/models/classification/vit/tensorflow.py +0 -192
  115. doctr/models/detection/_utils/tensorflow.py +0 -34
  116. doctr/models/detection/differentiable_binarization/tensorflow.py +0 -414
  117. doctr/models/detection/fast/tensorflow.py +0 -419
  118. doctr/models/detection/linknet/tensorflow.py +0 -369
  119. doctr/models/detection/predictor/tensorflow.py +0 -70
  120. doctr/models/kie_predictor/tensorflow.py +0 -187
  121. doctr/models/modules/layers/tensorflow.py +0 -171
  122. doctr/models/modules/transformer/tensorflow.py +0 -235
  123. doctr/models/modules/vision_transformer/tensorflow.py +0 -100
  124. doctr/models/predictor/tensorflow.py +0 -155
  125. doctr/models/preprocessor/tensorflow.py +0 -122
  126. doctr/models/recognition/crnn/tensorflow.py +0 -308
  127. doctr/models/recognition/master/tensorflow.py +0 -313
  128. doctr/models/recognition/parseq/tensorflow.py +0 -508
  129. doctr/models/recognition/predictor/tensorflow.py +0 -79
  130. doctr/models/recognition/sar/tensorflow.py +0 -416
  131. doctr/models/recognition/vitstr/tensorflow.py +0 -278
  132. doctr/models/utils/tensorflow.py +0 -182
  133. doctr/transforms/functional/tensorflow.py +0 -254
  134. doctr/transforms/modules/tensorflow.py +0 -562
  135. python_doctr-0.11.0.dist-info/RECORD +0 -173
  136. {python_doctr-0.11.0.dist-info → python_doctr-1.0.0.dist-info/licenses}/LICENSE +0 -0
  137. {python_doctr-0.11.0.dist-info → python_doctr-1.0.0.dist-info}/top_level.txt +0 -0
  138. {python_doctr-0.11.0.dist-info → python_doctr-1.0.0.dist-info}/zip-safe +0 -0
@@ -1,101 +0,0 @@
1
- # Copyright (C) 2021-2025, Mindee.
2
-
3
- # This program is licensed under the Apache License 2.0.
4
- # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
-
6
-
7
- import numpy as np
8
- import tensorflow as tf
9
- from PIL import Image
10
- from tensorflow.keras.utils import img_to_array
11
-
12
- from doctr.utils.common_types import AbstractPath
13
-
14
- __all__ = ["tensor_from_pil", "read_img_as_tensor", "decode_img_as_tensor", "tensor_from_numpy", "get_img_shape"]
15
-
16
-
17
- def tensor_from_pil(pil_img: Image.Image, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
18
- """Convert a PIL Image to a TensorFlow tensor
19
-
20
- Args:
21
- pil_img: a PIL image
22
- dtype: the output tensor data type
23
-
24
- Returns:
25
- decoded image as tensor
26
- """
27
- npy_img = img_to_array(pil_img)
28
-
29
- return tensor_from_numpy(npy_img, dtype)
30
-
31
-
32
- def read_img_as_tensor(img_path: AbstractPath, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
33
- """Read an image file as a TensorFlow tensor
34
-
35
- Args:
36
- img_path: location of the image file
37
- dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
38
-
39
- Returns:
40
- decoded image as a tensor
41
- """
42
- if dtype not in (tf.uint8, tf.float16, tf.float32):
43
- raise ValueError("insupported value for dtype")
44
-
45
- img = tf.io.read_file(img_path)
46
- img = tf.image.decode_jpeg(img, channels=3)
47
-
48
- if dtype != tf.uint8:
49
- img = tf.image.convert_image_dtype(img, dtype=dtype)
50
- img = tf.clip_by_value(img, 0, 1)
51
-
52
- return img
53
-
54
-
55
- def decode_img_as_tensor(img_content: bytes, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
56
- """Read a byte stream as a TensorFlow tensor
57
-
58
- Args:
59
- img_content: bytes of a decoded image
60
- dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
61
-
62
- Returns:
63
- decoded image as a tensor
64
- """
65
- if dtype not in (tf.uint8, tf.float16, tf.float32):
66
- raise ValueError("insupported value for dtype")
67
-
68
- img = tf.io.decode_image(img_content, channels=3)
69
-
70
- if dtype != tf.uint8:
71
- img = tf.image.convert_image_dtype(img, dtype=dtype)
72
- img = tf.clip_by_value(img, 0, 1)
73
-
74
- return img
75
-
76
-
77
- def tensor_from_numpy(npy_img: np.ndarray, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
78
- """Read an image file as a TensorFlow tensor
79
-
80
- Args:
81
- npy_img: image encoded as a numpy array of shape (H, W, C) in np.uint8
82
- dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
83
-
84
- Returns:
85
- same image as a tensor of shape (H, W, C)
86
- """
87
- if dtype not in (tf.uint8, tf.float16, tf.float32):
88
- raise ValueError("insupported value for dtype")
89
-
90
- if dtype == tf.uint8:
91
- img = tf.convert_to_tensor(npy_img, dtype=dtype)
92
- else:
93
- img = tf.image.convert_image_dtype(npy_img, dtype=dtype)
94
- img = tf.clip_by_value(img, 0, 1)
95
-
96
- return img
97
-
98
-
99
- def get_img_shape(img: tf.Tensor) -> tuple[int, int]:
100
- """Get the shape of an image"""
101
- return img.shape[:2]
@@ -1,196 +0,0 @@
1
- # Copyright (C) 2021-2025, Mindee.
2
-
3
- # This program is licensed under the Apache License 2.0.
4
- # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
-
6
- import math
7
- from copy import deepcopy
8
- from functools import partial
9
- from typing import Any
10
-
11
- import tensorflow as tf
12
- from tensorflow.keras import activations, layers
13
- from tensorflow.keras.models import Sequential
14
-
15
- from doctr.datasets import VOCABS
16
-
17
- from ...utils import _build_model, load_pretrained_params
18
- from ..resnet.tensorflow import ResNet
19
-
20
- __all__ = ["magc_resnet31"]
21
-
22
-
23
- default_cfgs: dict[str, dict[str, Any]] = {
24
- "magc_resnet31": {
25
- "mean": (0.694, 0.695, 0.693),
26
- "std": (0.299, 0.296, 0.301),
27
- "input_shape": (32, 32, 3),
28
- "classes": list(VOCABS["french"]),
29
- "url": "https://doctr-static.mindee.com/models?id=v0.9.0/magc_resnet31-16aa7d71.weights.h5&src=0",
30
- },
31
- }
32
-
33
-
34
- class MAGC(layers.Layer):
35
- """Implements the Multi-Aspect Global Context Attention, as described in
36
- <https://arxiv.org/pdf/1910.02562.pdf>`_.
37
-
38
- Args:
39
- inplanes: input channels
40
- headers: number of headers to split channels
41
- attn_scale: if True, re-scale attention to counteract the variance distibutions
42
- ratio: bottleneck ratio
43
- **kwargs
44
- """
45
-
46
- def __init__(
47
- self,
48
- inplanes: int,
49
- headers: int = 8,
50
- attn_scale: bool = False,
51
- ratio: float = 0.0625, # bottleneck ratio of 1/16 as described in paper
52
- **kwargs,
53
- ) -> None:
54
- super().__init__(**kwargs)
55
-
56
- self.headers = headers # h
57
- self.inplanes = inplanes # C
58
- self.attn_scale = attn_scale
59
- self.ratio = ratio
60
- self.planes = int(inplanes * ratio)
61
-
62
- self.single_header_inplanes = int(inplanes / headers) # C / h
63
-
64
- self.conv_mask = layers.Conv2D(filters=1, kernel_size=1, kernel_initializer=tf.initializers.he_normal())
65
-
66
- self.transform = Sequential(
67
- [
68
- layers.Conv2D(filters=self.planes, kernel_size=1, kernel_initializer=tf.initializers.he_normal()),
69
- layers.LayerNormalization([1, 2, 3]),
70
- layers.ReLU(),
71
- layers.Conv2D(filters=self.inplanes, kernel_size=1, kernel_initializer=tf.initializers.he_normal()),
72
- ],
73
- name="transform",
74
- )
75
-
76
- def context_modeling(self, inputs: tf.Tensor) -> tf.Tensor:
77
- b, h, w, c = (tf.shape(inputs)[i] for i in range(4))
78
-
79
- # B, H, W, C -->> B*h, H, W, C/h
80
- x = tf.reshape(inputs, shape=(b, h, w, self.headers, self.single_header_inplanes))
81
- x = tf.transpose(x, perm=(0, 3, 1, 2, 4))
82
- x = tf.reshape(x, shape=(b * self.headers, h, w, self.single_header_inplanes))
83
-
84
- # Compute shorcut
85
- shortcut = x
86
- # B*h, 1, H*W, C/h
87
- shortcut = tf.reshape(shortcut, shape=(b * self.headers, 1, h * w, self.single_header_inplanes))
88
- # B*h, 1, C/h, H*W
89
- shortcut = tf.transpose(shortcut, perm=[0, 1, 3, 2])
90
-
91
- # Compute context mask
92
- # B*h, H, W, 1
93
- context_mask = self.conv_mask(x)
94
- # B*h, 1, H*W, 1
95
- context_mask = tf.reshape(context_mask, shape=(b * self.headers, 1, h * w, 1))
96
- # scale variance
97
- if self.attn_scale and self.headers > 1:
98
- context_mask = context_mask / math.sqrt(self.single_header_inplanes)
99
- # B*h, 1, H*W, 1
100
- context_mask = activations.softmax(context_mask, axis=2)
101
-
102
- # Compute context
103
- # B*h, 1, C/h, 1
104
- context = tf.matmul(shortcut, context_mask)
105
- context = tf.reshape(context, shape=(b, 1, c, 1))
106
- # B, 1, 1, C
107
- context = tf.transpose(context, perm=(0, 1, 3, 2))
108
- # Set shape to resolve shape when calling this module in the Sequential MAGCResnet
109
- batch, chan = inputs.get_shape().as_list()[0], inputs.get_shape().as_list()[-1]
110
- context.set_shape([batch, 1, 1, chan])
111
- return context
112
-
113
- def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor:
114
- # Context modeling: B, H, W, C -> B, 1, 1, C
115
- context = self.context_modeling(inputs)
116
- # Transform: B, 1, 1, C -> B, 1, 1, C
117
- transformed = self.transform(context, **kwargs)
118
- return inputs + transformed
119
-
120
-
121
- def _magc_resnet(
122
- arch: str,
123
- pretrained: bool,
124
- num_blocks: list[int],
125
- output_channels: list[int],
126
- stage_downsample: list[bool],
127
- stage_conv: list[bool],
128
- stage_pooling: list[tuple[int, int] | None],
129
- origin_stem: bool = True,
130
- **kwargs: Any,
131
- ) -> ResNet:
132
- kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
133
- kwargs["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
134
- kwargs["classes"] = kwargs.get("classes", default_cfgs[arch]["classes"])
135
-
136
- _cfg = deepcopy(default_cfgs[arch])
137
- _cfg["num_classes"] = kwargs["num_classes"]
138
- _cfg["classes"] = kwargs["classes"]
139
- _cfg["input_shape"] = kwargs["input_shape"]
140
- kwargs.pop("classes")
141
-
142
- # Build the model
143
- model = ResNet(
144
- num_blocks,
145
- output_channels,
146
- stage_downsample,
147
- stage_conv,
148
- stage_pooling,
149
- origin_stem,
150
- attn_module=partial(MAGC, headers=8, attn_scale=True),
151
- cfg=_cfg,
152
- **kwargs,
153
- )
154
- _build_model(model)
155
-
156
- # Load pretrained parameters
157
- if pretrained:
158
- # The number of classes is not the same as the number of classes in the pretrained model =>
159
- # skip the mismatching layers for fine tuning
160
- load_pretrained_params(
161
- model, default_cfgs[arch]["url"], skip_mismatch=kwargs["num_classes"] != len(default_cfgs[arch]["classes"])
162
- )
163
-
164
- return model
165
-
166
-
167
- def magc_resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet:
168
- """Resnet31 architecture with Multi-Aspect Global Context Attention as described in
169
- `"MASTER: Multi-Aspect Non-local Network for Scene Text Recognition",
170
- <https://arxiv.org/pdf/1910.02562.pdf>`_.
171
-
172
- >>> import tensorflow as tf
173
- >>> from doctr.models import magc_resnet31
174
- >>> model = magc_resnet31(pretrained=False)
175
- >>> input_tensor = tf.random.uniform(shape=[1, 224, 224, 3], maxval=1, dtype=tf.float32)
176
- >>> out = model(input_tensor)
177
-
178
- Args:
179
- pretrained: boolean, True if model is pretrained
180
- **kwargs: keyword arguments of the ResNet architecture
181
-
182
- Returns:
183
- A feature extractor model
184
- """
185
- return _magc_resnet(
186
- "magc_resnet31",
187
- pretrained,
188
- [1, 2, 5, 3],
189
- [256, 256, 512, 512],
190
- [False] * 4,
191
- [True] * 4,
192
- [(2, 2), (2, 1), None, None],
193
- False,
194
- stem_channels=128,
195
- **kwargs,
196
- )