python-doctr 0.12.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/__init__.py +0 -1
- doctr/datasets/__init__.py +0 -5
- doctr/datasets/datasets/__init__.py +1 -6
- doctr/datasets/datasets/pytorch.py +2 -2
- doctr/datasets/generator/__init__.py +1 -6
- doctr/datasets/vocabs.py +0 -2
- doctr/file_utils.py +2 -101
- doctr/io/image/__init__.py +1 -7
- doctr/io/image/pytorch.py +1 -1
- doctr/models/_utils.py +3 -3
- doctr/models/classification/magc_resnet/__init__.py +1 -6
- doctr/models/classification/magc_resnet/pytorch.py +2 -2
- doctr/models/classification/mobilenet/__init__.py +1 -6
- doctr/models/classification/predictor/__init__.py +1 -6
- doctr/models/classification/predictor/pytorch.py +1 -1
- doctr/models/classification/resnet/__init__.py +1 -6
- doctr/models/classification/textnet/__init__.py +1 -6
- doctr/models/classification/textnet/pytorch.py +1 -1
- doctr/models/classification/vgg/__init__.py +1 -6
- doctr/models/classification/vip/__init__.py +1 -4
- doctr/models/classification/vip/layers/__init__.py +1 -4
- doctr/models/classification/vip/layers/pytorch.py +1 -1
- doctr/models/classification/vit/__init__.py +1 -6
- doctr/models/classification/vit/pytorch.py +2 -2
- doctr/models/classification/zoo.py +6 -11
- doctr/models/detection/_utils/__init__.py +1 -6
- doctr/models/detection/core.py +1 -1
- doctr/models/detection/differentiable_binarization/__init__.py +1 -6
- doctr/models/detection/differentiable_binarization/base.py +4 -12
- doctr/models/detection/differentiable_binarization/pytorch.py +3 -3
- doctr/models/detection/fast/__init__.py +1 -6
- doctr/models/detection/fast/base.py +4 -14
- doctr/models/detection/fast/pytorch.py +4 -4
- doctr/models/detection/linknet/__init__.py +1 -6
- doctr/models/detection/linknet/base.py +3 -12
- doctr/models/detection/linknet/pytorch.py +2 -2
- doctr/models/detection/predictor/__init__.py +1 -6
- doctr/models/detection/predictor/pytorch.py +1 -1
- doctr/models/detection/zoo.py +15 -32
- doctr/models/factory/hub.py +8 -21
- doctr/models/kie_predictor/__init__.py +1 -6
- doctr/models/kie_predictor/pytorch.py +2 -6
- doctr/models/modules/layers/__init__.py +1 -6
- doctr/models/modules/layers/pytorch.py +3 -3
- doctr/models/modules/transformer/__init__.py +1 -6
- doctr/models/modules/transformer/pytorch.py +2 -2
- doctr/models/modules/vision_transformer/__init__.py +1 -6
- doctr/models/predictor/__init__.py +1 -6
- doctr/models/predictor/base.py +3 -8
- doctr/models/predictor/pytorch.py +2 -5
- doctr/models/preprocessor/__init__.py +1 -6
- doctr/models/preprocessor/pytorch.py +27 -32
- doctr/models/recognition/crnn/__init__.py +1 -6
- doctr/models/recognition/crnn/pytorch.py +6 -6
- doctr/models/recognition/master/__init__.py +1 -6
- doctr/models/recognition/master/pytorch.py +5 -5
- doctr/models/recognition/parseq/__init__.py +1 -6
- doctr/models/recognition/parseq/pytorch.py +5 -5
- doctr/models/recognition/predictor/__init__.py +1 -6
- doctr/models/recognition/predictor/_utils.py +7 -16
- doctr/models/recognition/predictor/pytorch.py +1 -2
- doctr/models/recognition/sar/__init__.py +1 -6
- doctr/models/recognition/sar/pytorch.py +3 -3
- doctr/models/recognition/viptr/__init__.py +1 -4
- doctr/models/recognition/viptr/pytorch.py +3 -3
- doctr/models/recognition/vitstr/__init__.py +1 -6
- doctr/models/recognition/vitstr/pytorch.py +3 -3
- doctr/models/recognition/zoo.py +13 -13
- doctr/models/utils/__init__.py +1 -6
- doctr/models/utils/pytorch.py +1 -1
- doctr/transforms/functional/__init__.py +1 -6
- doctr/transforms/functional/pytorch.py +4 -4
- doctr/transforms/modules/__init__.py +1 -7
- doctr/transforms/modules/base.py +26 -92
- doctr/transforms/modules/pytorch.py +28 -26
- doctr/utils/geometry.py +6 -10
- doctr/utils/visualization.py +1 -1
- doctr/version.py +1 -1
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/METADATA +18 -75
- python_doctr-1.0.0.dist-info/RECORD +149 -0
- doctr/datasets/datasets/tensorflow.py +0 -59
- doctr/datasets/generator/tensorflow.py +0 -58
- doctr/datasets/loader.py +0 -94
- doctr/io/image/tensorflow.py +0 -101
- doctr/models/classification/magc_resnet/tensorflow.py +0 -196
- doctr/models/classification/mobilenet/tensorflow.py +0 -442
- doctr/models/classification/predictor/tensorflow.py +0 -60
- doctr/models/classification/resnet/tensorflow.py +0 -418
- doctr/models/classification/textnet/tensorflow.py +0 -275
- doctr/models/classification/vgg/tensorflow.py +0 -125
- doctr/models/classification/vit/tensorflow.py +0 -201
- doctr/models/detection/_utils/tensorflow.py +0 -34
- doctr/models/detection/differentiable_binarization/tensorflow.py +0 -421
- doctr/models/detection/fast/tensorflow.py +0 -427
- doctr/models/detection/linknet/tensorflow.py +0 -377
- doctr/models/detection/predictor/tensorflow.py +0 -70
- doctr/models/kie_predictor/tensorflow.py +0 -187
- doctr/models/modules/layers/tensorflow.py +0 -171
- doctr/models/modules/transformer/tensorflow.py +0 -235
- doctr/models/modules/vision_transformer/tensorflow.py +0 -100
- doctr/models/predictor/tensorflow.py +0 -155
- doctr/models/preprocessor/tensorflow.py +0 -122
- doctr/models/recognition/crnn/tensorflow.py +0 -317
- doctr/models/recognition/master/tensorflow.py +0 -320
- doctr/models/recognition/parseq/tensorflow.py +0 -516
- doctr/models/recognition/predictor/tensorflow.py +0 -79
- doctr/models/recognition/sar/tensorflow.py +0 -423
- doctr/models/recognition/vitstr/tensorflow.py +0 -285
- doctr/models/utils/tensorflow.py +0 -189
- doctr/transforms/functional/tensorflow.py +0 -254
- doctr/transforms/modules/tensorflow.py +0 -562
- python_doctr-0.12.0.dist-info/RECORD +0 -180
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/WHEEL +0 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/zip-safe +0 -0
doctr/models/detection/linknet/base.py
CHANGED

@@ -156,14 +156,12 @@ class _LinkNet(BaseModel):
         self,
         target: list[dict[str, np.ndarray]],
         output_shape: tuple[int, int, int],
-        channels_last: bool = True,
     ) -> tuple[np.ndarray, np.ndarray]:
         """Build the target, and it's mask to be used from loss computation.

         Args:
             target: target coming from dataset
             output_shape: shape of the output of the model without batch_size
-            channels_last: whether channels are last or not

         Returns:
             the new formatted target and the mask

@@ -175,10 +173,8 @@ class _LinkNet(BaseModel):

         h: int
         w: int
-        if channels_last:
-            h, w, num_classes = output_shape
-        else:
-            num_classes, h, w = output_shape
+
+        num_classes, h, w = output_shape
         target_shape = (len(target), num_classes, h, w)

         seg_target: np.ndarray = np.zeros(target_shape, dtype=np.uint8)

@@ -237,11 +233,6 @@ class _LinkNet(BaseModel):
                 if shrunken.shape[0] <= 2 or not Polygon(shrunken).is_valid:
                     seg_mask[idx, class_idx, box[1] : box[3] + 1, box[0] : box[2] + 1] = False
                     continue
-                cv2.fillPoly(seg_target[idx, class_idx], [shrunken.astype(np.int32)], 1.0)
-
-        # Don't forget to switch back to channel last if Tensorflow is used
-        if channels_last:
-            seg_target = seg_target.transpose((0, 2, 3, 1))
-            seg_mask = seg_mask.transpose((0, 2, 3, 1))
+                cv2.fillPoly(seg_target[idx, class_idx], [shrunken.astype(np.int32)], 1.0)

         return seg_target, seg_mask
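With the TensorFlow transpose gone, `build_target` always emits channels-first numpy arrays. A self-contained sketch of the remaining rasterization step, with an illustrative polygon and sizes (not taken from the package):

```python
import cv2
import numpy as np

# seg_target stays in channels-first layout (N, num_classes, H, W)
seg_target = np.zeros((1, 1, 64, 64), dtype=np.uint8)
poly = np.array([[10, 10], [50, 12], [48, 40], [12, 38]], dtype=np.int32)
cv2.fillPoly(seg_target[0, 0], [poly], 1.0)
assert seg_target.shape == (1, 1, 64, 64)  # no (0, 2, 3, 1) transpose anymore
```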
doctr/models/detection/linknet/pytorch.py
CHANGED

@@ -193,7 +193,7 @@ class LinkNet(nn.Module, _LinkNet):

         if target is None or return_preds:
             # Disable for torch.compile compatibility
-            @torch.compiler.disable  # type: ignore[attr-defined]
+            @torch.compiler.disable
             def _postprocess(prob_map: torch.Tensor) -> list[dict[str, Any]]:
                 return [
                     dict(zip(self.class_names, preds))

@@ -230,7 +230,7 @@ class LinkNet(nn.Module, _LinkNet):
         Returns:
             A loss tensor
         """
-        _target, _mask = self.build_target(target, out_map.shape[1:], channels_last=False)  # type: ignore[arg-type]
+        _target, _mask = self.build_target(target, out_map.shape[1:])  # type: ignore[arg-type]

         seg_target, seg_mask = torch.from_numpy(_target).to(dtype=out_map.dtype), torch.from_numpy(_mask)
         seg_target, seg_mask = seg_target.to(out_map.device), seg_mask.to(out_map.device)
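The decorator change above appears to be only a dropped type-ignore comment, but the pattern itself is worth noting: `@torch.compiler.disable` keeps numpy-heavy post-processing out of the compiled graph. A minimal sketch with a hypothetical post-processing function:

```python
import torch

@torch.compiler.disable
def _postprocess(prob_map: torch.Tensor) -> list[float]:
    # Runs eagerly even when invoked from compiled code
    return prob_map.sigmoid().flatten().tolist()

model = torch.compile(torch.nn.Linear(8, 4))
print(_postprocess(model(torch.rand(2, 8)))[:3])
```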
doctr/models/detection/predictor/pytorch.py
CHANGED

@@ -36,7 +36,7 @@ class DetectionPredictor(nn.Module):
     @torch.inference_mode()
     def forward(
         self,
-        pages: list[np.ndarray | torch.Tensor],
+        pages: list[np.ndarray],
         return_maps: bool = False,
         **kwargs: Any,
     ) -> list[dict[str, np.ndarray]] | tuple[list[dict[str, np.ndarray]], list[np.ndarray]]:
doctr/models/detection/zoo.py
CHANGED

@@ -5,7 +5,7 @@

 from typing import Any

-from doctr.file_utils import is_tf_available, is_torch_available
+from doctr.models.utils import _CompiledModule

 from .. import detection
 from ..detection.fast import reparameterize

@@ -16,30 +16,17 @@ __all__ = ["detection_predictor"]

 ARCHS: list[str]

-
-if is_tf_available():
-    ARCHS = [
-        "db_resnet50",
-        "db_mobilenet_v3_large",
-        "linknet_resnet18",
-        "linknet_resnet34",
-        "linknet_resnet50",
-        "fast_tiny",
-        "fast_small",
-        "fast_base",
-    ]
-elif is_torch_available():
-    ARCHS = [
-        "db_resnet34",
-        "db_resnet50",
-        "db_mobilenet_v3_large",
-        "linknet_resnet18",
-        "linknet_resnet34",
-        "linknet_resnet50",
-        "fast_tiny",
-        "fast_small",
-        "fast_base",
-    ]
+ARCHS = [
+    "db_resnet34",
+    "db_resnet50",
+    "db_mobilenet_v3_large",
+    "linknet_resnet18",
+    "linknet_resnet34",
+    "linknet_resnet50",
+    "fast_tiny",
+    "fast_small",
+    "fast_base",
+]


 def _predictor(arch: Any, pretrained: bool, assume_straight_pages: bool = True, **kwargs: Any) -> DetectionPredictor:

@@ -56,12 +43,8 @@ def _predictor(arch: Any, pretrained: bool, assume_straight_pages: bool = True, **kwargs: Any) -> DetectionPredictor:
         if isinstance(_model, detection.FAST):
             _model = reparameterize(_model)
     else:
-        allowed_archs = [detection.DBNet, detection.LinkNet, detection.FAST]
-        if is_torch_available():
-            # Adding the type for torch compiled models to the allowed architectures
-            from doctr.models.utils import _CompiledModule
-
-            allowed_archs.append(_CompiledModule)
+        # Adding the type for torch compiled models to the allowed architectures
+        allowed_archs = [detection.DBNet, detection.LinkNet, detection.FAST, _CompiledModule]

     if not isinstance(arch, tuple(allowed_archs)):
         raise ValueError(f"unknown architecture: {type(arch)}")

@@ -76,7 +59,7 @@ def _predictor(arch: Any, pretrained: bool, assume_straight_pages: bool = True, **kwargs: Any) -> DetectionPredictor:
     kwargs["std"] = kwargs.get("std", _model.cfg["std"])
     kwargs["batch_size"] = kwargs.get("batch_size", 2)
     predictor = DetectionPredictor(
-        PreProcessor(_model.cfg["input_shape"][:-1] if is_tf_available() else _model.cfg["input_shape"][1:], **kwargs),
+        PreProcessor(_model.cfg["input_shape"][1:], **kwargs),
         _model,
     )
     return predictor
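The public `detection_predictor` entrypoint is unchanged by the flattened `ARCHS` list. A usage sketch under 1.0.0 (the architecture choice and the random page are illustrative; `pretrained=True` downloads weights):

```python
import numpy as np
from doctr.models import detection_predictor

predictor = detection_predictor("fast_base", pretrained=True, assume_straight_pages=True)
page = (np.random.rand(1024, 768, 3) * 255).astype(np.uint8)  # (H, W, C) uint8
preds = predictor([page])  # one dict of per-class box arrays per page
```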
doctr/models/factory/hub.py
CHANGED

@@ -13,6 +13,7 @@ import textwrap
 from pathlib import Path
 from typing import Any

+import torch
 from huggingface_hub import (
     HfApi,
     Repository,

@@ -23,10 +24,6 @@ from huggingface_hub import (
 )

 from doctr import models
-from doctr.file_utils import is_tf_available, is_torch_available
-
-if is_torch_available():
-    import torch

 __all__ = ["login_to_hub", "push_to_hf_hub", "from_hub", "_save_model_and_config_for_hf_hub"]

@@ -61,19 +58,14 @@ def _save_model_and_config_for_hf_hub(model: Any, save_dir: str, arch: str, task: str) -> None:
     """Save model and config to disk for pushing to huggingface hub

     Args:
-        model: TF or PyTorch model to be saved
+        model: PyTorch model to be saved
         save_dir: directory to save model and config
         arch: architecture name
         task: task name
     """
     save_directory = Path(save_dir)
-
-    if is_torch_available():
-        weights_path = save_directory / "pytorch_model.bin"
-        torch.save(model.state_dict(), weights_path)
-    elif is_tf_available():
-        weights_path = save_directory / "tf_model.weights.h5"
-        model.save_weights(str(weights_path))
+    weights_path = save_directory / "pytorch_model.bin"
+    torch.save(model.state_dict(), weights_path)

     config_path = save_directory / "config.json"

@@ -96,7 +88,7 @@ def push_to_hf_hub(model: Any, model_name: str, task: str, **kwargs) -> None:
     >>> push_to_hf_hub(model, 'my-model', 'recognition', arch='crnn_mobilenet_v3_small')

     Args:
-        model: TF or PyTorch model to be saved
+        model: PyTorch model to be saved
         model_name: name of the model which is also the repository name
         task: task name
         **kwargs: keyword arguments for push_to_hf_hub

@@ -120,7 +112,7 @@ def push_to_hf_hub(model: Any, model_name: str, task: str, **kwargs) -> None:
     <img src="https://doctr-static.mindee.com/models?id=v0.3.1/Logo_doctr.gif&src=0" width="60%">
 </p>

-**Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
+**Optical Character Recognition made seamless & accessible to anyone, powered by PyTorch**

 ## Task: {task}

@@ -214,13 +206,8 @@ def from_hub(repo_id: str, **kwargs: Any):

     # update model cfg
     model.cfg = cfg
-
-    # load the weights
-    if is_torch_available():
-        weights = hf_hub_download(repo_id, filename="pytorch_model.bin", **kwargs)
-    else:  # tf
-        weights = hf_hub_download(repo_id, filename="tf_model.weights.h5", **kwargs)
-
+    # load the weights
+    weights = hf_hub_download(repo_id, filename="pytorch_model.bin", **kwargs)
     model.from_pretrained(weights)

     return model
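Weights now always travel as `pytorch_model.bin`, both when saving and in `from_hub`. A sketch of the round-trip, assuming a logged-in Hugging Face account; the repository name is hypothetical:

```python
from doctr.models import recognition
from doctr.models.factory import from_hub, push_to_hf_hub

model = recognition.crnn_vgg16_bn(pretrained=True)
push_to_hf_hub(model, model_name="my-crnn-vgg16-bn", task="recognition", arch="crnn_vgg16_bn")
reloaded = from_hub("my-user/my-crnn-vgg16-bn")  # fetches pytorch_model.bin
```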
doctr/models/kie_predictor/pytorch.py
CHANGED

@@ -68,14 +68,14 @@ class KIEPredictor(nn.Module, _KIEPredictor):
     @torch.inference_mode()
     def forward(
         self,
-        pages: list[np.ndarray | torch.Tensor],
+        pages: list[np.ndarray],
         **kwargs: Any,
     ) -> Document:
         # Dimension check
         if any(page.ndim != 3 for page in pages):
             raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")

-        origin_page_shapes = [page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:] for page in pages]
+        origin_page_shapes = [page.shape[:2] for page in pages]

         # Localize text elements
         loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)

@@ -113,9 +113,6 @@ class KIEPredictor(nn.Module, _KIEPredictor):
             dict_loc_preds[class_name] = _loc_preds
             objectness_scores[class_name] = _scores

-        # Check whether crop mode should be switched to channels first
-        channels_last = len(pages) == 0 or isinstance(pages[0], np.ndarray)
-
         # Apply hooks to loc_preds if any
         for hook in self.hooks:
             dict_loc_preds = hook(dict_loc_preds)

@@ -126,7 +123,6 @@ class KIEPredictor(nn.Module, _KIEPredictor):
             crops[class_name], dict_loc_preds[class_name] = self._prepare_crops(
                 pages,
                 dict_loc_preds[class_name],
-                channels_last=channels_last,
                 assume_straight_pages=self.assume_straight_pages,
                 assume_horizontal=self._page_orientation_disabled,
             )
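As with the OCR predictor further down, KIE pages are now plain channels-last numpy arrays, so the crop-mode probe disappears. A usage sketch with a random page:

```python
import numpy as np
from doctr.models import kie_predictor

predictor = kie_predictor(pretrained=True)
page = (np.random.rand(896, 640, 3) * 255).astype(np.uint8)  # (H, W, C)
out = predictor([page])  # predictions grouped by class name
```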
doctr/models/modules/layers/pytorch.py
CHANGED

@@ -151,16 +151,16 @@ class FASTConvLayer(nn.Module):
             id_tensor = torch.from_numpy(kernel_value).to(identity.weight.device)
             self.id_tensor = self._pad_to_mxn_tensor(id_tensor)
         kernel = self.id_tensor
-        std = (identity.running_var + identity.eps).sqrt()  # type: ignore
+        std = (identity.running_var + identity.eps).sqrt()
         t = (identity.weight / std).reshape(-1, 1, 1, 1)
-        return kernel * t, identity.bias - identity.running_mean * identity.weight / std
+        return kernel * t, identity.bias - identity.running_mean * identity.weight / std  # type: ignore[operator]

     def _fuse_bn_tensor(self, conv: nn.Conv2d, bn: nn.BatchNorm2d) -> tuple[torch.Tensor, torch.Tensor]:
         kernel = conv.weight
         kernel = self._pad_to_mxn_tensor(kernel)
         std = (bn.running_var + bn.eps).sqrt()  # type: ignore
         t = (bn.weight / std).reshape(-1, 1, 1, 1)
-        return kernel * t, bn.bias - bn.running_mean * bn.weight / std
+        return kernel * t, bn.bias - bn.running_mean * bn.weight / std  # type: ignore[operator]

     def _get_equivalent_kernel_bias(self) -> tuple[torch.Tensor, torch.Tensor]:
         kernel_mxn, bias_mxn = self._fuse_bn_tensor(self.conv, self.bn)
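Both `_fuse_bn_tensor` variants fold a BatchNorm into the preceding kernel: w' = w * gamma / sqrt(var + eps) and b' = beta - mean * gamma / sqrt(var + eps). A self-contained check of that identity with arbitrary layer sizes:

```python
import torch
from torch import nn

conv = nn.Conv2d(3, 8, 3, padding=1, bias=False)
bn = nn.BatchNorm2d(8).eval()

std = (bn.running_var + bn.eps).sqrt()
t = (bn.weight / std).reshape(-1, 1, 1, 1)
fused_w = conv.weight * t
fused_b = bn.bias - bn.running_mean * bn.weight / std

x = torch.rand(1, 3, 16, 16)
assert torch.allclose(bn(conv(x)), nn.functional.conv2d(x, fused_w, fused_b, padding=1), atol=1e-5)
```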
doctr/models/modules/transformer/pytorch.py
CHANGED

@@ -50,8 +50,8 @@ def scaled_dot_product_attention(
     scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
     if mask is not None:
         # NOTE: to ensure the ONNX compatibility, masked_fill works only with int equal condition
-        scores = scores.masked_fill(mask == 0, float("-inf"))
-    p_attn = torch.softmax(scores, dim=-1)
+        scores = scores.masked_fill(mask == 0, float("-inf"))
+    p_attn = torch.softmax(scores, dim=-1)
     return torch.matmul(p_attn, value), p_attn

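The hunk above is formatting-only, but it documents the masking convention: an integer-valued mask compared against 0 (for ONNX export), with masked scores pushed to -inf before the softmax. A standalone sketch:

```python
import math

import torch

q = k = v = torch.rand(1, 4, 10, 16)  # (N, heads, seq_len, head_dim)
mask = torch.ones(1, 1, 10, 10, dtype=torch.uint8).tril()  # causal, int-valued
scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(q.size(-1))
scores = scores.masked_fill(mask == 0, float("-inf"))
p_attn = torch.softmax(scores, dim=-1)  # masked positions get zero weight
out = torch.matmul(p_attn, v)
```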
doctr/models/predictor/base.py
CHANGED

@@ -116,18 +116,14 @@ class _OCRPredictor:
     def _generate_crops(
         pages: list[np.ndarray],
         loc_preds: list[np.ndarray],
-        channels_last: bool,
         assume_straight_pages: bool = False,
         assume_horizontal: bool = False,
     ) -> list[list[np.ndarray]]:
         if assume_straight_pages:
-            crops = [
-                extract_crops(page, _boxes[:, :4], channels_last=channels_last)
-                for page, _boxes in zip(pages, loc_preds)
-            ]
+            crops = [extract_crops(page, _boxes[:, :4]) for page, _boxes in zip(pages, loc_preds)]
         else:
             crops = [
-                extract_rcrops(page, _boxes[:, :4], channels_last=channels_last, assume_horizontal=assume_horizontal)
+                extract_rcrops(page, _boxes[:, :4], assume_horizontal=assume_horizontal)
                 for page, _boxes in zip(pages, loc_preds)
             ]
         return crops

@@ -136,11 +132,10 @@ class _OCRPredictor:
     def _prepare_crops(
         pages: list[np.ndarray],
         loc_preds: list[np.ndarray],
-        channels_last: bool,
         assume_straight_pages: bool = False,
         assume_horizontal: bool = False,
     ) -> tuple[list[list[np.ndarray]], list[np.ndarray]]:
-        crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages, assume_horizontal)
+        crops = _OCRPredictor._generate_crops(pages, loc_preds, assume_straight_pages, assume_horizontal)

         # Avoid sending zero-sized crops
         is_kept = [[all(s > 0 for s in crop.shape) for crop in page_crops] for page_crops in crops]
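With `channels_last` gone, straight-box cropping reduces to numpy slicing on a (H, W, C) page; roughly what `extract_crops` does per box. Coordinates below are illustrative:

```python
import numpy as np

page = (np.random.rand(200, 300, 3) * 255).astype(np.uint8)
box = np.array([40, 50, 120, 90])  # xmin, ymin, xmax, ymax in pixels
crop = page[box[1] : box[3], box[0] : box[2]]  # rows are y, columns are x
assert crop.shape == (40, 80, 3)
```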
doctr/models/predictor/pytorch.py
CHANGED

@@ -68,14 +68,14 @@ class OCRPredictor(nn.Module, _OCRPredictor):
     @torch.inference_mode()
     def forward(
         self,
-        pages: list[np.ndarray | torch.Tensor],
+        pages: list[np.ndarray],
         **kwargs: Any,
     ) -> Document:
         # Dimension check
         if any(page.ndim != 3 for page in pages):
             raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")

-        origin_page_shapes = [page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:] for page in pages]
+        origin_page_shapes = [page.shape[:2] for page in pages]

         # Localize text elements
         loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)

@@ -109,8 +109,6 @@ class OCRPredictor(nn.Module, _OCRPredictor):
         loc_preds = [list(loc_pred.values())[0] for loc_pred in loc_preds]
         # Detach objectness scores from loc_preds
         loc_preds, objectness_scores = detach_scores(loc_preds)
-        # Check whether crop mode should be switched to channels first
-        channels_last = len(pages) == 0 or isinstance(pages[0], np.ndarray)

         # Apply hooks to loc_preds if any
         for hook in self.hooks:

@@ -120,7 +118,6 @@ class OCRPredictor(nn.Module, _OCRPredictor):
         crops, loc_preds = self._prepare_crops(
             pages,
             loc_preds,
-            channels_last=channels_last,
             assume_straight_pages=self.assume_straight_pages,
             assume_horizontal=self._page_orientation_disabled,
         )
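Where 0.12.0 also tolerated channels-first `torch.Tensor` pages, 1.0.0 standardizes on channels-last numpy input. A usage sketch:

```python
import numpy as np
from doctr.models import ocr_predictor

predictor = ocr_predictor(pretrained=True)
pages = [(np.random.rand(896, 640, 3) * 255).astype(np.uint8)]  # numpy only now
document = predictor(pages)
print(document.render())
```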
doctr/models/preprocessor/pytorch.py
CHANGED

@@ -60,65 +60,60 @@ class PreProcessor(nn.Module):

         return batches

-    def sample_transforms(self, x: np.ndarray | torch.Tensor) -> torch.Tensor:
+    def sample_transforms(self, x: np.ndarray) -> torch.Tensor:
         if x.ndim != 3:
             raise AssertionError("expected list of 3D Tensors")
-        if isinstance(x, np.ndarray):
-            if x.dtype not in (np.uint8, np.float32, np.float16):
-                raise TypeError("unsupported data type for numpy.ndarray")
-            x = torch.from_numpy(x.copy()).permute(2, 0, 1)
-        elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
-            raise TypeError("unsupported data type for torch.Tensor")
+        if x.dtype not in (np.uint8, np.float32, np.float16):
+            raise TypeError("unsupported data type for numpy.ndarray")
+        tensor = torch.from_numpy(x.copy()).permute(2, 0, 1)
         # Resizing
-        x = self.resize(x)
+        tensor = self.resize(tensor)
         # Data type
-        if x.dtype == torch.uint8:
-            x = x.to(dtype=torch.float32).div(255).clip(0, 1)
+        if tensor.dtype == torch.uint8:
+            tensor = tensor.to(dtype=torch.float32).div(255).clip(0, 1)
         else:
-            x = x.to(dtype=torch.float32)
+            tensor = tensor.to(dtype=torch.float32)

-        return x
+        return tensor

-    def __call__(self, x: torch.Tensor | np.ndarray | list[torch.Tensor | np.ndarray]) -> list[torch.Tensor]:
+    def __call__(self, x: np.ndarray | list[np.ndarray]) -> list[torch.Tensor]:
         """Prepare document data for model forwarding

         Args:
-            x: list of images (np.array) or tensors (already resized and batched)
+            x: list of images (np.array) or a single image (np.array) of shape (H, W, C)

         Returns:
-            list of page batches
+            list of page batches (*, C, H, W) ready for model inference
         """
         # Input type check
-        if isinstance(x, (np.ndarray, torch.Tensor)):
+        if isinstance(x, np.ndarray):
             if x.ndim != 4:
                 raise AssertionError("expected 4D Tensor")
-            if isinstance(x, np.ndarray):
-                if x.dtype not in (np.uint8, np.float32, np.float16):
-                    raise TypeError("unsupported data type for numpy.ndarray")
-                x = torch.from_numpy(x.copy()).permute(0, 3, 1, 2)
-            elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
-                raise TypeError("unsupported data type for torch.Tensor")
+            if x.dtype not in (np.uint8, np.float32, np.float16):
+                raise TypeError("unsupported data type for numpy.ndarray")
+            tensor = torch.from_numpy(x.copy()).permute(0, 3, 1, 2)
+
             # Resizing
-            if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
-                x = F.resize(
-                    x, self.resize.size, interpolation=self.resize.interpolation, antialias=self.resize.antialias
+            if tensor.shape[-2] != self.resize.size[0] or tensor.shape[-1] != self.resize.size[1]:
+                tensor = F.resize(
+                    tensor, self.resize.size, interpolation=self.resize.interpolation, antialias=self.resize.antialias
                 )
             # Data type
-            if x.dtype == torch.uint8:
-                x = x.to(dtype=torch.float32).div(255).clip(0, 1)
+            if tensor.dtype == torch.uint8:
+                tensor = tensor.to(dtype=torch.float32).div(255).clip(0, 1)
             else:
-                x = x.to(dtype=torch.float32)
-            batches = [x]
+                tensor = tensor.to(dtype=torch.float32)
+            batches = [tensor]

-        elif isinstance(x, list) and all(isinstance(sample, (np.ndarray, torch.Tensor)) for sample in x):
+        elif isinstance(x, list) and all(isinstance(sample, np.ndarray) for sample in x):
             # Sample transform (to tensor, resize)
             samples = list(multithread_exec(self.sample_transforms, x))
             # Batching
-            batches = self.batch_inputs(samples)
+            batches = self.batch_inputs(samples)
         else:
             raise TypeError(f"invalid input type: {type(x)}")

         # Batch transforms (normalize)
         batches = list(multithread_exec(self.normalize, batches))

-        return batches
+        return batches
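The rewritten `PreProcessor` converts, resizes, rescales uint8 pages to [0, 1] and then normalizes, all from numpy input. A sketch of that contract (output size and batch size are illustrative):

```python
import numpy as np
from doctr.models.preprocessor import PreProcessor

pre = PreProcessor(output_size=(1024, 1024), batch_size=2)
pages = [(np.random.rand(768, 512, 3) * 255).astype(np.uint8) for _ in range(3)]
batches = pre(pages)  # list of float32 tensors shaped (*, 3, 1024, 1024)
assert all(b.dtype.is_floating_point for b in batches)
```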
doctr/models/recognition/crnn/pytorch.py
CHANGED

@@ -15,7 +15,7 @@ from torch.nn import functional as F
 from doctr.datasets import VOCABS, decode_sequence

 from ...classification import mobilenet_v3_large_r, mobilenet_v3_small_r, vgg16_bn_r
-from ...utils.pytorch import load_pretrained_params
+from ...utils import load_pretrained_params
 from ..core import RecognitionModel, RecognitionPostProcessor

 __all__ = ["CRNN", "crnn_vgg16_bn", "crnn_mobilenet_v3_small", "crnn_mobilenet_v3_large"]

@@ -25,8 +25,8 @@ default_cfgs: dict[str, dict[str, Any]] = {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
         "input_shape": (3, 32, 128),
-        "vocab": VOCABS["legacy_french"],
-        "url": "https://doctr-static.mindee.com/models?id=v0.3.1/crnn_vgg16_bn-9762b0b0.pt&src=0",
+        "vocab": VOCABS["french"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.12.0/crnn_vgg16_bn-0417f351.pt&src=0",
     },
     "crnn_mobilenet_v3_small": {
         "mean": (0.694, 0.695, 0.693),

@@ -82,7 +82,7 @@ class CTCPostProcessor(RecognitionPostProcessor):

     def __call__(self, logits: torch.Tensor) -> list[tuple[str, float]]:
         """Performs decoding of raw output with CTC and decoding of CTC predictions
-        with label_to_idx mapping
+        with label_to_idx mapping dictionary

         Args:
             logits: raw output of the model, shape (N, C + 1, seq_len)

@@ -223,7 +223,7 @@ class CRNN(RecognitionModel, nn.Module):

         if target is None or return_preds:
             # Disable for torch.compile compatibility
-            @torch.compiler.disable  # type: ignore[attr-defined]
+            @torch.compiler.disable
             def _postprocess(logits: torch.Tensor) -> list[tuple[str, float]]:
                 return self.postprocessor(logits)

@@ -257,7 +257,7 @@ def _crnn(
     _cfg["input_shape"] = kwargs["input_shape"]

     # Build the model
-    model = CRNN(feat_extractor, cfg=_cfg, **kwargs)
+    model = CRNN(feat_extractor, cfg=_cfg, **kwargs)  # type: ignore[arg-type]
     # Load pretrained parameters
     if pretrained:
         # The number of classes is not the same as the number of classes in the pretrained model =>
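Because `crnn_vgg16_bn` swaps its default vocab from `legacy_french` to `french` and points at a retrained v0.12.0 checkpoint, recognition output is not byte-for-byte comparable across the upgrade. A usage sketch:

```python
import numpy as np
from doctr.models import recognition_predictor

reco = recognition_predictor("crnn_vgg16_bn", pretrained=True)
crop = (np.random.rand(32, 128, 3) * 255).astype(np.uint8)  # one word crop
print(reco([crop]))  # [(text, confidence)]
```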
doctr/models/recognition/master/pytorch.py
CHANGED

@@ -16,7 +16,7 @@ from doctr.datasets import VOCABS
 from doctr.models.classification import magc_resnet31
 from doctr.models.modules.transformer import Decoder, PositionalEncoding

-from ...utils.pytorch import _bf16_to_float32, load_pretrained_params
+from ...utils import _bf16_to_float32, load_pretrained_params
 from .base import _MASTER, _MASTERPostProcessor

 __all__ = ["MASTER", "master"]

@@ -107,7 +107,7 @@ class MASTER(_MASTER, nn.Module):
         # NOTE: nn.TransformerDecoder takes the inverse from this implementation
         # [True, True, True, ..., False, False, False] -> False is masked
         # (N, 1, 1, max_length)
-        target_pad_mask = (target != self.vocab_size + 2).unsqueeze(1).unsqueeze(1)
+        target_pad_mask = (target != self.vocab_size + 2).unsqueeze(1).unsqueeze(1)
         target_length = target.size(1)
         # sub mask filled diagonal with True = see and False = masked (max_length, max_length)
         # NOTE: onnxruntime tril/triu works only with float currently (onnxruntime 1.11.1 - opset 14)

@@ -140,7 +140,7 @@ class MASTER(_MASTER, nn.Module):
         # Input length : number of timesteps
         input_len = model_output.shape[1]
         # Add one for additional <eos> token (sos disappear in shift!)
-        seq_len = seq_len + 1
+        seq_len = seq_len + 1
         # Compute loss: don't forget to shift gt! Otherwise the model learns to output the gt[t-1]!
         # The "masked" first gt char is <sos>. Delete last logit of the model output.
         cce = F.cross_entropy(model_output[:, :-1, :].permute(0, 2, 1), gt[:, 1:], reduction="none")

@@ -176,7 +176,7 @@ class MASTER(_MASTER, nn.Module):
             return_preds: if True, decode logits

         Returns:
-            A dictionnary containing eventually loss, logits and predictions.
+            A dictionary containing eventually loss, logits and predictions.
         """
         # Encode
         features = self.feat_extractor(x)["features"]

@@ -219,7 +219,7 @@ class MASTER(_MASTER, nn.Module):

         if return_preds:
             # Disable for torch.compile compatibility
-            @torch.compiler.disable  # type: ignore[attr-defined]
+            @torch.compiler.disable
             def _postprocess(logits: torch.Tensor) -> list[tuple[str, float]]:
                 return self.postprocessor(logits)

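The `compute_loss` context above relies on the usual teacher-forcing shift: the logit at step t is scored against gt[t+1] and the last logit is dropped. A minimal reproduction of that slicing, with arbitrary sizes:

```python
import torch
from torch.nn import functional as F

vocab_size, seq_len = 10, 6
model_output = torch.rand(2, seq_len, vocab_size)  # (N, T, C)
gt = torch.randint(0, vocab_size, (2, seq_len))
# Drop the last logit, shift the ground truth left by one
cce = F.cross_entropy(model_output[:, :-1, :].permute(0, 2, 1), gt[:, 1:], reduction="none")
assert cce.shape == (2, seq_len - 1)
```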