python-doctr 0.11.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/__init__.py +0 -1
- doctr/datasets/__init__.py +1 -5
- doctr/datasets/coco_text.py +139 -0
- doctr/datasets/cord.py +2 -1
- doctr/datasets/datasets/__init__.py +1 -6
- doctr/datasets/datasets/pytorch.py +2 -2
- doctr/datasets/funsd.py +2 -2
- doctr/datasets/generator/__init__.py +1 -6
- doctr/datasets/ic03.py +1 -1
- doctr/datasets/ic13.py +2 -1
- doctr/datasets/iiit5k.py +4 -1
- doctr/datasets/imgur5k.py +9 -2
- doctr/datasets/ocr.py +1 -1
- doctr/datasets/recognition.py +1 -1
- doctr/datasets/svhn.py +1 -1
- doctr/datasets/svt.py +2 -2
- doctr/datasets/synthtext.py +15 -2
- doctr/datasets/utils.py +7 -6
- doctr/datasets/vocabs.py +1100 -54
- doctr/file_utils.py +2 -92
- doctr/io/elements.py +37 -3
- doctr/io/image/__init__.py +1 -7
- doctr/io/image/pytorch.py +1 -1
- doctr/models/_utils.py +4 -4
- doctr/models/classification/__init__.py +1 -0
- doctr/models/classification/magc_resnet/__init__.py +1 -6
- doctr/models/classification/magc_resnet/pytorch.py +3 -4
- doctr/models/classification/mobilenet/__init__.py +1 -6
- doctr/models/classification/mobilenet/pytorch.py +15 -1
- doctr/models/classification/predictor/__init__.py +1 -6
- doctr/models/classification/predictor/pytorch.py +2 -2
- doctr/models/classification/resnet/__init__.py +1 -6
- doctr/models/classification/resnet/pytorch.py +26 -3
- doctr/models/classification/textnet/__init__.py +1 -6
- doctr/models/classification/textnet/pytorch.py +11 -2
- doctr/models/classification/vgg/__init__.py +1 -6
- doctr/models/classification/vgg/pytorch.py +16 -1
- doctr/models/classification/vip/__init__.py +1 -0
- doctr/models/classification/vip/layers/__init__.py +1 -0
- doctr/models/classification/vip/layers/pytorch.py +615 -0
- doctr/models/classification/vip/pytorch.py +505 -0
- doctr/models/classification/vit/__init__.py +1 -6
- doctr/models/classification/vit/pytorch.py +12 -3
- doctr/models/classification/zoo.py +7 -8
- doctr/models/detection/_utils/__init__.py +1 -6
- doctr/models/detection/core.py +1 -1
- doctr/models/detection/differentiable_binarization/__init__.py +1 -6
- doctr/models/detection/differentiable_binarization/base.py +7 -16
- doctr/models/detection/differentiable_binarization/pytorch.py +13 -4
- doctr/models/detection/fast/__init__.py +1 -6
- doctr/models/detection/fast/base.py +6 -17
- doctr/models/detection/fast/pytorch.py +17 -8
- doctr/models/detection/linknet/__init__.py +1 -6
- doctr/models/detection/linknet/base.py +5 -15
- doctr/models/detection/linknet/pytorch.py +12 -3
- doctr/models/detection/predictor/__init__.py +1 -6
- doctr/models/detection/predictor/pytorch.py +1 -1
- doctr/models/detection/zoo.py +15 -32
- doctr/models/factory/hub.py +9 -22
- doctr/models/kie_predictor/__init__.py +1 -6
- doctr/models/kie_predictor/pytorch.py +3 -7
- doctr/models/modules/layers/__init__.py +1 -6
- doctr/models/modules/layers/pytorch.py +52 -4
- doctr/models/modules/transformer/__init__.py +1 -6
- doctr/models/modules/transformer/pytorch.py +2 -2
- doctr/models/modules/vision_transformer/__init__.py +1 -6
- doctr/models/predictor/__init__.py +1 -6
- doctr/models/predictor/base.py +3 -8
- doctr/models/predictor/pytorch.py +3 -6
- doctr/models/preprocessor/__init__.py +1 -6
- doctr/models/preprocessor/pytorch.py +27 -32
- doctr/models/recognition/__init__.py +1 -0
- doctr/models/recognition/crnn/__init__.py +1 -6
- doctr/models/recognition/crnn/pytorch.py +16 -7
- doctr/models/recognition/master/__init__.py +1 -6
- doctr/models/recognition/master/pytorch.py +15 -6
- doctr/models/recognition/parseq/__init__.py +1 -6
- doctr/models/recognition/parseq/pytorch.py +26 -8
- doctr/models/recognition/predictor/__init__.py +1 -6
- doctr/models/recognition/predictor/_utils.py +100 -47
- doctr/models/recognition/predictor/pytorch.py +4 -5
- doctr/models/recognition/sar/__init__.py +1 -6
- doctr/models/recognition/sar/pytorch.py +13 -4
- doctr/models/recognition/utils.py +56 -47
- doctr/models/recognition/viptr/__init__.py +1 -0
- doctr/models/recognition/viptr/pytorch.py +277 -0
- doctr/models/recognition/vitstr/__init__.py +1 -6
- doctr/models/recognition/vitstr/pytorch.py +13 -4
- doctr/models/recognition/zoo.py +13 -8
- doctr/models/utils/__init__.py +1 -6
- doctr/models/utils/pytorch.py +29 -19
- doctr/transforms/functional/__init__.py +1 -6
- doctr/transforms/functional/pytorch.py +4 -4
- doctr/transforms/modules/__init__.py +1 -7
- doctr/transforms/modules/base.py +26 -92
- doctr/transforms/modules/pytorch.py +28 -26
- doctr/utils/data.py +1 -1
- doctr/utils/geometry.py +7 -11
- doctr/utils/visualization.py +1 -1
- doctr/version.py +1 -1
- {python_doctr-0.11.0.dist-info → python_doctr-1.0.0.dist-info}/METADATA +22 -63
- python_doctr-1.0.0.dist-info/RECORD +149 -0
- {python_doctr-0.11.0.dist-info → python_doctr-1.0.0.dist-info}/WHEEL +1 -1
- doctr/datasets/datasets/tensorflow.py +0 -59
- doctr/datasets/generator/tensorflow.py +0 -58
- doctr/datasets/loader.py +0 -94
- doctr/io/image/tensorflow.py +0 -101
- doctr/models/classification/magc_resnet/tensorflow.py +0 -196
- doctr/models/classification/mobilenet/tensorflow.py +0 -433
- doctr/models/classification/predictor/tensorflow.py +0 -60
- doctr/models/classification/resnet/tensorflow.py +0 -397
- doctr/models/classification/textnet/tensorflow.py +0 -266
- doctr/models/classification/vgg/tensorflow.py +0 -116
- doctr/models/classification/vit/tensorflow.py +0 -192
- doctr/models/detection/_utils/tensorflow.py +0 -34
- doctr/models/detection/differentiable_binarization/tensorflow.py +0 -414
- doctr/models/detection/fast/tensorflow.py +0 -419
- doctr/models/detection/linknet/tensorflow.py +0 -369
- doctr/models/detection/predictor/tensorflow.py +0 -70
- doctr/models/kie_predictor/tensorflow.py +0 -187
- doctr/models/modules/layers/tensorflow.py +0 -171
- doctr/models/modules/transformer/tensorflow.py +0 -235
- doctr/models/modules/vision_transformer/tensorflow.py +0 -100
- doctr/models/predictor/tensorflow.py +0 -155
- doctr/models/preprocessor/tensorflow.py +0 -122
- doctr/models/recognition/crnn/tensorflow.py +0 -308
- doctr/models/recognition/master/tensorflow.py +0 -313
- doctr/models/recognition/parseq/tensorflow.py +0 -508
- doctr/models/recognition/predictor/tensorflow.py +0 -79
- doctr/models/recognition/sar/tensorflow.py +0 -416
- doctr/models/recognition/vitstr/tensorflow.py +0 -278
- doctr/models/utils/tensorflow.py +0 -182
- doctr/transforms/functional/tensorflow.py +0 -254
- doctr/transforms/modules/tensorflow.py +0 -562
- python_doctr-0.11.0.dist-info/RECORD +0 -173
- {python_doctr-0.11.0.dist-info → python_doctr-1.0.0.dist-info/licenses}/LICENSE +0 -0
- {python_doctr-0.11.0.dist-info → python_doctr-1.0.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.11.0.dist-info → python_doctr-1.0.0.dist-info}/zip-safe +0 -0
doctr/models/detection/differentiable_binarization/pytorch.py
CHANGED

@@ -179,6 +179,15 @@ class DBNet(_DBNet, nn.Module):
                 m.weight.data.fill_(1.0)
                 m.bias.data.zero_()
 
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
     def forward(
         self,
         x: torch.Tensor,
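Every detection model in 1.0.0 gains this `from_pretrained` method as a thin wrapper around `load_pretrained_params`. A minimal usage sketch (the checkpoint URL is a placeholder, not a published artifact):

```python
from doctr.models import detection

# Instantiate the architecture without the default pretrained weights
model = detection.db_resnet50(pretrained=False)

# Load a checkpoint from a local path or URL (placeholder URL for illustration)
model.from_pretrained("https://example.com/checkpoints/db_resnet50_custom.pt")
```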
@@ -206,7 +215,7 @@ class DBNet(_DBNet, nn.Module):
 
         if target is None or return_preds:
             # Disable for torch.compile compatibility
-            @torch.compiler.disable
+            @torch.compiler.disable
             def _postprocess(prob_map: torch.Tensor) -> list[dict[str, Any]]:
                 return [
                     dict(zip(self.class_names, preds))
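The postprocessing closure stays decorated with `torch.compiler.disable`, which makes Dynamo run it eagerly instead of tracing its data-dependent, OpenCV-backed logic. A standalone sketch of the pattern (names are illustrative):

```python
import torch

@torch.compiler.disable
def postprocess(prob_map: torch.Tensor) -> list[float]:
    # Data-dependent work that would break the compiled graph runs eagerly here
    return prob_map.flatten().tolist()

@torch.compile
def model_step(x: torch.Tensor) -> torch.Tensor:
    return torch.sigmoid(x @ x.T)

print(postprocess(model_step(torch.randn(4, 4))))
```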
@@ -252,7 +261,7 @@ class DBNet(_DBNet, nn.Module):
         prob_map = torch.sigmoid(out_map)
         thresh_map = torch.sigmoid(thresh_map)
 
-        targets = self.build_target(target, out_map.shape[1:], False)  # type: ignore[arg-type]
+        targets = self.build_target(target, out_map.shape[1:])  # type: ignore[arg-type]
 
         seg_target, seg_mask = torch.from_numpy(targets[0]), torch.from_numpy(targets[1])
         seg_target, seg_mask = seg_target.to(out_map.device), seg_mask.to(out_map.device)
@@ -276,7 +285,7 @@ class DBNet(_DBNet, nn.Module):
             dice_map = torch.softmax(out_map, dim=1)
         else:
             # compute binary map instead
-            dice_map = 1 / (1 + torch.exp(-50.0 * (prob_map - thresh_map)))
+            dice_map = 1 / (1 + torch.exp(-50.0 * (prob_map - thresh_map)))
         # Class reduced
         inter = (seg_mask * dice_map * seg_target).sum((0, 2, 3))
         cardinality = (seg_mask * (dice_map + seg_target)).sum((0, 2, 3))
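This line is the approximate binary map of Differentiable Binarization, B̂ = 1 / (1 + e^(-k(P - T))) with amplification factor k = 50, which is numerically identical to `torch.sigmoid(50 * (P - T))`. A quick equivalence check:

```python
import torch

prob_map, thresh_map = torch.rand(1, 1, 8, 8), torch.rand(1, 1, 8, 8)

manual = 1 / (1 + torch.exp(-50.0 * (prob_map - thresh_map)))
sigmoid = torch.sigmoid(50.0 * (prob_map - thresh_map))
assert torch.allclose(manual, sigmoid)
```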
@@ -328,7 +337,7 @@ def _dbnet(
         _ignore_keys = (
             ignore_keys if kwargs["class_names"] != default_cfgs[arch].get("class_names", [CLASS_NAME]) else None
         )
-        load_pretrained_params(model, default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
 
     return model
doctr/models/detection/fast/base.py
CHANGED

@@ -56,9 +56,8 @@ class FASTPostProcessor(DetectionPostProcessor):
             area = (rect[1][0] + 1) * (1 + rect[1][1])
             length = 2 * (rect[1][0] + rect[1][1]) + 2
         else:
-            poly = Polygon(points)
-            area = poly.area
-            length = poly.length
+            area = cv2.contourArea(points)
+            length = cv2.arcLength(points, closed=True)
         distance = area * self.unclip_ratio / length  # compute distance to expand polygon
         offset = pyclipper.PyclipperOffset()
         offset.AddPath(points, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
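The box expansion follows DBNet's unclipping rule D = A·r / L (area A, perimeter L, unclip ratio r); the rewrite computes A and L with OpenCV instead of Shapely. A self-contained sketch of the expansion step, assuming `opencv-python` and `pyclipper` are installed:

```python
import cv2
import numpy as np
import pyclipper

points = np.array([[10, 10], [60, 12], [58, 40], [8, 38]], dtype=np.int32)
unclip_ratio = 1.0

area = cv2.contourArea(points)
length = cv2.arcLength(points, closed=True)
distance = area * unclip_ratio / length  # D = A * r / L

offset = pyclipper.PyclipperOffset()
offset.AddPath(points.tolist(), pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
expanded = np.array(offset.Execute(distance)[0])  # expanded polygon vertices
```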
@@ -154,14 +153,12 @@ class _FAST(BaseModel):
         self,
         target: list[dict[str, np.ndarray]],
         output_shape: tuple[int, int, int],
-        channels_last: bool = True,
     ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Build the target, and it's mask to be used from loss computation.
 
         Args:
             target: target coming from dataset
             output_shape: shape of the output of the model without batch_size
-            channels_last: whether channels are last or not
 
         Returns:
             the new formatted target, mask and shrunken text kernel
@@ -173,10 +170,8 @@ class _FAST(BaseModel):
 
         h: int
         w: int
-        if channels_last:
-            h, w, num_classes = output_shape
-        else:
-            num_classes, h, w = output_shape
+
+        num_classes, h, w = output_shape
         target_shape = (len(target), num_classes, h, w)
 
         seg_target: np.ndarray = np.zeros(target_shape, dtype=np.uint8)
@@ -236,14 +231,8 @@ class _FAST(BaseModel):
             if shrunken.shape[0] <= 2 or not Polygon(shrunken).is_valid:
                 seg_mask[idx, class_idx, box[1] : box[3] + 1, box[0] : box[2] + 1] = False
                 continue
-            cv2.fillPoly(shrunken_kernel[idx, class_idx], [shrunken.astype(np.int32)], 1.0)
+            cv2.fillPoly(shrunken_kernel[idx, class_idx], [shrunken.astype(np.int32)], 1.0)
             # draw the original polygon on the segmentation target
-            cv2.fillPoly(seg_target[idx, class_idx], [poly.astype(np.int32)], 1.0)
-
-        # Don't forget to switch back to channel last if Tensorflow is used
-        if channels_last:
-            seg_target = seg_target.transpose((0, 2, 3, 1))
-            seg_mask = seg_mask.transpose((0, 2, 3, 1))
-            shrunken_kernel = shrunken_kernel.transpose((0, 2, 3, 1))
+            cv2.fillPoly(seg_target[idx, class_idx], [poly.astype(np.int32)], 1.0)
 
         return seg_target, seg_mask, shrunken_kernel
doctr/models/detection/fast/pytorch.py
CHANGED

@@ -170,6 +170,15 @@ class FAST(_FAST, nn.Module):
                 m.weight.data.fill_(1.0)
                 m.bias.data.zero_()
 
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
     def forward(
         self,
         x: torch.Tensor,
@@ -197,7 +206,7 @@ class FAST(_FAST, nn.Module):
 
         if target is None or return_preds:
             # Disable for torch.compile compatibility
-            @torch.compiler.disable
+            @torch.compiler.disable
             def _postprocess(prob_map: torch.Tensor) -> list[dict[str, Any]]:
                 return [
                     dict(zip(self.class_names, preds))
@@ -229,7 +238,7 @@ class FAST(_FAST, nn.Module):
         Returns:
             A loss tensor
         """
-        targets = self.build_target(target, out_map.shape[1:], False)  # type: ignore[arg-type]
+        targets = self.build_target(target, out_map.shape[1:])  # type: ignore[arg-type]
 
         seg_target, seg_mask = torch.from_numpy(targets[0]), torch.from_numpy(targets[1])
         shrunken_kernel = torch.from_numpy(targets[2]).to(out_map.device)
@@ -294,7 +303,7 @@ def reparameterize(model: FAST | nn.Module) -> FAST:
 
     for module in model.modules():
         if hasattr(module, "reparameterize_layer"):
-            module.reparameterize_layer()
+            module.reparameterize_layer()  # type: ignore[operator]
 
     for name, child in model.named_children():
         if isinstance(child, nn.BatchNorm2d):
@@ -302,12 +311,12 @@ def reparameterize(model: FAST | nn.Module) -> FAST:
             if last_conv is None:
                 continue
             conv_w = last_conv.weight
-            conv_b = last_conv.bias if last_conv.bias is not None else torch.zeros_like(child.running_mean)
+            conv_b = last_conv.bias if last_conv.bias is not None else torch.zeros_like(child.running_mean)  # type: ignore[arg-type]
 
-            factor = child.weight / torch.sqrt(child.running_var + child.eps)
+            factor = child.weight / torch.sqrt(child.running_var + child.eps)  # type: ignore
             last_conv.weight = nn.Parameter(conv_w * factor.reshape([last_conv.out_channels, 1, 1, 1]))
-            last_conv.bias = nn.Parameter((conv_b - child.running_mean) * factor + child.bias)
-            model._modules[last_conv_name] = last_conv
+            last_conv.bias = nn.Parameter((conv_b - child.running_mean) * factor + child.bias)  # type: ignore[operator]
+            model._modules[last_conv_name] = last_conv  # type: ignore[index]
             model._modules[name] = nn.Identity()
             last_conv = None
         elif isinstance(child, nn.Conv2d):
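`reparameterize` folds each BatchNorm into the convolution that precedes it: with scale γ, shift β, and running statistics μ and σ², the fused parameters are W' = W·γ/σ and b' = (b − μ)·γ/σ + β. A standalone numerical check of that identity (not doctr code):

```python
import torch
import torch.nn as nn

conv = nn.Conv2d(3, 8, 3, padding=1, bias=False).eval()
bn = nn.BatchNorm2d(8).eval()
bn.running_mean.uniform_(-1.0, 1.0)  # give the BN non-trivial statistics
bn.running_var.uniform_(0.5, 2.0)

factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
fused = nn.Conv2d(3, 8, 3, padding=1).eval()
fused.weight.data = conv.weight * factor.reshape(-1, 1, 1, 1)
fused.bias.data = -bn.running_mean * factor + bn.bias  # conv bias is zero here

x = torch.randn(1, 3, 16, 16)
assert torch.allclose(bn(conv(x)), fused(x), atol=1e-5)
```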
@@ -349,7 +358,7 @@ def _fast(
         _ignore_keys = (
             ignore_keys if kwargs["class_names"] != default_cfgs[arch].get("class_names", [CLASS_NAME]) else None
         )
-        load_pretrained_params(model, default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
 
     return model
doctr/models/detection/linknet/base.py
CHANGED

@@ -56,9 +56,8 @@ class LinkNetPostProcessor(DetectionPostProcessor):
             area = (rect[1][0] + 1) * (1 + rect[1][1])
             length = 2 * (rect[1][0] + rect[1][1]) + 2
         else:
-            poly = Polygon(points)
-            area = poly.area
-            length = poly.length
+            area = cv2.contourArea(points)
+            length = cv2.arcLength(points, closed=True)
         distance = area * self.unclip_ratio / length  # compute distance to expand polygon
         offset = pyclipper.PyclipperOffset()
         offset.AddPath(points, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
@@ -157,14 +156,12 @@ class _LinkNet(BaseModel):
         self,
         target: list[dict[str, np.ndarray]],
         output_shape: tuple[int, int, int],
-        channels_last: bool = True,
     ) -> tuple[np.ndarray, np.ndarray]:
         """Build the target, and it's mask to be used from loss computation.
 
         Args:
             target: target coming from dataset
             output_shape: shape of the output of the model without batch_size
-            channels_last: whether channels are last or not
 
         Returns:
             the new formatted target and the mask
@@ -176,10 +173,8 @@ class _LinkNet(BaseModel):
 
         h: int
         w: int
-        if channels_last:
-            h, w, num_classes = output_shape
-        else:
-            num_classes, h, w = output_shape
+
+        num_classes, h, w = output_shape
         target_shape = (len(target), num_classes, h, w)
 
         seg_target: np.ndarray = np.zeros(target_shape, dtype=np.uint8)
@@ -238,11 +233,6 @@ class _LinkNet(BaseModel):
             if shrunken.shape[0] <= 2 or not Polygon(shrunken).is_valid:
                 seg_mask[idx, class_idx, box[1] : box[3] + 1, box[0] : box[2] + 1] = False
                 continue
-            cv2.fillPoly(seg_target[idx, class_idx], [shrunken.astype(np.int32)], 1.0)
-
-        # Don't forget to switch back to channel last if Tensorflow is used
-        if channels_last:
-            seg_target = seg_target.transpose((0, 2, 3, 1))
-            seg_mask = seg_mask.transpose((0, 2, 3, 1))
+            cv2.fillPoly(seg_target[idx, class_idx], [shrunken.astype(np.int32)], 1.0)
 
         return seg_target, seg_mask
doctr/models/detection/linknet/pytorch.py
CHANGED

@@ -160,6 +160,15 @@ class LinkNet(nn.Module, _LinkNet):
                 m.weight.data.fill_(1.0)
                 m.bias.data.zero_()
 
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
     def forward(
         self,
         x: torch.Tensor,
@@ -184,7 +193,7 @@ class LinkNet(nn.Module, _LinkNet):
 
         if target is None or return_preds:
             # Disable for torch.compile compatibility
-            @torch.compiler.disable
+            @torch.compiler.disable
             def _postprocess(prob_map: torch.Tensor) -> list[dict[str, Any]]:
                 return [
                     dict(zip(self.class_names, preds))
@@ -221,7 +230,7 @@ class LinkNet(nn.Module, _LinkNet):
         Returns:
             A loss tensor
         """
-        _target, _mask = self.build_target(target, out_map.shape[1:], False)  # type: ignore[arg-type]
+        _target, _mask = self.build_target(target, out_map.shape[1:])  # type: ignore[arg-type]
 
         seg_target, seg_mask = torch.from_numpy(_target).to(dtype=out_map.dtype), torch.from_numpy(_mask)
         seg_target, seg_mask = seg_target.to(out_map.device), seg_mask.to(out_map.device)
@@ -282,7 +291,7 @@ def _linknet(
         _ignore_keys = (
             ignore_keys if kwargs["class_names"] != default_cfgs[arch].get("class_names", [CLASS_NAME]) else None
         )
-        load_pretrained_params(model, default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
 
     return model
doctr/models/detection/predictor/pytorch.py
CHANGED

@@ -36,7 +36,7 @@ class DetectionPredictor(nn.Module):
     @torch.inference_mode()
     def forward(
         self,
-        pages: list[np.ndarray | torch.Tensor],
+        pages: list[np.ndarray],
         return_maps: bool = False,
         **kwargs: Any,
     ) -> list[dict[str, np.ndarray]] | tuple[list[dict[str, np.ndarray]], list[np.ndarray]]:
doctr/models/detection/zoo.py
CHANGED

@@ -5,7 +5,7 @@
 
 from typing import Any
 
-from doctr.file_utils import is_tf_available, is_torch_available
+from doctr.models.utils import _CompiledModule
 
 from .. import detection
 from ..detection.fast import reparameterize
@@ -16,30 +16,17 @@ __all__ = ["detection_predictor"]
 
 ARCHS: list[str]
 
-
-if is_tf_available():
-    ARCHS = [
-        "db_resnet50",
-        "db_mobilenet_v3_large",
-        "linknet_resnet18",
-        "linknet_resnet34",
-        "linknet_resnet50",
-        "fast_tiny",
-        "fast_small",
-        "fast_base",
-    ]
-elif is_torch_available():
-    ARCHS = [
-        "db_resnet34",
-        "db_resnet50",
-        "db_mobilenet_v3_large",
-        "linknet_resnet18",
-        "linknet_resnet34",
-        "linknet_resnet50",
-        "fast_tiny",
-        "fast_small",
-        "fast_base",
-    ]
+ARCHS = [
+    "db_resnet34",
+    "db_resnet50",
+    "db_mobilenet_v3_large",
+    "linknet_resnet18",
+    "linknet_resnet34",
+    "linknet_resnet50",
+    "fast_tiny",
+    "fast_small",
+    "fast_base",
+]
 
 
 def _predictor(arch: Any, pretrained: bool, assume_straight_pages: bool = True, **kwargs: Any) -> DetectionPredictor:
@@ -56,12 +43,8 @@ def _predictor(arch: Any, pretrained: bool, assume_straight_pages: bool = True,
         if isinstance(_model, detection.FAST):
             _model = reparameterize(_model)
     else:
-        allowed_archs = [detection.DBNet, detection.LinkNet, detection.FAST]
-        if is_torch_available():
-            # Adding the type for torch compiled models to the allowed architectures
-            from doctr.models.utils import _CompiledModule
-
-            allowed_archs.append(_CompiledModule)
+        # Adding the type for torch compiled models to the allowed architectures
+        allowed_archs = [detection.DBNet, detection.LinkNet, detection.FAST, _CompiledModule]
 
         if not isinstance(arch, tuple(allowed_archs)):
             raise ValueError(f"unknown architecture: {type(arch)}")
@@ -76,7 +59,7 @@ def _predictor(arch: Any, pretrained: bool, assume_straight_pages: bool = True,
     kwargs["std"] = kwargs.get("std", _model.cfg["std"])
     kwargs["batch_size"] = kwargs.get("batch_size", 2)
     predictor = DetectionPredictor(
-        PreProcessor(_model.cfg["input_shape"][:-1] if is_tf_available() else _model.cfg["input_shape"][1:], **kwargs),
+        PreProcessor(_model.cfg["input_shape"][1:], **kwargs),
         _model,
     )
     return predictor
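With the TensorFlow branch gone, the preprocessor is always built from the channels-first `input_shape[1:]`. Building a detection predictor works as before; a minimal sketch:

```python
import numpy as np
from doctr.models import detection_predictor

predictor = detection_predictor("fast_base", pretrained=True)
# Pages are plain (H, W, C) numpy arrays
pages = [np.zeros((1024, 1024, 3), dtype=np.uint8)]
out = predictor(pages)
```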
doctr/models/factory/hub.py
CHANGED

@@ -13,6 +13,7 @@ import textwrap
 from pathlib import Path
 from typing import Any
 
+import torch
 from huggingface_hub import (
     HfApi,
     Repository,
@@ -23,10 +24,6 @@ from huggingface_hub import (
 )
 
 from doctr import models
-from doctr.file_utils import is_tf_available, is_torch_available
-
-if is_torch_available():
-    import torch
 
 
 __all__ = ["login_to_hub", "push_to_hf_hub", "from_hub", "_save_model_and_config_for_hf_hub"]
@@ -61,19 +58,14 @@ def _save_model_and_config_for_hf_hub(model: Any, save_dir: str, arch: str, task
     """Save model and config to disk for pushing to huggingface hub
 
     Args:
-        model: TF or PyTorch model to be saved
+        model: PyTorch model to be saved
         save_dir: directory to save model and config
         arch: architecture name
         task: task name
     """
     save_directory = Path(save_dir)
-
-    if is_torch_available():
-        weights_path = save_directory / "pytorch_model.bin"
-        torch.save(model.state_dict(), weights_path)
-    elif is_tf_available():
-        weights_path = save_directory / "tf_model.weights.h5"
-        model.save_weights(str(weights_path))
+    weights_path = save_directory / "pytorch_model.bin"
+    torch.save(model.state_dict(), weights_path)
 
     config_path = save_directory / "config.json"
@@ -96,7 +88,7 @@ def push_to_hf_hub(model: Any, model_name: str, task: str, **kwargs) -> None:  #
     >>> push_to_hf_hub(model, 'my-model', 'recognition', arch='crnn_mobilenet_v3_small')
 
     Args:
-        model: TF or PyTorch model to be saved
+        model: PyTorch model to be saved
         model_name: name of the model which is also the repository name
         task: task name
         **kwargs: keyword arguments for push_to_hf_hub
@@ -120,7 +112,7 @@ def push_to_hf_hub(model: Any, model_name: str, task: str, **kwargs) -> None:  #
     <img src="https://doctr-static.mindee.com/models?id=v0.3.1/Logo_doctr.gif&src=0" width="60%">
     </p>
 
-    **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
+    **Optical Character Recognition made seamless & accessible to anyone, powered by PyTorch**
 
     ## Task: {task}
@@ -214,13 +206,8 @@ def from_hub(repo_id: str, **kwargs: Any):
 
     # update model cfg
     model.cfg = cfg
-
-    # load the weights
-    if is_torch_available():
-        state_dict = torch.load(hf_hub_download(repo_id, filename="pytorch_model.bin", **kwargs), map_location="cpu")
-        model.load_state_dict(state_dict)
-    else:  # tf
-        weights = hf_hub_download(repo_id, filename="tf_model.weights.h5", **kwargs)
-        model.load_weights(weights)
+    # load the weights
+    weights = hf_hub_download(repo_id, filename="pytorch_model.bin", **kwargs)
+    model.from_pretrained(weights)
 
     return model
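`from_hub` now always downloads `pytorch_model.bin` and routes it through the new `from_pretrained` helper. A usage sketch (the repository id is a placeholder):

```python
from doctr.models.factory.hub import from_hub

model = from_hub("my-org/my-doctr-model")  # placeholder repo id
```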
doctr/models/kie_predictor/pytorch.py
CHANGED

@@ -68,14 +68,14 @@ class KIEPredictor(nn.Module, _KIEPredictor):
     @torch.inference_mode()
     def forward(
         self,
-        pages: list[np.ndarray | torch.Tensor],
+        pages: list[np.ndarray],
         **kwargs: Any,
     ) -> Document:
         # Dimension check
         if any(page.ndim != 3 for page in pages):
             raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")
 
-        origin_page_shapes = [page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:] for page in pages]
+        origin_page_shapes = [page.shape[:2] for page in pages]
 
         # Localize text elements
         loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
@@ -113,9 +113,6 @@ class KIEPredictor(nn.Module, _KIEPredictor):
             dict_loc_preds[class_name] = _loc_preds
             objectness_scores[class_name] = _scores
 
-        # Check whether crop mode should be switched to channels first
-        channels_last = len(pages) == 0 or isinstance(pages[0], np.ndarray)
-
         # Apply hooks to loc_preds if any
         for hook in self.hooks:
             dict_loc_preds = hook(dict_loc_preds)
@@ -126,7 +123,6 @@ class KIEPredictor(nn.Module, _KIEPredictor):
             crops[class_name], dict_loc_preds[class_name] = self._prepare_crops(
                 pages,
                 dict_loc_preds[class_name],
-                channels_last=channels_last,
                 assume_straight_pages=self.assume_straight_pages,
                 assume_horizontal=self._page_orientation_disabled,
             )
@@ -173,7 +169,7 @@ class KIEPredictor(nn.Module, _KIEPredictor):
             boxes_per_page,
             objectness_scores_per_page,
             text_preds_per_page,
-            origin_page_shapes,
+            origin_page_shapes,
             crop_orientations_per_page,
             orientations,
             languages_dict,
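With tensors no longer accepted, pages flow through the KIE predictor as plain numpy arrays. A minimal sketch using the standard doctr entry points (the image path is a placeholder):

```python
from doctr.io import DocumentFile
from doctr.models import kie_predictor

predictor = kie_predictor(det_arch="db_resnet50", reco_arch="crnn_vgg16_bn", pretrained=True)
pages = DocumentFile.from_images("page.png")  # list of (H, W, C) numpy arrays
result = predictor(pages)
```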
doctr/models/modules/layers/pytorch.py
CHANGED

@@ -8,7 +8,55 @@ import numpy as np
 import torch
 import torch.nn as nn
 
-__all__ = ["FASTConvLayer"]
+__all__ = ["FASTConvLayer", "DropPath", "AdaptiveAvgPool2d"]
+
+
+class DropPath(nn.Module):
+    """
+    DropPath (Drop Connect) layer. This is a stochastic version of the identity layer.
+    """
+
+    # Borrowed from https://github.com/huggingface/pytorch-image-models/blob/main/timm/layers/drop.py
+    def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True):
+        super(DropPath, self).__init__()
+        self.drop_prob = drop_prob
+        self.scale_by_keep = scale_by_keep
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        if self.drop_prob == 0.0 or not self.training:
+            return x
+        keep_prob = 1 - self.drop_prob
+        shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with different dimensions
+        random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
+        if keep_prob > 0.0 and self.scale_by_keep:
+            random_tensor.div_(keep_prob)
+        return x * random_tensor
+
+
+class AdaptiveAvgPool2d(nn.Module):
+    """
+    Custom AdaptiveAvgPool2d implementation which is ONNX and `torch.compile` compatible.
+
+    """
+
+    def __init__(self, output_size):
+        super().__init__()
+        self.output_size = output_size
+
+    def forward(self, x: torch.Tensor):
+        H_out, W_out = self.output_size
+        N, C, H, W = x.shape
+
+        out = torch.empty((N, C, H_out, W_out), device=x.device, dtype=x.dtype)
+        for oh in range(H_out):
+            start_h = (oh * H) // H_out
+            end_h = ((oh + 1) * H + H_out - 1) // H_out  # ceil((oh+1)*H / H_out)
+            for ow in range(W_out):
+                start_w = (ow * W) // W_out
+                end_w = ((ow + 1) * W + W_out - 1) // W_out  # ceil((ow+1)*W / W_out)
+                # average over the window
+                out[:, :, oh, ow] = x[:, :, start_h:end_h, start_w:end_w].mean(dim=(-2, -1))
+        return out
 
 
 class FASTConvLayer(nn.Module):
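The loop reproduces PyTorch's adaptive pooling window arithmetic (start = ⌊i·H/H_out⌋, end = ⌈(i+1)·H/H_out⌉), so it should agree with the built-in layer; a quick check under that assumption:

```python
import torch
from doctr.models.modules.layers.pytorch import AdaptiveAvgPool2d

x = torch.randn(2, 3, 17, 23)
custom = AdaptiveAvgPool2d((5, 7))
reference = torch.nn.AdaptiveAvgPool2d((5, 7))
assert torch.allclose(custom(x), reference(x), atol=1e-6)
```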
@@ -103,16 +151,16 @@ class FASTConvLayer(nn.Module):
             id_tensor = torch.from_numpy(kernel_value).to(identity.weight.device)
             self.id_tensor = self._pad_to_mxn_tensor(id_tensor)
         kernel = self.id_tensor
-        std = (identity.running_var + identity.eps).sqrt()
+        std = (identity.running_var + identity.eps).sqrt()
         t = (identity.weight / std).reshape(-1, 1, 1, 1)
-        return kernel * t, identity.bias - identity.running_mean * identity.weight / std
+        return kernel * t, identity.bias - identity.running_mean * identity.weight / std  # type: ignore[operator]
 
     def _fuse_bn_tensor(self, conv: nn.Conv2d, bn: nn.BatchNorm2d) -> tuple[torch.Tensor, torch.Tensor]:
         kernel = conv.weight
         kernel = self._pad_to_mxn_tensor(kernel)
         std = (bn.running_var + bn.eps).sqrt()  # type: ignore
         t = (bn.weight / std).reshape(-1, 1, 1, 1)
-        return kernel * t, bn.bias - bn.running_mean * bn.weight / std
+        return kernel * t, bn.bias - bn.running_mean * bn.weight / std  # type: ignore[operator]
 
     def _get_equivalent_kernel_bias(self) -> tuple[torch.Tensor, torch.Tensor]:
         kernel_mxn, bias_mxn = self._fuse_bn_tensor(self.conv, self.bn)
doctr/models/modules/transformer/pytorch.py
CHANGED

@@ -50,8 +50,8 @@ def scaled_dot_product_attention(
     scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
     if mask is not None:
         # NOTE: to ensure the ONNX compatibility, masked_fill works only with int equal condition
-        scores = scores.masked_fill(mask == 0, float("-inf"))
-    p_attn = torch.softmax(scores, dim=-1)
+        scores = scores.masked_fill(mask == 0, float("-inf"))
+    p_attn = torch.softmax(scores, dim=-1)
     return torch.matmul(p_attn, value), p_attn
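This helper implements standard scaled dot-product attention, softmax(QKᵀ/√d)·V, with masked positions pushed to −inf before the softmax; the integer-equality mask test is kept for ONNX export. A self-contained sketch of the same computation:

```python
import math
import torch

def attention(query, key, value, mask=None):
    # softmax(Q @ K^T / sqrt(d)) @ V, masked positions forced to -inf
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
    if mask is not None:
        scores = scores.masked_fill(mask == 0, float("-inf"))
    p_attn = torch.softmax(scores, dim=-1)
    return torch.matmul(p_attn, value), p_attn

q = k = v = torch.randn(1, 4, 8)
causal_mask = torch.tril(torch.ones(4, 4, dtype=torch.int64))
out, weights = attention(q, k, v, causal_mask)
```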