PyPI - python-doctr - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl - Mend

python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137) hide show

doctr/datasets/__init__.py +2 -0
doctr/datasets/cord.py +6 -4
doctr/datasets/datasets/base.py +3 -2
doctr/datasets/datasets/pytorch.py +4 -2
doctr/datasets/datasets/tensorflow.py +4 -2
doctr/datasets/detection.py +6 -3
doctr/datasets/doc_artefacts.py +2 -1
doctr/datasets/funsd.py +7 -8
doctr/datasets/generator/base.py +3 -2
doctr/datasets/generator/pytorch.py +3 -1
doctr/datasets/generator/tensorflow.py +3 -1
doctr/datasets/ic03.py +3 -2
doctr/datasets/ic13.py +2 -1
doctr/datasets/iiit5k.py +6 -4
doctr/datasets/iiithws.py +2 -1
doctr/datasets/imgur5k.py +3 -2
doctr/datasets/loader.py +4 -2
doctr/datasets/mjsynth.py +2 -1
doctr/datasets/ocr.py +2 -1
doctr/datasets/orientation.py +40 -0
doctr/datasets/recognition.py +3 -2
doctr/datasets/sroie.py +2 -1
doctr/datasets/svhn.py +2 -1
doctr/datasets/svt.py +3 -2
doctr/datasets/synthtext.py +2 -1
doctr/datasets/utils.py +27 -11
doctr/datasets/vocabs.py +26 -1
doctr/datasets/wildreceipt.py +111 -0
doctr/file_utils.py +3 -1
doctr/io/elements.py +52 -35
doctr/io/html.py +5 -3
doctr/io/image/base.py +5 -4
doctr/io/image/pytorch.py +12 -7
doctr/io/image/tensorflow.py +11 -6
doctr/io/pdf.py +5 -4
doctr/io/reader.py +13 -5
doctr/models/_utils.py +30 -53
doctr/models/artefacts/barcode.py +4 -3
doctr/models/artefacts/face.py +4 -2
doctr/models/builder.py +58 -43
doctr/models/classification/__init__.py +1 -0
doctr/models/classification/magc_resnet/pytorch.py +5 -2
doctr/models/classification/magc_resnet/tensorflow.py +5 -2
doctr/models/classification/mobilenet/pytorch.py +16 -4
doctr/models/classification/mobilenet/tensorflow.py +29 -20
doctr/models/classification/predictor/pytorch.py +3 -2
doctr/models/classification/predictor/tensorflow.py +2 -1
doctr/models/classification/resnet/pytorch.py +23 -13
doctr/models/classification/resnet/tensorflow.py +33 -26
doctr/models/classification/textnet/__init__.py +6 -0
doctr/models/classification/textnet/pytorch.py +275 -0
doctr/models/classification/textnet/tensorflow.py +267 -0
doctr/models/classification/vgg/pytorch.py +4 -2
doctr/models/classification/vgg/tensorflow.py +5 -2
doctr/models/classification/vit/pytorch.py +9 -3
doctr/models/classification/vit/tensorflow.py +9 -3
doctr/models/classification/zoo.py +7 -2
doctr/models/core.py +1 -1
doctr/models/detection/__init__.py +1 -0
doctr/models/detection/_utils/pytorch.py +7 -1
doctr/models/detection/_utils/tensorflow.py +7 -3
doctr/models/detection/core.py +9 -3
doctr/models/detection/differentiable_binarization/base.py +37 -25
doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
doctr/models/detection/fast/__init__.py +6 -0
doctr/models/detection/fast/base.py +256 -0
doctr/models/detection/fast/pytorch.py +442 -0
doctr/models/detection/fast/tensorflow.py +428 -0
doctr/models/detection/linknet/base.py +12 -5
doctr/models/detection/linknet/pytorch.py +28 -15
doctr/models/detection/linknet/tensorflow.py +68 -88
doctr/models/detection/predictor/pytorch.py +16 -6
doctr/models/detection/predictor/tensorflow.py +13 -5
doctr/models/detection/zoo.py +19 -16
doctr/models/factory/hub.py +20 -10
doctr/models/kie_predictor/base.py +2 -1
doctr/models/kie_predictor/pytorch.py +28 -36
doctr/models/kie_predictor/tensorflow.py +27 -27
doctr/models/modules/__init__.py +1 -0
doctr/models/modules/layers/__init__.py +6 -0
doctr/models/modules/layers/pytorch.py +166 -0
doctr/models/modules/layers/tensorflow.py +175 -0
doctr/models/modules/transformer/pytorch.py +24 -22
doctr/models/modules/transformer/tensorflow.py +6 -4
doctr/models/modules/vision_transformer/pytorch.py +2 -4
doctr/models/modules/vision_transformer/tensorflow.py +2 -4
doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
doctr/models/predictor/base.py +14 -3
doctr/models/predictor/pytorch.py +26 -29
doctr/models/predictor/tensorflow.py +25 -22
doctr/models/preprocessor/pytorch.py +14 -9
doctr/models/preprocessor/tensorflow.py +10 -5
doctr/models/recognition/core.py +4 -1
doctr/models/recognition/crnn/pytorch.py +23 -16
doctr/models/recognition/crnn/tensorflow.py +25 -17
doctr/models/recognition/master/base.py +4 -1
doctr/models/recognition/master/pytorch.py +20 -9
doctr/models/recognition/master/tensorflow.py +20 -8
doctr/models/recognition/parseq/base.py +4 -1
doctr/models/recognition/parseq/pytorch.py +28 -22
doctr/models/recognition/parseq/tensorflow.py +22 -11
doctr/models/recognition/predictor/_utils.py +3 -2
doctr/models/recognition/predictor/pytorch.py +3 -2
doctr/models/recognition/predictor/tensorflow.py +2 -1
doctr/models/recognition/sar/pytorch.py +14 -7
doctr/models/recognition/sar/tensorflow.py +23 -14
doctr/models/recognition/utils.py +5 -1
doctr/models/recognition/vitstr/base.py +4 -1
doctr/models/recognition/vitstr/pytorch.py +22 -13
doctr/models/recognition/vitstr/tensorflow.py +21 -10
doctr/models/recognition/zoo.py +4 -2
doctr/models/utils/pytorch.py +24 -6
doctr/models/utils/tensorflow.py +22 -3
doctr/models/zoo.py +21 -3
doctr/transforms/functional/base.py +8 -3
doctr/transforms/functional/pytorch.py +23 -6
doctr/transforms/functional/tensorflow.py +25 -5
doctr/transforms/modules/base.py +12 -5
doctr/transforms/modules/pytorch.py +10 -12
doctr/transforms/modules/tensorflow.py +17 -9
doctr/utils/common_types.py +1 -1
doctr/utils/data.py +4 -2
doctr/utils/fonts.py +3 -2
doctr/utils/geometry.py +95 -26
doctr/utils/metrics.py +36 -22
doctr/utils/multithreading.py +5 -3
doctr/utils/repr.py +3 -1
doctr/utils/visualization.py +31 -8
doctr/version.py +1 -1
{python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
python_doctr-0.8.1.dist-info/RECORD +173 -0
{python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
python_doctr-0.7.0.dist-info/RECORD +0 -161
{python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
{python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
{python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0

doctr/models/builder.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -20,6 +20,7 @@ class DocumentBuilder(NestedObject):
     """Implements a document builder
     Args:
+    ----
         resolve_lines: whether words should be automatically grouped into lines
         resolve_blocks: whether lines should be automatically grouped into blocks
         paragraph_break: relative length of the minimum space separating paragraphs
@@ -44,9 +45,11 @@ class DocumentBuilder(NestedObject):
         """Sort bounding boxes from top to bottom, left to right
         Args:
+        ----
             boxes: bounding boxes of shape (N, 4) or (N, 4, 2) (in case of rotated bbox)
         Returns:
+        -------
             tuple: indices of ordered boxes of shape (N,), boxes
                 If straight boxes are passed to the function, boxes are unchanged
                 else: boxes returned are straight boxes fitted to the straightened rotated boxes
@@ -66,10 +69,12 @@ class DocumentBuilder(NestedObject):
         """Split a line in sub_lines
         Args:
+        ----
             boxes: bounding boxes of shape (N, 4)
             word_idcs: list of indexes for the words of the line
         Returns:
+        -------
             A list of (sub-)lines computed from the original line (words)
         """
         lines = []
@@ -104,12 +109,13 @@ class DocumentBuilder(NestedObject):
         """Order boxes to group them in lines
         Args:
+        ----
             boxes: bounding boxes of shape (N, 4) or (N, 4, 2) in case of rotated bbox
         Returns:
+        -------
             nested list of box indices
         """
         # Sort boxes, and straighten the boxes if they are rotated
         idxs, boxes = self._sort_boxes(boxes)
@@ -151,25 +157,23 @@ class DocumentBuilder(NestedObject):
         """Order lines to group them in blocks
         Args:
+        ----
             boxes: bounding boxes of shape (N, 4) or (N, 4, 2)
             lines: list of lines, each line is a list of idx
         Returns:
+        -------
             nested list of box indices
         """
         # Resolve enclosing boxes of lines
         if boxes.ndim == 3:
-            box_lines: np.ndarray = np.asarray(
-                [
-                    resolve_enclosing_rbbox([tuple(boxes[idx, :, :]) for idx in line])  # type: ignore[misc]
-                    for line in lines
-                ]
-            )
+            box_lines: np.ndarray = np.asarray([
+                resolve_enclosing_rbbox([tuple(boxes[idx, :, :]) for idx in line])  # type: ignore[misc]
+                for line in lines
+            ])
         else:
             _box_lines = [
-                resolve_enclosing_bbox(
-                    [(tuple(boxes[idx, :2]), tuple(boxes[idx, 2:])) for idx in line]  # type: ignore[misc]
-                )
+                resolve_enclosing_bbox([(tuple(boxes[idx, :2]), tuple(boxes[idx, 2:])) for idx in line])
                 for line in lines
             ]
             box_lines = np.asarray([(x1, y1, x2, y2) for ((x1, y1), (x2, y2)) in _box_lines])
@@ -220,13 +224,14 @@ class DocumentBuilder(NestedObject):
         """Gather independent words in structured blocks
         Args:
+        ----
             boxes: bounding boxes of all detected words of the page, of shape (N, 5) or (N, 4, 2)
             word_preds: list of all detected words of the page, of shape N
         Returns:
+        -------
             list of block elements
         """
         if boxes.shape[0] != len(word_preds):
             raise ValueError(f"Incompatible argument lengths: {boxes.shape[0]}, {len(word_preds)}")
@@ -248,24 +253,18 @@ class DocumentBuilder(NestedObject):
             _blocks = [lines]
         blocks = [
-            Block(
-                [
-                    Line(
-                        [
-                            Word(
-                                *word_preds[idx],
-                                tuple([tuple(pt) for pt in boxes[idx].tolist()]),  # type: ignore[arg-type]
-                            )
-                            if boxes.ndim == 3
-                            else Word(
-                                *word_preds[idx], ((boxes[idx, 0], boxes[idx, 1]), (boxes[idx, 2], boxes[idx, 3]))
-                            )
-                            for idx in line
-                        ]
+            Block([
+                Line([
+                    Word(
+                        *word_preds[idx],
+                        tuple([tuple(pt) for pt in boxes[idx].tolist()]),  # type: ignore[arg-type]
                     )
-                    for line in lines
-                ]
-            )
+                    if boxes.ndim == 3
+                    else Word(*word_preds[idx], ((boxes[idx, 0], boxes[idx, 1]), (boxes[idx, 2], boxes[idx, 3])))
+                    for idx in line
+                ])
+                for line in lines
+            ])
             for lines in _blocks
         ]
@@ -280,6 +279,7 @@ class DocumentBuilder(NestedObject):
     def __call__(
         self,
+        pages: List[np.ndarray],
         boxes: List[np.ndarray],
         text_preds: List[List[Tuple[str, float]]],
         page_shapes: List[Tuple[int, int]],
@@ -289,12 +289,19 @@ class DocumentBuilder(NestedObject):
         """Re-arrange detected words into structured blocks
         Args:
+        ----
+            pages: list of N elements, where each element represents the page image
             boxes: list of N elements, where each element represents the localization predictions, of shape (*, 5)
                 or (*, 6) for all words for a given page
             text_preds: list of N elements, where each element is the list of all word prediction (text + confidence)
-            page_shape: shape of each page, of size N
+            page_shapes: shape of each page, of size N
+            orientations: optional, list of N elements,
+                where each element is a dictionary containing the orientation (orientation + confidence)
+            languages: optional, list of N elements,
+                where each element is a dictionary containing the language (language + confidence)
         Returns:
+        -------
             document object
         """
         if len(boxes) != len(text_preds) or len(boxes) != len(page_shapes):
@@ -307,15 +314,12 @@ class DocumentBuilder(NestedObject):
         if self.export_as_straight_boxes and len(boxes) > 0:
             # If boxes are already straight OK, else fit a bounding rect
             if boxes[0].ndim == 3:
-                straight_boxes: List[np.ndarray] = []
-                # Iterate over pages
-                for p_boxes in boxes:
-                    # Iterate over boxes of the pages
-                    straight_boxes.append(np.concatenate((p_boxes.min(1), p_boxes.max(1)), 1))
-                boxes = straight_boxes
+                # Iterate over pages and boxes
+                boxes = [np.concatenate((p_boxes.min(1), p_boxes.max(1)), 1) for p_boxes in boxes]
         _pages = [
             Page(
+                page,
                 self._build_blocks(
                     page_boxes,
                     word_preds,
@@ -325,8 +329,8 @@ class DocumentBuilder(NestedObject):
                 orientation,
                 language,
             )
-            for _idx, shape, page_boxes, word_preds, orientation, language in zip(
-                range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
+            for page, _idx, shape, page_boxes, word_preds, orientation, language in zip(
+                pages, range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
             )
         ]
@@ -337,6 +341,7 @@ class KIEDocumentBuilder(DocumentBuilder):
     """Implements a KIE document builder
     Args:
+    ----
         resolve_lines: whether words should be automatically grouped into lines
         resolve_blocks: whether lines should be automatically grouped into blocks
         paragraph_break: relative length of the minimum space separating paragraphs
@@ -346,6 +351,7 @@ class KIEDocumentBuilder(DocumentBuilder):
     def __call__(  # type: ignore[override]
         self,
+        pages: List[np.ndarray],
         boxes: List[Dict[str, np.ndarray]],
         text_preds: List[Dict[str, List[Tuple[str, float]]]],
         page_shapes: List[Tuple[int, int]],
@@ -355,12 +361,19 @@ class KIEDocumentBuilder(DocumentBuilder):
         """Re-arrange detected words into structured predictions
         Args:
+        ----
+            pages: list of N elements, where each element represents the page image
             boxes: list of N dictionaries, where each element represents the localization predictions for a class,
-            of shape (*, 5) or (*, 6) for all predictions
+                of shape (*, 5) or (*, 6) for all predictions
             text_preds: list of N dictionaries, where each element is the list of all word prediction
-            page_shape: shape of each page, of size N
+            page_shapes: shape of each page, of size N
+            orientations: optional, list of N elements,
+                where each element is a dictionary containing the orientation (orientation + confidence)
+            languages: optional, list of N elements,
+                where each element is a dictionary containing the language (language + confidence)
         Returns:
+        -------
             document object
         """
         if len(boxes) != len(text_preds) or len(boxes) != len(page_shapes):
@@ -384,6 +397,7 @@ class KIEDocumentBuilder(DocumentBuilder):
         _pages = [
             KIEPage(
+                page,
                 {
                     k: self._build_blocks(
                         page_boxes[k],
@@ -396,8 +410,8 @@ class KIEDocumentBuilder(DocumentBuilder):
                 orientation,
                 language,
             )
-            for _idx, shape, page_boxes, word_preds, orientation, language in zip(
-                range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
+            for page, _idx, shape, page_boxes, word_preds, orientation, language in zip(
+                pages, range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
             )
         ]
@@ -411,13 +425,14 @@ class KIEDocumentBuilder(DocumentBuilder):
         """Gather independent words in structured blocks
         Args:
+        ----
             boxes: bounding boxes of all detected words of the page, of shape (N, 5) or (N, 4, 2)
             word_preds: list of all detected words of the page, of shape N
         Returns:
+        -------
             list of block elements
         """
         if boxes.shape[0] != len(word_preds):
             raise ValueError(f"Incompatible argument lengths: {boxes.shape[0]}, {len(word_preds)}")

doctr/models/classification/__init__.py CHANGED Viewed

@@ -3,4 +3,5 @@ from .resnet import *
 from .vgg import *
 from .magc_resnet import *
 from .vit import *
+from .textnet import *
 from .zoo import *

doctr/models/classification/magc_resnet/pytorch.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -36,6 +36,7 @@ class MAGC(nn.Module):
     <https://arxiv.org/pdf/1910.02562.pdf>`_.
     Args:
+    ----
         inplanes: input channels
         headers: number of headers to split channels
         attn_scale: if True, re-scale attention to counteract the variance distributions
@@ -153,12 +154,14 @@ def magc_resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet:
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the ResNet architecture
     Returns:
+    -------
         A feature extractor model
     """
     return _magc_resnet(
         "magc_resnet31",
         pretrained,

doctr/models/classification/magc_resnet/tensorflow.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -36,6 +36,7 @@ class MAGC(layers.Layer):
     <https://arxiv.org/pdf/1910.02562.pdf>`_.
     Args:
+    ----
         inplanes: input channels
         headers: number of headers to split channels
         attn_scale: if True, re-scale attention to counteract the variance distributions
@@ -169,12 +170,14 @@ def magc_resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet:
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the ResNet architecture
     Returns:
+    -------
         A feature extractor model
     """
     return _magc_resnet(
         "magc_resnet31",
         pretrained,

doctr/models/classification/mobilenet/pytorch.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -113,12 +113,14 @@ def mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.M
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
+    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
         "mobilenet_v3_small", pretrained, ignore_keys=["classifier.3.weight", "classifier.3.bias"], **kwargs
     )
@@ -136,12 +138,14 @@ def mobilenet_v3_small_r(pretrained: bool = False, **kwargs: Any) -> mobilenetv3
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
+    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
         "mobilenet_v3_small_r",
         pretrained,
@@ -163,9 +167,12 @@ def mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.M
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
+    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -188,9 +195,12 @@ def mobilenet_v3_large_r(pretrained: bool = False, **kwargs: Any) -> mobilenetv3
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
+    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -214,12 +224,14 @@ def mobilenet_v3_small_orientation(pretrained: bool = False, **kwargs: Any) -> m
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
+    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
         "mobilenet_v3_small_orientation",
         pretrained,

doctr/models/classification/mobilenet/tensorflow.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -82,14 +82,12 @@ class SqueezeExcitation(Sequential):
     """Squeeze and Excitation."""
     def __init__(self, chan: int, squeeze_factor: int = 4) -> None:
-        super().__init__(
-            [
-                layers.GlobalAveragePooling2D(),
-                layers.Dense(chan // squeeze_factor, activation="relu"),
-                layers.Dense(chan, activation="hard_sigmoid"),
-                layers.Reshape((1, 1, chan)),
-            ]
-        )
+        super().__init__([
+            layers.GlobalAveragePooling2D(),
+            layers.Dense(chan // squeeze_factor, activation="relu"),
+            layers.Dense(chan, activation="hard_sigmoid"),
+            layers.Reshape((1, 1, chan)),
+        ])
     def call(self, inputs: tf.Tensor, **kwargs: Any) -> tf.Tensor:
         x = super().call(inputs, **kwargs)
@@ -126,6 +124,7 @@ class InvertedResidual(layers.Layer):
     """InvertedResidual for mobilenet
     Args:
+    ----
         conf: configuration object for inverted residual
     """
@@ -220,14 +219,12 @@ class MobileNetV3(Sequential):
         )
         if include_top:
-            _layers.extend(
-                [
-                    layers.GlobalAveragePooling2D(),
-                    layers.Dense(head_chans, activation=hard_swish),
-                    layers.Dropout(0.2),
-                    layers.Dense(num_classes),
-                ]
-            )
+            _layers.extend([
+                layers.GlobalAveragePooling2D(),
+                layers.Dense(head_chans, activation=hard_swish),
+                layers.Dropout(0.2),
+                layers.Dense(num_classes),
+            ])
         super().__init__(_layers)
         self.cfg = cfg
@@ -309,12 +306,14 @@ def mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> MobileNetV3:
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
+    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_small", pretrained, False, **kwargs)
@@ -330,12 +329,14 @@ def mobilenet_v3_small_r(pretrained: bool = False, **kwargs: Any) -> MobileNetV3
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
+    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_small_r", pretrained, True, **kwargs)
@@ -351,9 +352,12 @@ def mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> MobileNetV3:
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
+    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_large", pretrained, False, **kwargs)
@@ -371,9 +375,12 @@ def mobilenet_v3_large_r(pretrained: bool = False, **kwargs: Any) -> MobileNetV3
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
+    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_large_r", pretrained, True, **kwargs)
@@ -391,10 +398,12 @@ def mobilenet_v3_small_orientation(pretrained: bool = False, **kwargs: Any) -> M
     >>> out = model(input_tensor)
     Args:
+    ----
         pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the MobileNetV3 architecture
     Returns:
+    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_small_orientation", pretrained, include_top=True, **kwargs)

doctr/models/classification/predictor/pytorch.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -20,6 +20,7 @@ class CropOrientationPredictor(nn.Module):
     4 possible orientations: 0, 90, 180, 270 degrees counter clockwise.
     Args:
+    ----
         pre_processor: transform inputs for easier batched model inference
         model: core classification architecture (backbone + classification head)
     """
@@ -33,7 +34,7 @@ class CropOrientationPredictor(nn.Module):
         self.pre_processor = pre_processor
         self.model = model.eval()
-    @torch.no_grad()
+    @torch.inference_mode()
     def forward(
         self,
         crops: List[Union[np.ndarray, torch.Tensor]],

doctr/models/classification/predictor/tensorflow.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -20,6 +20,7 @@ class CropOrientationPredictor(NestedObject):
     4 possible orientations: 0, 90, 180, 270 degrees counter clockwise.
     Args:
+    ----
         pre_processor: transform inputs for easier batched model inference
         model: core classification architecture (backbone + classification head)
     """

python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl