PyPI - python-doctr - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl - Mend

python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137)

doctr/datasets/__init__.py +2 -0
doctr/datasets/cord.py +6 -4
doctr/datasets/datasets/base.py +3 -2
doctr/datasets/datasets/pytorch.py +4 -2
doctr/datasets/datasets/tensorflow.py +4 -2
doctr/datasets/detection.py +6 -3
doctr/datasets/doc_artefacts.py +2 -1
doctr/datasets/funsd.py +7 -8
doctr/datasets/generator/base.py +3 -2
doctr/datasets/generator/pytorch.py +3 -1
doctr/datasets/generator/tensorflow.py +3 -1
doctr/datasets/ic03.py +3 -2
doctr/datasets/ic13.py +2 -1
doctr/datasets/iiit5k.py +6 -4
doctr/datasets/iiithws.py +2 -1
doctr/datasets/imgur5k.py +3 -2
doctr/datasets/loader.py +4 -2
doctr/datasets/mjsynth.py +2 -1
doctr/datasets/ocr.py +2 -1
doctr/datasets/orientation.py +40 -0
doctr/datasets/recognition.py +3 -2
doctr/datasets/sroie.py +2 -1
doctr/datasets/svhn.py +2 -1
doctr/datasets/svt.py +3 -2
doctr/datasets/synthtext.py +2 -1
doctr/datasets/utils.py +27 -11
doctr/datasets/vocabs.py +26 -1
doctr/datasets/wildreceipt.py +111 -0
doctr/file_utils.py +3 -1
doctr/io/elements.py +52 -35
doctr/io/html.py +5 -3
doctr/io/image/base.py +5 -4
doctr/io/image/pytorch.py +12 -7
doctr/io/image/tensorflow.py +11 -6
doctr/io/pdf.py +5 -4
doctr/io/reader.py +13 -5
doctr/models/_utils.py +30 -53
doctr/models/artefacts/barcode.py +4 -3
doctr/models/artefacts/face.py +4 -2
doctr/models/builder.py +58 -43
doctr/models/classification/__init__.py +1 -0
doctr/models/classification/magc_resnet/pytorch.py +5 -2
doctr/models/classification/magc_resnet/tensorflow.py +5 -2
doctr/models/classification/mobilenet/pytorch.py +16 -4
doctr/models/classification/mobilenet/tensorflow.py +29 -20
doctr/models/classification/predictor/pytorch.py +3 -2
doctr/models/classification/predictor/tensorflow.py +2 -1
doctr/models/classification/resnet/pytorch.py +23 -13
doctr/models/classification/resnet/tensorflow.py +33 -26
doctr/models/classification/textnet/__init__.py +6 -0
doctr/models/classification/textnet/pytorch.py +275 -0
doctr/models/classification/textnet/tensorflow.py +267 -0
doctr/models/classification/vgg/pytorch.py +4 -2
doctr/models/classification/vgg/tensorflow.py +5 -2
doctr/models/classification/vit/pytorch.py +9 -3
doctr/models/classification/vit/tensorflow.py +9 -3
doctr/models/classification/zoo.py +7 -2
doctr/models/core.py +1 -1
doctr/models/detection/__init__.py +1 -0
doctr/models/detection/_utils/pytorch.py +7 -1
doctr/models/detection/_utils/tensorflow.py +7 -3
doctr/models/detection/core.py +9 -3
doctr/models/detection/differentiable_binarization/base.py +37 -25
doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
doctr/models/detection/fast/__init__.py +6 -0
doctr/models/detection/fast/base.py +256 -0
doctr/models/detection/fast/pytorch.py +442 -0
doctr/models/detection/fast/tensorflow.py +428 -0
doctr/models/detection/linknet/base.py +12 -5
doctr/models/detection/linknet/pytorch.py +28 -15
doctr/models/detection/linknet/tensorflow.py +68 -88
doctr/models/detection/predictor/pytorch.py +16 -6
doctr/models/detection/predictor/tensorflow.py +13 -5
doctr/models/detection/zoo.py +19 -16
doctr/models/factory/hub.py +20 -10
doctr/models/kie_predictor/base.py +2 -1
doctr/models/kie_predictor/pytorch.py +28 -36
doctr/models/kie_predictor/tensorflow.py +27 -27
doctr/models/modules/__init__.py +1 -0
doctr/models/modules/layers/__init__.py +6 -0
doctr/models/modules/layers/pytorch.py +166 -0
doctr/models/modules/layers/tensorflow.py +175 -0
doctr/models/modules/transformer/pytorch.py +24 -22
doctr/models/modules/transformer/tensorflow.py +6 -4
doctr/models/modules/vision_transformer/pytorch.py +2 -4
doctr/models/modules/vision_transformer/tensorflow.py +2 -4
doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
doctr/models/predictor/base.py +14 -3
doctr/models/predictor/pytorch.py +26 -29
doctr/models/predictor/tensorflow.py +25 -22
doctr/models/preprocessor/pytorch.py +14 -9
doctr/models/preprocessor/tensorflow.py +10 -5
doctr/models/recognition/core.py +4 -1
doctr/models/recognition/crnn/pytorch.py +23 -16
doctr/models/recognition/crnn/tensorflow.py +25 -17
doctr/models/recognition/master/base.py +4 -1
doctr/models/recognition/master/pytorch.py +20 -9
doctr/models/recognition/master/tensorflow.py +20 -8
doctr/models/recognition/parseq/base.py +4 -1
doctr/models/recognition/parseq/pytorch.py +28 -22
doctr/models/recognition/parseq/tensorflow.py +22 -11
doctr/models/recognition/predictor/_utils.py +3 -2
doctr/models/recognition/predictor/pytorch.py +3 -2
doctr/models/recognition/predictor/tensorflow.py +2 -1
doctr/models/recognition/sar/pytorch.py +14 -7
doctr/models/recognition/sar/tensorflow.py +23 -14
doctr/models/recognition/utils.py +5 -1
doctr/models/recognition/vitstr/base.py +4 -1
doctr/models/recognition/vitstr/pytorch.py +22 -13
doctr/models/recognition/vitstr/tensorflow.py +21 -10
doctr/models/recognition/zoo.py +4 -2
doctr/models/utils/pytorch.py +24 -6
doctr/models/utils/tensorflow.py +22 -3
doctr/models/zoo.py +21 -3
doctr/transforms/functional/base.py +8 -3
doctr/transforms/functional/pytorch.py +23 -6
doctr/transforms/functional/tensorflow.py +25 -5
doctr/transforms/modules/base.py +12 -5
doctr/transforms/modules/pytorch.py +10 -12
doctr/transforms/modules/tensorflow.py +17 -9
doctr/utils/common_types.py +1 -1
doctr/utils/data.py +4 -2
doctr/utils/fonts.py +3 -2
doctr/utils/geometry.py +95 -26
doctr/utils/metrics.py +36 -22
doctr/utils/multithreading.py +5 -3
doctr/utils/repr.py +3 -1
doctr/utils/visualization.py +31 -8
doctr/version.py +1 -1
{python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
python_doctr-0.8.1.dist-info/RECORD +173 -0
{python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
python_doctr-0.7.0.dist-info/RECORD +0 -161
{python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
{python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
{python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0

doctr/datasets/utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -8,9 +8,8 @@ import unicodedata
 from collections.abc import Sequence
 from functools import partial
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union
 from typing import Sequence as SequenceType
-from typing import Tuple, TypeVar, Union
 import numpy as np
 from PIL import Image
@@ -33,13 +32,15 @@ def translate(
     """Translate a string input in a given vocabulary
     Args:
+    ----
         input_string: input string to translate
         vocab_name: vocabulary to use (french, latin, ...)
         unknown_char: unknown character for non-translatable characters
     Returns:
-        A string translated in a given vocab"""
+    -------
+        A string translated in a given vocab
+    """
     if VOCABS.get(vocab_name) is None:
         raise KeyError("output vocabulary must be in vocabs dictionnary")
@@ -66,16 +67,21 @@ def encode_string(
     """Given a predefined mapping, encode the string to a sequence of numbers
     Args:
+    ----
         input_string: string to encode
         vocab: vocabulary (string), the encoding is given by the indexing of the character sequence
     Returns:
-        A list encoding the input_string"""
+    -------
+        A list encoding the input_string
+    """
     try:
         return list(map(vocab.index, input_string))
     except ValueError:
-        raise ValueError("some characters cannot be found in 'vocab'")
+        raise ValueError(
+            f"some characters cannot be found in 'vocab'. \
+                         Please check the input string {input_string} and the vocabulary {vocab}"
+        )
 def decode_sequence(
@@ -85,13 +91,14 @@ def decode_sequence(
     """Given a predefined mapping, decode the sequence of numbers to a string
     Args:
+    ----
         input_seq: array to decode
         mapping: vocabulary (string), the encoding is given by the indexing of the character sequence
     Returns:
+    -------
         A string, decoded from input_seq
     """
     if not isinstance(input_seq, (Sequence, np.ndarray)):
         raise TypeError("Invalid sequence type")
     if isinstance(input_seq, np.ndarray) and (input_seq.dtype != np.int_ or input_seq.max() >= len(mapping)):
@@ -108,11 +115,11 @@ def encode_sequences(
     sos: Optional[int] = None,
     pad: Optional[int] = None,
     dynamic_seq_length: bool = False,
-    **kwargs: Any,
 ) -> np.ndarray:
     """Encode character sequences using a given vocab as mapping
     Args:
+    ----
         sequences: the list of character sequences of size N
         vocab: the ordered vocab to use for encoding
         target_size: maximum length of the encoded data
@@ -122,9 +129,9 @@ def encode_sequences(
         dynamic_seq_length: if `target_size` is specified, uses it as upper bound and enables dynamic sequence size
     Returns:
+    -------
         the padded encoded data as a tensor
     """
     if 0 <= eos < len(vocab):
         raise ValueError("argument 'eos' needs to be outside of vocab possible indices")
@@ -169,10 +176,14 @@ def convert_target_to_relative(img: ImageTensor, target: Dict[str, Any]) -> Tupl
 def crop_bboxes_from_image(img_path: Union[str, Path], geoms: np.ndarray) -> List[np.ndarray]:
     """Crop a set of bounding boxes from an image
     Args:
+    ----
         img_path: path to the image
         geoms: a array of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4)
     Returns:
+    -------
         a list of cropped images
     """
     img: np.ndarray = np.array(Image.open(img_path).convert("RGB"))
@@ -188,8 +199,13 @@ def pre_transform_multiclass(img, target: Tuple[np.ndarray, List]) -> Tuple[np.n
     """Converts multiclass target to relative coordinates.
     Args:
+    ----
         img: Image
         target: tuple of target polygons and their classes names
+    Returns:
+    -------
+        Image and dictionary of boxes, with class names as keys
     """
     boxes = convert_to_relative_coords(target[0], get_img_shape(img))
     boxes_classes = target[1]

doctr/datasets/vocabs.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -28,6 +28,7 @@ VOCABS["legacy_french"] = VOCABS["latin"] + "°" + "àâéèêëîïôùûçÀÂ
 VOCABS["french"] = VOCABS["english"] + "àâéèêëîïôùûüçÀÂÉÈÊËÎÏÔÙÛÜÇ"
 VOCABS["portuguese"] = VOCABS["english"] + "áàâãéêíïóôõúüçÁÀÂÃÉÊÍÏÓÔÕÚÜÇ"
 VOCABS["spanish"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ" + "¡¿"
+VOCABS["italian"] = VOCABS["english"] + "àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ"
 VOCABS["german"] = VOCABS["english"] + "äöüßÄÖÜẞ"
 VOCABS["arabic"] = (
     VOCABS["digits"]
@@ -39,8 +40,32 @@ VOCABS["arabic"] = (
     + VOCABS["punctuation"]
 )
 VOCABS["czech"] = VOCABS["english"] + "áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ"
+VOCABS["polish"] = VOCABS["english"] + "ąćęłńóśźżĄĆĘŁŃÓŚŹŻ"
+VOCABS["dutch"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ"
+VOCABS["norwegian"] = VOCABS["english"] + "æøåÆØÅ"
+VOCABS["danish"] = VOCABS["english"] + "æøåÆØÅ"
+VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
+VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
 VOCABS["vietnamese"] = (
     VOCABS["english"]
     + "áàảạãăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵ"
     + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
 )
+VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
+VOCABS["multilingual"] = "".join(
+    dict.fromkeys(
+        VOCABS["french"]
+        + VOCABS["portuguese"]
+        + VOCABS["spanish"]
+        + VOCABS["german"]
+        + VOCABS["czech"]
+        + VOCABS["polish"]
+        + VOCABS["dutch"]
+        + VOCABS["italian"]
+        + VOCABS["norwegian"]
+        + VOCABS["danish"]
+        + VOCABS["finnish"]
+        + VOCABS["swedish"]
+        + "§"
+    )
+)

doctr/datasets/wildreceipt.py ADDED Viewed

@@ -0,0 +1,111 @@
+# Copyright (C) 2021-2024, Mindee.
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+import json
+import os
+from pathlib import Path
+from typing import Any, Dict, List, Tuple, Union
+import numpy as np
+from .datasets import AbstractDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+__all__ = ["WILDRECEIPT"]
+class WILDRECEIPT(AbstractDataset):
+    """WildReceipt dataset from `"Spatial Dual-Modality Graph Reasoning for Key Information Extraction"
+        <https://arxiv.org/abs/2103.14470v1>`_ |
+    `repository <https://download.openmmlab.com/mmocr/data/wildreceipt.tar>`_.
+    .. image:: https://doctr-static.mindee.com/models?id=v0.7.0/wildreceipt-dataset.jpg&src=0
+        :align: center
+    >>> # NOTE: You need to download the dataset first.
+    >>> from doctr.datasets import WILDRECEIPT
+    >>> train_set = WILDRECEIPT(train=True, img_folder="/path/to/wildreceipt/",
+    >>>                     label_path="/path/to/wildreceipt/train.txt")
+    >>> img, target = train_set[0]
+    >>> test_set = WILDRECEIPT(train=False, img_folder="/path/to/wildreceipt/",
+    >>>                    label_path="/path/to/wildreceipt/test.txt")
+    >>> img, target = test_set[0]
+    Args:
+    ----
+        img_folder: folder with all the images of the dataset
+        label_path: path to the annotations file of the dataset
+        train: whether the subset should be the training one
+        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
+        recognition_task: whether the dataset should be used for recognition task
+        **kwargs: keyword arguments from `AbstractDataset`.
+    """
+    def __init__(
+        self,
+        img_folder: str,
+        label_path: str,
+        train: bool = True,
+        use_polygons: bool = False,
+        recognition_task: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(
+            img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
+        )
+        # File existence check
+        if not os.path.exists(label_path) or not os.path.exists(img_folder):
+            raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
+        tmp_root = img_folder
+        self.train = train
+        np_dtype = np.float32
+        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        with open(label_path, "r") as file:
+            data = file.read()
+        # Split the text file into separate JSON strings
+        json_strings = data.strip().split("\n")
+        box: Union[List[float], np.ndarray]
+        _targets = []
+        for json_string in json_strings:
+            json_data = json.loads(json_string)
+            img_path = json_data["file_name"]
+            annotations = json_data["annotations"]
+            for annotation in annotations:
+                coordinates = annotation["box"]
+                if use_polygons:
+                    # (x, y) coordinates of top left, top right, bottom right, bottom left corners
+                    box = np.array(
+                        [
+                            [coordinates[0], coordinates[1]],
+                            [coordinates[2], coordinates[3]],
+                            [coordinates[4], coordinates[5]],
+                            [coordinates[6], coordinates[7]],
+                        ],
+                        dtype=np_dtype,
+                    )
+                else:
+                    x, y = coordinates[::2], coordinates[1::2]
+                    box = [min(x), min(y), max(x), max(y)]
+                _targets.append((annotation["text"], box))
+            text_targets, box_targets = zip(*_targets)
+            if recognition_task:
+                crops = crop_bboxes_from_image(
+                    img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
+                )
+                for crop, label in zip(crops, list(text_targets)):
+                    if label and " " not in label:
+                        self.data.append((crop, label))
+            else:
+                self.data.append((
+                    img_path,
+                    dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)),
+                ))
+        self.root = tmp_root
+    def extra_repr(self) -> str:
+        return f"train={self.train}"

doctr/file_utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -83,8 +83,10 @@ if not _torch_available and not _tf_available:  # pragma: no cover
 def is_torch_available():
+    """Whether PyTorch is installed."""
     return _torch_available
 def is_tf_available():
+    """Whether TensorFlow is installed."""
     return _tf_available

doctr/io/elements.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -39,7 +39,6 @@ class Element(NestedObject):
     def export(self) -> Dict[str, Any]:
         """Exports the object into a nested dict format"""
         export_dict = {k: getattr(self, k) for k in self._exported_keys}
         for children_name in self._children_names:
             if children_name in ["predictions"]:
@@ -63,6 +62,7 @@ class Word(Element):
     """Implements a word element
     Args:
+    ----
         value: the text string of the word
         confidence: the confidence associated with the text prediction
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -95,6 +95,7 @@ class Artefact(Element):
     """Implements a non-textual element
     Args:
+    ----
         artefact_type: the type of artefact
         confidence: the confidence of the type prediction
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -127,6 +128,7 @@ class Line(Element):
     """Implements a line element as a collection of words
     Args:
+    ----
         words: list of word elements
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
             the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
@@ -158,11 +160,9 @@ class Line(Element):
     @classmethod
     def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
-        kwargs.update(
-            {
-                "words": [Word.from_dict(_dict) for _dict in save_dict["words"]],
-            }
-        )
+        kwargs.update({
+            "words": [Word.from_dict(_dict) for _dict in save_dict["words"]],
+        })
         return cls(**kwargs)
@@ -181,6 +181,7 @@ class Block(Element):
     """Implements a block element as a collection of lines and artefacts
     Args:
+    ----
         lines: list of line elements
         artefacts: list of artefacts
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -218,12 +219,10 @@ class Block(Element):
     @classmethod
     def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
-        kwargs.update(
-            {
-                "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]],
-                "artefacts": [Artefact.from_dict(_dict) for _dict in save_dict["artefacts"]],
-            }
-        )
+        kwargs.update({
+            "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]],
+            "artefacts": [Artefact.from_dict(_dict) for _dict in save_dict["artefacts"]],
+        })
         return cls(**kwargs)
@@ -231,6 +230,8 @@ class Page(Element):
     """Implements a page element as a collection of blocks
     Args:
+    ----
+        page: image encoded as a numpy array in uint8
         blocks: list of block elements
         page_idx: the index of the page in the input raw document
         dimensions: the page size in pixels in format (height, width)
@@ -244,6 +245,7 @@ class Page(Element):
     def __init__(
         self,
+        page: np.ndarray,
         blocks: List[Block],
         page_idx: int,
         dimensions: Tuple[int, int],
@@ -251,6 +253,7 @@ class Page(Element):
         language: Optional[Dict[str, Any]] = None,
     ) -> None:
         super().__init__(blocks=blocks)
+        self.page = page
         self.page_idx = page_idx
         self.dimensions = dimensions
         self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None)
@@ -263,24 +266,24 @@ class Page(Element):
     def extra_repr(self) -> str:
         return f"dimensions={self.dimensions}"
-    def show(self, page: np.ndarray, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
+    def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
         """Overlay the result on a given image
         Args:
-            page: image encoded as a numpy array in uint8
             interactive: whether the display should be interactive
             preserve_aspect_ratio: pass True if you passed True to the predictor
+            **kwargs: additional keyword arguments passed to the matplotlib.pyplot.show method
         """
-        visualize_page(self.export(), page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
+        visualize_page(self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
         plt.show(**kwargs)
     def synthesize(self, **kwargs) -> np.ndarray:
         """Synthesize the page from the predictions
-        Returns:
+        Returns
+        -------
             synthesized page
         """
         return synthesize_page(self.export(), **kwargs)
     def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]:
@@ -288,9 +291,11 @@ class Page(Element):
         convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
         Args:
+        ----
             file_title: the title of the XML file
         Returns:
+        -------
             a tuple of the XML byte string, and its ElementTree
         """
         p_idx = self.page_idx
@@ -398,7 +403,9 @@ class KIEPage(Element):
     """Implements a KIE page element as a collection of predictions
     Args:
+    ----
         predictions: Dictionary with list of block elements for each detection class
+        page: image encoded as a numpy array in uint8
         page_idx: the index of the page in the input raw document
         dimensions: the page size in pixels in format (height, width)
         orientation: a dictionary with the value of the rotation angle in degress and confidence of the prediction
@@ -411,6 +418,7 @@ class KIEPage(Element):
     def __init__(
         self,
+        page: np.ndarray,
         predictions: Dict[str, List[Prediction]],
         page_idx: int,
         dimensions: Tuple[int, int],
@@ -418,6 +426,7 @@ class KIEPage(Element):
         language: Optional[Dict[str, Any]] = None,
     ) -> None:
         super().__init__(predictions=predictions)
+        self.page = page
         self.page_idx = page_idx
         self.dimensions = dimensions
         self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None)
@@ -432,24 +441,30 @@ class KIEPage(Element):
     def extra_repr(self) -> str:
         return f"dimensions={self.dimensions}"
-    def show(self, page: np.ndarray, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
+    def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
         """Overlay the result on a given image
         Args:
-            page: image encoded as a numpy array in uint8
             interactive: whether the display should be interactive
             preserve_aspect_ratio: pass True if you passed True to the predictor
+            **kwargs: keyword arguments passed to the matplotlib.pyplot.show method
         """
-        visualize_kie_page(self.export(), page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
+        visualize_kie_page(
+            self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio
+        )
         plt.show(**kwargs)
     def synthesize(self, **kwargs) -> np.ndarray:
         """Synthesize the page from the predictions
+        Args:
+        ----
+            **kwargs: keyword arguments passed to the matplotlib.pyplot.show method
         Returns:
+        -------
             synthesized page
         """
         return synthesize_kie_page(self.export(), **kwargs)
     def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]:
@@ -457,9 +472,11 @@ class KIEPage(Element):
         convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
         Args:
+        ----
             file_title: the title of the XML file
         Returns:
+        -------
             a tuple of the XML byte string, and its ElementTree
         """
         p_idx = self.page_idx
@@ -517,9 +534,9 @@ class KIEPage(Element):
     @classmethod
     def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
-        kwargs.update(
-            {"predictions": [Prediction.from_dict(predictions_dict) for predictions_dict in save_dict["predictions"]]}
-        )
+        kwargs.update({
+            "predictions": [Prediction.from_dict(predictions_dict) for predictions_dict in save_dict["predictions"]]
+        })
         return cls(**kwargs)
@@ -527,6 +544,7 @@ class Document(Element):
     """Implements a document element as a collection of pages
     Args:
+    ----
         pages: list of page elements
     """
@@ -543,31 +561,29 @@ class Document(Element):
         """Renders the full text of the element"""
         return page_break.join(p.render() for p in self.pages)
-    def show(self, pages: List[np.ndarray], **kwargs) -> None:
-        """Overlay the result on a given image
-        Args:
-            pages: list of images encoded as numpy arrays in uint8
-        """
-        for img, result in zip(pages, self.pages):
-            result.show(img, **kwargs)
+    def show(self, **kwargs) -> None:
+        """Overlay the result on a given image"""
+        for result in self.pages:
+            result.show(**kwargs)
     def synthesize(self, **kwargs) -> List[np.ndarray]:
         """Synthesize all pages from their predictions
-        Returns:
+        Returns
+        -------
             list of synthesized pages
         """
         return [page.synthesize() for page in self.pages]
     def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
         """Export the document as XML (hOCR-format)
         Args:
+        ----
             **kwargs: additional keyword arguments passed to the Page.export_as_xml method
         Returns:
+        -------
             list of tuple of (bytes, ElementTree)
         """
         return [page.export_as_xml(**kwargs) for page in self.pages]
@@ -583,6 +599,7 @@ class KIEDocument(Document):
     """Implements a document element as a collection of pages
     Args:
+    ----
         pages: list of page elements
     """

doctr/io/html.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -13,14 +13,16 @@ __all__ = ["read_html"]
 def read_html(url: str, **kwargs: Any) -> bytes:
     """Read a PDF file and convert it into an image in numpy format
-    >>> from doctr.documents import read_html
+    >>> from doctr.io import read_html
     >>> doc = read_html("https://www.yoursite.com")
     Args:
+    ----
         url: URL of the target web page
+        **kwargs: keyword arguments from `weasyprint.HTML`
     Returns:
+    -------
         decoded PDF file as a bytes stream
     """
     return HTML(url, **kwargs).write_pdf()

doctr/io/image/base.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2023, Mindee.
+# Copyright (C) 2021-2024, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -21,18 +21,19 @@ def read_img_as_numpy(
 ) -> np.ndarray:
     """Read an image file into numpy format
-    >>> from doctr.documents import read_img
-    >>> page = read_img("path/to/your/doc.jpg")
+    >>> from doctr.io import read_img_as_numpy
+    >>> page = read_img_as_numpy("path/to/your/doc.jpg")
     Args:
+    ----
         file: the path to the image file
         output_size: the expected output size of each page in format H x W
         rgb_output: whether the output ndarray channel order should be RGB instead of BGR.
     Returns:
+    -------
         the page decoded as numpy ndarray of shape H x W x 3
     """
     if isinstance(file, (str, Path)):
         if not Path(file).is_file():
             raise FileNotFoundError(f"unable to access {file}")

python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl