python-doctr 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- doctr/__init__.py +1 -1
- doctr/contrib/__init__.py +0 -0
- doctr/contrib/artefacts.py +131 -0
- doctr/contrib/base.py +105 -0
- doctr/datasets/datasets/pytorch.py +2 -2
- doctr/datasets/generator/base.py +6 -5
- doctr/datasets/imgur5k.py +1 -1
- doctr/datasets/loader.py +1 -6
- doctr/datasets/utils.py +2 -1
- doctr/datasets/vocabs.py +9 -2
- doctr/file_utils.py +26 -12
- doctr/io/elements.py +40 -6
- doctr/io/html.py +2 -2
- doctr/io/image/pytorch.py +6 -8
- doctr/io/image/tensorflow.py +1 -1
- doctr/io/pdf.py +5 -2
- doctr/io/reader.py +6 -0
- doctr/models/__init__.py +0 -1
- doctr/models/_utils.py +57 -20
- doctr/models/builder.py +71 -13
- doctr/models/classification/mobilenet/pytorch.py +45 -9
- doctr/models/classification/mobilenet/tensorflow.py +38 -7
- doctr/models/classification/predictor/pytorch.py +18 -11
- doctr/models/classification/predictor/tensorflow.py +16 -10
- doctr/models/classification/textnet/pytorch.py +3 -3
- doctr/models/classification/textnet/tensorflow.py +3 -3
- doctr/models/classification/zoo.py +39 -15
- doctr/models/detection/__init__.py +1 -0
- doctr/models/detection/_utils/__init__.py +1 -0
- doctr/models/detection/_utils/base.py +66 -0
- doctr/models/detection/differentiable_binarization/base.py +4 -3
- doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
- doctr/models/detection/differentiable_binarization/tensorflow.py +14 -18
- doctr/models/detection/fast/__init__.py +6 -0
- doctr/models/detection/fast/base.py +257 -0
- doctr/models/detection/fast/pytorch.py +442 -0
- doctr/models/detection/fast/tensorflow.py +428 -0
- doctr/models/detection/linknet/base.py +4 -3
- doctr/models/detection/predictor/pytorch.py +15 -1
- doctr/models/detection/predictor/tensorflow.py +15 -1
- doctr/models/detection/zoo.py +21 -4
- doctr/models/factory/hub.py +3 -12
- doctr/models/kie_predictor/base.py +9 -3
- doctr/models/kie_predictor/pytorch.py +41 -20
- doctr/models/kie_predictor/tensorflow.py +36 -16
- doctr/models/modules/layers/pytorch.py +89 -10
- doctr/models/modules/layers/tensorflow.py +88 -10
- doctr/models/modules/transformer/pytorch.py +2 -2
- doctr/models/predictor/base.py +77 -50
- doctr/models/predictor/pytorch.py +31 -20
- doctr/models/predictor/tensorflow.py +27 -17
- doctr/models/preprocessor/pytorch.py +4 -4
- doctr/models/preprocessor/tensorflow.py +3 -2
- doctr/models/recognition/master/pytorch.py +2 -2
- doctr/models/recognition/parseq/pytorch.py +4 -3
- doctr/models/recognition/parseq/tensorflow.py +4 -3
- doctr/models/recognition/sar/pytorch.py +7 -6
- doctr/models/recognition/sar/tensorflow.py +3 -9
- doctr/models/recognition/vitstr/pytorch.py +1 -1
- doctr/models/recognition/zoo.py +1 -1
- doctr/models/zoo.py +2 -2
- doctr/py.typed +0 -0
- doctr/transforms/functional/base.py +1 -1
- doctr/transforms/functional/pytorch.py +4 -4
- doctr/transforms/modules/base.py +37 -15
- doctr/transforms/modules/pytorch.py +66 -8
- doctr/transforms/modules/tensorflow.py +63 -7
- doctr/utils/fonts.py +7 -5
- doctr/utils/geometry.py +35 -12
- doctr/utils/metrics.py +33 -174
- doctr/utils/reconstitution.py +126 -0
- doctr/utils/visualization.py +5 -118
- doctr/version.py +1 -1
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/METADATA +96 -91
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/RECORD +79 -75
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/WHEEL +1 -1
- doctr/models/artefacts/__init__.py +0 -2
- doctr/models/artefacts/barcode.py +0 -74
- doctr/models/artefacts/face.py +0 -63
- doctr/models/obj_detection/__init__.py +0 -1
- doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
- doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/zip-safe +0 -0
doctr/__init__.py
CHANGED
File without changes

doctr/contrib/artefacts.py
ADDED
@@ -0,0 +1,131 @@
+# Copyright (C) 2021-2024, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any, Dict, List, Optional, Tuple
+
+import cv2
+import numpy as np
+
+from doctr.file_utils import requires_package
+
+from .base import _BasePredictor
+
+__all__ = ["ArtefactDetector"]
+
+default_cfgs: Dict[str, Dict[str, Any]] = {
+    "yolov8_artefact": {
+        "input_shape": (3, 1024, 1024),
+        "labels": ["bar_code", "qr_code", "logo", "photo"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/yolo_artefact-f9d66f14.onnx&src=0",
+    },
+}
+
+
+class ArtefactDetector(_BasePredictor):
+    """
+    A class to detect artefacts in images
+
+    >>> from doctr.io import DocumentFile
+    >>> from doctr.contrib.artefacts import ArtefactDetector
+    >>> doc = DocumentFile.from_images(["path/to/image.jpg"])
+    >>> detector = ArtefactDetector()
+    >>> results = detector(doc)
+
+    Args:
+    ----
+        arch: the architecture to use
+        batch_size: the batch size to use
+        model_path: the path to the model to use
+        labels: the labels to use
+        input_shape: the input shape to use
+        mask_labels: the mask labels to use
+        conf_threshold: the confidence threshold to use
+        iou_threshold: the intersection over union threshold to use
+        **kwargs: additional arguments to be passed to `download_from_url`
+    """
+
+    def __init__(
+        self,
+        arch: str = "yolov8_artefact",
+        batch_size: int = 2,
+        model_path: Optional[str] = None,
+        labels: Optional[List[str]] = None,
+        input_shape: Optional[Tuple[int, int, int]] = None,
+        conf_threshold: float = 0.5,
+        iou_threshold: float = 0.5,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(batch_size=batch_size, url=default_cfgs[arch]["url"], model_path=model_path, **kwargs)
+        self.labels = labels or default_cfgs[arch]["labels"]
+        self.input_shape = input_shape or default_cfgs[arch]["input_shape"]
+        self.conf_threshold = conf_threshold
+        self.iou_threshold = iou_threshold
+
+    def preprocess(self, img: np.ndarray) -> np.ndarray:
+        return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)
+
+    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> List[List[Dict[str, Any]]]:
+        results = []
+
+        for batch in zip(output, input_images):
+            for out, img in zip(batch[0], batch[1]):
+                org_height, org_width = img.shape[:2]
+                width_scale, height_scale = org_width / self.input_shape[2], org_height / self.input_shape[1]
+                for res in out:
+                    sample_results = []
+                    for row in np.transpose(np.squeeze(res)):
+                        classes_scores = row[4:]
+                        max_score = np.amax(classes_scores)
+                        if max_score >= self.conf_threshold:
+                            class_id = np.argmax(classes_scores)
+                            x, y, w, h = row[0], row[1], row[2], row[3]
+                            # to rescaled xmin, ymin, xmax, ymax
+                            xmin = int((x - w / 2) * width_scale)
+                            ymin = int((y - h / 2) * height_scale)
+                            xmax = int((x + w / 2) * width_scale)
+                            ymax = int((y + h / 2) * height_scale)
+
+                            sample_results.append({
+                                "label": self.labels[class_id],
+                                "confidence": float(max_score),
+                                "box": [xmin, ymin, xmax, ymax],
+                            })
+
+                    # Filter out overlapping boxes
+                    boxes = [res["box"] for res in sample_results]
+                    scores = [res["confidence"] for res in sample_results]
+                    keep_indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf_threshold, self.iou_threshold)  # type: ignore[arg-type]
+                    sample_results = [sample_results[i] for i in keep_indices]
+
+                    results.append(sample_results)
+
+        self._results = results
+        return results
+
+    def show(self, **kwargs: Any) -> None:
+        """
+        Display the results
+
+        Args:
+        ----
+            **kwargs: additional keyword arguments to be passed to `plt.show`
+        """
+        requires_package("matplotlib", "`.show()` requires matplotlib installed")
+        import matplotlib.pyplot as plt
+        from matplotlib.patches import Rectangle
+
+        # visualize the results with matplotlib
+        if self._results and self._inputs:
+            for img, res in zip(self._inputs, self._results):
+                plt.figure(figsize=(10, 10))
+                plt.imshow(img)
+                for obj in res:
+                    xmin, ymin, xmax, ymax = obj["box"]
+                    label = obj["label"]
+                    plt.text(xmin, ymin, f"{label} {obj['confidence']:.2f}", color="red")
+                    plt.gca().add_patch(
+                        Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor="red", linewidth=2)
+                    )
+                plt.show(**kwargs)
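
For readers unfamiliar with YOLO-style outputs, the `postprocess` above decodes rows of `(x_center, y_center, width, height, *class_scores)` expressed in model-input pixels, rescales the corners to the original image size, and then prunes overlaps with OpenCV's NMS. A standalone sketch of just the decoding step, with made-up numbers for illustration:

# Decode one YOLO row into an absolute (xmin, ymin, xmax, ymax) box
import numpy as np

row = np.array([512.0, 256.0, 100.0, 40.0, 0.1, 0.9, 0.0, 0.0])  # hypothetical row
labels = ["bar_code", "qr_code", "logo", "photo"]
input_w, input_h = 1024, 1024  # model input size (matches default_cfgs)
org_w, org_h = 2048, 1536      # hypothetical original image size

width_scale, height_scale = org_w / input_w, org_h / input_h
scores = row[4:]
class_id = int(np.argmax(scores))
x, y, w, h = row[:4]
box = [
    int((x - w / 2) * width_scale),   # xmin
    int((y - h / 2) * height_scale),  # ymin
    int((x + w / 2) * width_scale),   # xmax
    int((y + h / 2) * height_scale),  # ymax
]
print(labels[class_id], float(scores[class_id]), box)  # qr_code 0.9 [924, 354, 1124, 414]
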
doctr/contrib/base.py
ADDED
@@ -0,0 +1,105 @@
+# Copyright (C) 2021-2024, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any, List, Optional
+
+import numpy as np
+
+from doctr.file_utils import requires_package
+from doctr.utils.data import download_from_url
+
+
+class _BasePredictor:
+    """
+    Base class for all predictors
+
+    Args:
+    ----
+        batch_size: the batch size to use
+        url: the url to use to download a model if needed
+        model_path: the path to the model to use
+        **kwargs: additional arguments to be passed to `download_from_url`
+    """
+
+    def __init__(self, batch_size: int, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs) -> None:
+        self.batch_size = batch_size
+        self.session = self._init_model(url, model_path, **kwargs)
+
+        self._inputs: List[np.ndarray] = []
+        self._results: List[Any] = []
+
+    def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs: Any) -> Any:
+        """
+        Download the model from the given url if needed
+
+        Args:
+        ----
+            url: the url to use
+            model_path: the path to the model to use
+            **kwargs: additional arguments to be passed to `download_from_url`
+
+        Returns:
+        -------
+            Any: the ONNX loaded model
+        """
+        requires_package("onnxruntime", "`.contrib` module requires `onnxruntime` to be installed.")
+        import onnxruntime as ort
+
+        if not url and not model_path:
+            raise ValueError("You must provide either a url or a model_path")
+        onnx_model_path = model_path if model_path else str(download_from_url(url, cache_subdir="models", **kwargs))  # type: ignore[arg-type]
+        return ort.InferenceSession(onnx_model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+
+    def preprocess(self, img: np.ndarray) -> np.ndarray:
+        """
+        Preprocess the input image
+
+        Args:
+        ----
+            img: the input image to preprocess
+
+        Returns:
+        -------
+            np.ndarray: the preprocessed image
+        """
+        raise NotImplementedError
+
+    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
+        """
+        Postprocess the model output
+
+        Args:
+        ----
+            output: the model output to postprocess
+            input_images: the input images used to generate the output
+
+        Returns:
+        -------
+            Any: the postprocessed output
+        """
+        raise NotImplementedError
+
+    def __call__(self, inputs: List[np.ndarray]) -> Any:
+        """
+        Call the model on the given inputs
+
+        Args:
+        ----
+            inputs: the inputs to use
+
+        Returns:
+        -------
+            Any: the postprocessed output
+        """
+        self._inputs = inputs
+        model_inputs = self.session.get_inputs()
+
+        batched_inputs = [inputs[i : i + self.batch_size] for i in range(0, len(inputs), self.batch_size)]
+        processed_batches = [
+            np.array([self.preprocess(img) for img in batch], dtype=np.float32) for batch in batched_inputs
+        ]
+
+        outputs = [self.session.run(None, {model_inputs[0].name: batch}) for batch in processed_batches]
+        return self.postprocess(outputs, batched_inputs)
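
`_BasePredictor` wraps an ONNX Runtime session and takes care of batching, leaving `preprocess` and `postprocess` to subclasses. A minimal sketch of a custom subclass, assuming `onnxruntime` is installed and that `my_model.onnx` is a placeholder for a real model file:

# Sketch of a custom ONNX predictor built on the new base class
from typing import Any, List

import numpy as np

from doctr.contrib.base import _BasePredictor


class RawOnnxPredictor(_BasePredictor):
    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # HWC uint8 -> CHW float32 in [0, 1]
        return np.transpose(img, (2, 0, 1)).astype(np.float32) / 255.0

    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
        # return the raw per-batch outputs untouched
        return output


predictor = RawOnnxPredictor(batch_size=2, model_path="my_model.onnx")  # placeholder path
# results = predictor([np.zeros((64, 64, 3), dtype=np.uint8)])
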
doctr/datasets/datasets/pytorch.py
CHANGED
@@ -50,9 +50,9 @@ class AbstractDataset(_AbstractDataset):
     @staticmethod
     def collate_fn(samples: List[Tuple[torch.Tensor, Any]]) -> Tuple[torch.Tensor, List[Any]]:
         images, targets = zip(*samples)
-        images = torch.stack(images, dim=0)
+        images = torch.stack(images, dim=0)  # type: ignore[assignment]
 
-        return images, list(targets)
+        return images, list(targets)  # type: ignore[return-value]
 
 
 class VisionDataset(AbstractDataset, _VisionDataset):  # noqa: D101
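
The `collate_fn` touched above stacks same-sized image tensors into one batch tensor and keeps the targets as a plain Python list. A tiny self-contained illustration:

import torch

samples = [(torch.rand(3, 32, 128), "hello"), (torch.rand(3, 32, 128), "world")]
images, targets = zip(*samples)
images = torch.stack(images, dim=0)  # stack along a new batch dimension

print(images.shape)   # torch.Size([2, 3, 32, 128])
print(list(targets))  # ['hello', 'world']
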
doctr/datasets/generator/base.py
CHANGED
@@ -20,7 +20,7 @@ def synthesize_text_img(
     font_family: Optional[str] = None,
     background_color: Optional[Tuple[int, int, int]] = None,
     text_color: Optional[Tuple[int, int, int]] = None,
-) -> Image:
+) -> Image.Image:
     """Generate a synthetic text image
 
     Args:
@@ -81,7 +81,7 @@ class _CharacterGenerator(AbstractDataset):
         self._data: List[Image.Image] = []
         if cache_samples:
             self._data = [
-                (synthesize_text_img(char, font_family=font), idx)
+                (synthesize_text_img(char, font_family=font), idx)  # type: ignore[misc]
                 for idx, char in enumerate(self.vocab)
                 for font in self.font_family
             ]
@@ -93,7 +93,7 @@ class _CharacterGenerator(AbstractDataset):
         # Samples are already cached
         if len(self._data) > 0:
             idx = index % len(self._data)
-            pil_img, target = self._data[idx]
+            pil_img, target = self._data[idx]  # type: ignore[misc]
         else:
             target = index % len(self.vocab)
             pil_img = synthesize_text_img(self.vocab[target], font_family=random.choice(self.font_family))
@@ -132,7 +132,8 @@ class _WordGenerator(AbstractDataset):
         if cache_samples:
             _words = [self._generate_string(*self.wordlen_range) for _ in range(num_samples)]
             self._data = [
-                (synthesize_text_img(text, font_family=random.choice(self.font_family)), text)
+                (synthesize_text_img(text, font_family=random.choice(self.font_family)), text)  # type: ignore[misc]
+                for text in _words
             ]
 
     def _generate_string(self, min_chars: int, max_chars: int) -> str:
@@ -145,7 +146,7 @@ class _WordGenerator(AbstractDataset):
     def _read_sample(self, index: int) -> Tuple[Any, str]:
         # Samples are already cached
         if len(self._data) > 0:
-            pil_img, target = self._data[index]
+            pil_img, target = self._data[index]  # type: ignore[misc]
         else:
             target = self._generate_string(*self.wordlen_range)
             pil_img = synthesize_text_img(target, font_family=random.choice(self.font_family))
doctr/datasets/imgur5k.py
CHANGED
@@ -112,7 +112,7 @@ class IMGUR5K(AbstractDataset):
                 if ann["word"] != "."
             ]
             # (x, y) coordinates of top left, top right, bottom right, bottom left corners
-            box_targets = [cv2.boxPoints(((box[0], box[1]), (box[2], box[3]), box[4])) for box in _boxes]
+            box_targets = [cv2.boxPoints(((box[0], box[1]), (box[2], box[3]), box[4])) for box in _boxes]
 
             if not use_polygons:
                 # xmin, ymin, xmax, ymax
doctr/datasets/loader.py
CHANGED
@@ -9,8 +9,6 @@ from typing import Callable, Optional
 import numpy as np
 import tensorflow as tf
 
-from doctr.utils.multithreading import multithread_exec
-
 __all__ = ["DataLoader"]
 
 
@@ -47,7 +45,6 @@ class DataLoader:
         shuffle: whether the samples should be shuffled before passing it to the iterator
         batch_size: number of elements in each batch
         drop_last: if `True`, drops the last batch if it isn't full
-        num_workers: number of workers to use for data loading
         collate_fn: function to merge samples into a batch
     """
 
@@ -57,7 +54,6 @@ class DataLoader:
         shuffle: bool = True,
         batch_size: int = 1,
         drop_last: bool = False,
-        num_workers: Optional[int] = None,
         collate_fn: Optional[Callable] = None,
     ) -> None:
         self.dataset = dataset
@@ -69,7 +65,6 @@ class DataLoader:
             self.collate_fn = self.dataset.collate_fn if hasattr(self.dataset, "collate_fn") else default_collate
         else:
             self.collate_fn = collate_fn
-        self.num_workers = num_workers
         self.reset()
 
     def __len__(self) -> int:
@@ -92,7 +87,7 @@ class DataLoader:
         idx = self._num_yielded * self.batch_size
         indices = self.indices[idx : min(len(self.dataset), idx + self.batch_size)]
 
-        samples = list(
+        samples = list(map(self.dataset.__getitem__, indices))
 
         batch_data = self.collate_fn(samples)
 
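
With `multithread_exec` and `num_workers` gone, the TensorFlow `DataLoader` now fetches each batch with a plain `map` over the selected indices. The batching pattern in isolation, with a stand-in dataset:

# Stand-in for a dataset object that supports __getitem__ and __len__
dataset = [(f"img_{i}", f"target_{i}") for i in range(10)]
batch_size = 4

for start in range(0, len(dataset), batch_size):
    indices = range(start, min(len(dataset), start + batch_size))
    samples = list(map(dataset.__getitem__, indices))
    print(len(samples), samples[0])
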
doctr/datasets/utils.py
CHANGED
@@ -186,7 +186,8 @@ def crop_bboxes_from_image(img_path: Union[str, Path], geoms: np.ndarray) -> Lis
     -------
         a list of cropped images
     """
-
+    with Image.open(img_path) as pil_img:
+        img: np.ndarray = np.array(pil_img.convert("RGB"))
     # Polygon
     if geoms.ndim == 3 and geoms.shape[1:] == (4, 2):
         return extract_rcrops(img, geoms.astype(dtype=int))
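
The rewritten body opens the image inside a context manager so the file handle is released as soon as the pixels are copied into a NumPy array. The same pattern in isolation (the path is a placeholder):

import numpy as np
from PIL import Image

with Image.open("path/to/image.jpg") as pil_img:  # hypothetical path
    img: np.ndarray = np.array(pil_img.convert("RGB"))

print(img.shape)  # (H, W, 3)
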
doctr/datasets/vocabs.py
CHANGED
@@ -17,9 +17,14 @@ VOCABS: Dict[str, str] = {
     "ancient_greek": "αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
     "arabic_letters": "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىي",
     "persian_letters": "پچڢڤگ",
-    "
+    "arabic_digits": "٠١٢٣٤٥٦٧٨٩",
     "arabic_diacritics": "ًٌٍَُِّْ",
     "arabic_punctuation": "؟؛«»—",
+    "hindi_letters": "अआइईउऊऋॠऌॡएऐओऔअंअःकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह",
+    "hindi_digits": "०१२३४५६७८९",
+    "hindi_punctuation": "।,?!:्ॐ॰॥॰",
+    "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
+    "bangla_digits": "০১২৩৪৫৬৭৮৯",
 }
 
 VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
@@ -32,7 +37,7 @@ VOCABS["italian"] = VOCABS["english"] + "àèéìíîòóùúÀÈÉÌÍÎÒÓÙ
 VOCABS["german"] = VOCABS["english"] + "äöüßÄÖÜẞ"
 VOCABS["arabic"] = (
     VOCABS["digits"]
-    + VOCABS["
+    + VOCABS["arabic_digits"]
     + VOCABS["arabic_letters"]
     + VOCABS["persian_letters"]
     + VOCABS["arabic_diacritics"]
@@ -52,6 +57,8 @@ VOCABS["vietnamese"] = (
     + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
 )
 VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
+VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
+VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
 VOCABS["multilingual"] = "".join(
     dict.fromkeys(
         VOCABS["french"]
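
The new Hindi and Bangla vocabularies are composed the same way as the existing ones: plain string concatenation, with `dict.fromkeys` used to drop duplicate characters when several vocabs are merged. A shortened sketch (stand-in strings, not the full entries):

VOCABS = {
    "hindi_letters": "अआइईउ",  # shortened stand-in
    "hindi_digits": "०१२३४५६७८९",
    "hindi_punctuation": "।,?!",
}
VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]

# merging vocabs while removing duplicate characters, as done for "multilingual"
merged = "".join(dict.fromkeys(VOCABS["hindi"] + VOCABS["hindi_digits"]))
print(len(VOCABS["hindi"]), len(merged))
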
doctr/file_utils.py
CHANGED
@@ -5,21 +5,16 @@
 
 # Adapted from https://github.com/huggingface/transformers/blob/master/src/transformers/file_utils.py
 
+import importlib.metadata
 import importlib.util
 import logging
 import os
-import
+from typing import Optional
 
 CLASS_NAME: str = "words"
 
 
-
-    import importlib_metadata
-else:
-    import importlib.metadata as importlib_metadata
-
-
-__all__ = ["is_tf_available", "is_torch_available", "CLASS_NAME"]
+__all__ = ["is_tf_available", "is_torch_available", "requires_package", "CLASS_NAME"]
 
 ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
 ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
@@ -32,9 +27,9 @@ if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VA
     _torch_available = importlib.util.find_spec("torch") is not None
     if _torch_available:
         try:
-            _torch_version =
+            _torch_version = importlib.metadata.version("torch")
             logging.info(f"PyTorch version {_torch_version} available.")
-        except
+        except importlib.metadata.PackageNotFoundError:  # pragma: no cover
             _torch_available = False
 else:  # pragma: no cover
     logging.info("Disabling PyTorch because USE_TF is set")
@@ -59,9 +54,9 @@ if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VA
     # For the metadata, we have to look for both tensorflow and tensorflow-cpu
     for pkg in candidates:
         try:
-            _tf_version =
+            _tf_version = importlib.metadata.version(pkg)
             break
-        except
+        except importlib.metadata.PackageNotFoundError:
             pass
     _tf_available = _tf_version is not None
     if _tf_available:
@@ -82,6 +77,25 @@ if not _torch_available and not _tf_available:  # pragma: no cover
     )
 
 
+def requires_package(name: str, extra_message: Optional[str] = None) -> None:  # pragma: no cover
+    """
+    package requirement helper
+
+    Args:
+    ----
+        name: name of the package
+        extra_message: additional message to display if the package is not found
+    """
+    try:
+        _pkg_version = importlib.metadata.version(name)
+        logging.info(f"{name} version {_pkg_version} available.")
+    except importlib.metadata.PackageNotFoundError:
+        raise ImportError(
+            f"\n\n{extra_message if extra_message is not None else ''} "
+            f"\nPlease install it with the following command: pip install {name}\n"
+        )
+
+
 def is_torch_available():
     """Whether PyTorch is installed."""
     return _torch_available
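
The new `requires_package` helper probes the installed distribution through `importlib.metadata` and raises an actionable `ImportError` before any import is attempted. Typical usage, mirroring how the rest of the codebase calls it:

from doctr.file_utils import requires_package


def show_something() -> None:
    requires_package("matplotlib", "`show_something()` requires matplotlib installed")
    import matplotlib.pyplot as plt  # safe: the package is known to be present

    plt.plot([0, 1], [0, 1])
    plt.show()
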
doctr/io/elements.py
CHANGED
@@ -12,14 +12,19 @@ from xml.etree import ElementTree as ET
 from xml.etree.ElementTree import Element as ETElement
 from xml.etree.ElementTree import SubElement
 
-import matplotlib.pyplot as plt
 import numpy as np
 
 import doctr
+from doctr.file_utils import requires_package
 from doctr.utils.common_types import BoundingBox
 from doctr.utils.geometry import resolve_enclosing_bbox, resolve_enclosing_rbbox
+from doctr.utils.reconstitution import synthesize_kie_page, synthesize_page
 from doctr.utils.repr import NestedObject
-
+
+try:  # optional dependency for visualization
+    from doctr.utils.visualization import visualize_kie_page, visualize_page
+except ModuleNotFoundError:
+    pass
 
 __all__ = ["Element", "Word", "Artefact", "Line", "Prediction", "Block", "Page", "KIEPage", "Document"]
 
@@ -67,16 +72,27 @@ class Word(Element):
         confidence: the confidence associated with the text prediction
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
             the page's size
+        objectness_score: the objectness score of the detection
+        crop_orientation: the general orientation of the crop in degrees and its confidence
     """
 
-    _exported_keys: List[str] = ["value", "confidence", "geometry"]
+    _exported_keys: List[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"]
     _children_names: List[str] = []
 
-    def __init__(
+    def __init__(
+        self,
+        value: str,
+        confidence: float,
+        geometry: Union[BoundingBox, np.ndarray],
+        objectness_score: float,
+        crop_orientation: Dict[str, Any],
+    ) -> None:
         super().__init__()
         self.value = value
         self.confidence = confidence
         self.geometry = geometry
+        self.objectness_score = objectness_score
+        self.crop_orientation = crop_orientation
 
     def render(self) -> str:
         """Renders the full text of the element"""
@@ -135,7 +151,7 @@ class Line(Element):
     all words in it.
     """
 
-    _exported_keys: List[str] = ["geometry"]
+    _exported_keys: List[str] = ["geometry", "objectness_score"]
     _children_names: List[str] = ["words"]
     words: List[Word] = []
 
@@ -143,7 +159,11 @@ class Line(Element):
         self,
         words: List[Word],
         geometry: Optional[Union[BoundingBox, np.ndarray]] = None,
+        objectness_score: Optional[float] = None,
     ) -> None:
+        # Compute the objectness score of the line
+        if objectness_score is None:
+            objectness_score = float(np.mean([w.objectness_score for w in words]))
         # Resolve the geometry using the smallest enclosing bounding box
         if geometry is None:
             # Check whether this is a rotated or straight box
@@ -152,6 +172,7 @@ class Line(Element):
 
         super().__init__(words=words)
         self.geometry = geometry
+        self.objectness_score = objectness_score
 
     def render(self) -> str:
         """Renders the full text of the element"""
@@ -189,7 +210,7 @@ class Block(Element):
     all lines and artefacts in it.
     """
 
-    _exported_keys: List[str] = ["geometry"]
+    _exported_keys: List[str] = ["geometry", "objectness_score"]
     _children_names: List[str] = ["lines", "artefacts"]
     lines: List[Line] = []
     artefacts: List[Artefact] = []
@@ -199,7 +220,11 @@ class Block(Element):
         lines: List[Line] = [],
         artefacts: List[Artefact] = [],
         geometry: Optional[Union[BoundingBox, np.ndarray]] = None,
+        objectness_score: Optional[float] = None,
     ) -> None:
+        # Compute the objectness score of the line
+        if objectness_score is None:
+            objectness_score = float(np.mean([w.objectness_score for line in lines for w in line.words]))
         # Resolve the geometry using the smallest enclosing bounding box
         if geometry is None:
             line_boxes = [word.geometry for line in lines for word in line.words]
@@ -211,6 +236,7 @@ class Block(Element):
 
         super().__init__(lines=lines, artefacts=artefacts)
         self.geometry = geometry
+        self.objectness_score = objectness_score
 
     def render(self, line_break: str = "\n") -> str:
         """Renders the full text of the element"""
@@ -274,6 +300,10 @@ class Page(Element):
             preserve_aspect_ratio: pass True if you passed True to the predictor
             **kwargs: additional keyword arguments passed to the matplotlib.pyplot.show method
         """
+        requires_package("matplotlib", "`.show()` requires matplotlib & mplcursors installed")
+        requires_package("mplcursors", "`.show()` requires matplotlib & mplcursors installed")
+        import matplotlib.pyplot as plt
+
         visualize_page(self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
         plt.show(**kwargs)
 
@@ -449,6 +479,10 @@ class KIEPage(Element):
             preserve_aspect_ratio: pass True if you passed True to the predictor
             **kwargs: keyword arguments passed to the matplotlib.pyplot.show method
         """
+        requires_package("matplotlib", "`.show()` requires matplotlib & mplcursors installed")
+        requires_package("mplcursors", "`.show()` requires matplotlib & mplcursors installed")
+        import matplotlib.pyplot as plt
+
         visualize_kie_page(
             self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio
         )
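
Since `Word` now also carries `objectness_score` and `crop_orientation`, constructing one by hand takes two extra arguments. A small sketch with relative coordinates (the exact keys of the `crop_orientation` dict shown here are an assumption):

from doctr.io.elements import Word

word = Word(
    value="hello",
    confidence=0.99,
    geometry=((0.10, 0.10), (0.25, 0.15)),
    objectness_score=0.95,
    crop_orientation={"value": 0, "confidence": None},  # assumed key layout
)
print(word.export()["objectness_score"])
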
doctr/io/html.py
CHANGED
@@ -5,8 +5,6 @@
 
 from typing import Any
 
-from weasyprint import HTML
-
 __all__ = ["read_html"]
 
 
@@ -25,4 +23,6 @@ def read_html(url: str, **kwargs: Any) -> bytes:
     -------
         decoded PDF file as a bytes stream
     """
+    from weasyprint import HTML
+
     return HTML(url, **kwargs).write_pdf()
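
Because the `weasyprint` import moves into the function body, importing `doctr.io` no longer requires it; the dependency is only needed when `read_html` is actually called. Usage itself is unchanged (placeholder URL, `weasyprint` must be installed at call time):

from doctr.io import DocumentFile
from doctr.io.html import read_html

pdf_bytes = read_html("https://example.com")  # returns the rendered page as PDF bytes
doc = DocumentFile.from_pdf(pdf_bytes)
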
doctr/io/image/pytorch.py
CHANGED
@@ -16,7 +16,7 @@ from doctr.utils.common_types import AbstractPath
 __all__ = ["tensor_from_pil", "read_img_as_tensor", "decode_img_as_tensor", "tensor_from_numpy", "get_img_shape"]
 
 
-def tensor_from_pil(pil_img: Image, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+def tensor_from_pil(pil_img: Image.Image, dtype: torch.dtype = torch.float32) -> torch.Tensor:
     """Convert a PIL Image to a PyTorch tensor
 
     Args:
@@ -51,9 +51,8 @@ def read_img_as_tensor(img_path: AbstractPath, dtype: torch.dtype = torch.float3
     if dtype not in (torch.uint8, torch.float16, torch.float32):
         raise ValueError("insupported value for dtype")
 
-
-
-    return tensor_from_pil(pil_img, dtype)
+    with Image.open(img_path, mode="r") as pil_img:
+        return tensor_from_pil(pil_img.convert("RGB"), dtype)
 
 
 def decode_img_as_tensor(img_content: bytes, dtype: torch.dtype = torch.float32) -> torch.Tensor:
@@ -71,9 +70,8 @@ def decode_img_as_tensor(img_content: bytes, dtype: torch.dtype = torch.float32)
     if dtype not in (torch.uint8, torch.float16, torch.float32):
         raise ValueError("insupported value for dtype")
 
-
-
-    return tensor_from_pil(pil_img, dtype)
+    with Image.open(BytesIO(img_content), mode="r") as pil_img:
+        return tensor_from_pil(pil_img.convert("RGB"), dtype)
 
 
 def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -> torch.Tensor:
@@ -106,4 +104,4 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -
 
 def get_img_shape(img: torch.Tensor) -> Tuple[int, int]:
     """Get the shape of an image"""
-    return img.shape[-2:]
+    return img.shape[-2:]  # type: ignore[return-value]
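
The reading helpers now open the file inside a context manager, force RGB, and delegate to `tensor_from_pil`. A rough manual equivalent of that conversion for a float32 tensor (placeholder path; assumes the usual scaling to [0, 1]):

import numpy as np
import torch
from PIL import Image

with Image.open("path/to/image.jpg", mode="r") as pil_img:  # hypothetical path
    rgb = np.array(pil_img.convert("RGB"))  # (H, W, 3) uint8

img_t = torch.from_numpy(rgb).permute(2, 0, 1).to(torch.float32) / 255.0  # (3, H, W)
print(img_t.shape, img_t.dtype)
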
doctr/io/image/tensorflow.py
CHANGED
@@ -15,7 +15,7 @@ from doctr.utils.common_types import AbstractPath
 __all__ = ["tensor_from_pil", "read_img_as_tensor", "decode_img_as_tensor", "tensor_from_numpy", "get_img_shape"]
 
 
-def tensor_from_pil(pil_img: Image, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
+def tensor_from_pil(pil_img: Image.Image, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
     """Convert a PIL Image to a TensorFlow tensor
 
     Args: