python-doctr 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. doctr/__init__.py +1 -1
  2. doctr/contrib/__init__.py +0 -0
  3. doctr/contrib/artefacts.py +131 -0
  4. doctr/contrib/base.py +105 -0
  5. doctr/datasets/cord.py +10 -1
  6. doctr/datasets/datasets/pytorch.py +2 -2
  7. doctr/datasets/funsd.py +11 -1
  8. doctr/datasets/generator/base.py +6 -5
  9. doctr/datasets/ic03.py +11 -1
  10. doctr/datasets/ic13.py +10 -1
  11. doctr/datasets/iiit5k.py +26 -16
  12. doctr/datasets/imgur5k.py +11 -2
  13. doctr/datasets/loader.py +1 -6
  14. doctr/datasets/sroie.py +11 -1
  15. doctr/datasets/svhn.py +11 -1
  16. doctr/datasets/svt.py +11 -1
  17. doctr/datasets/synthtext.py +11 -1
  18. doctr/datasets/utils.py +9 -3
  19. doctr/datasets/vocabs.py +15 -4
  20. doctr/datasets/wildreceipt.py +12 -1
  21. doctr/file_utils.py +45 -12
  22. doctr/io/elements.py +52 -10
  23. doctr/io/html.py +2 -2
  24. doctr/io/image/pytorch.py +6 -8
  25. doctr/io/image/tensorflow.py +1 -1
  26. doctr/io/pdf.py +5 -2
  27. doctr/io/reader.py +6 -0
  28. doctr/models/__init__.py +0 -1
  29. doctr/models/_utils.py +57 -20
  30. doctr/models/builder.py +73 -15
  31. doctr/models/classification/magc_resnet/tensorflow.py +13 -6
  32. doctr/models/classification/mobilenet/pytorch.py +47 -9
  33. doctr/models/classification/mobilenet/tensorflow.py +51 -14
  34. doctr/models/classification/predictor/pytorch.py +28 -17
  35. doctr/models/classification/predictor/tensorflow.py +26 -16
  36. doctr/models/classification/resnet/tensorflow.py +21 -8
  37. doctr/models/classification/textnet/pytorch.py +3 -3
  38. doctr/models/classification/textnet/tensorflow.py +11 -5
  39. doctr/models/classification/vgg/tensorflow.py +9 -3
  40. doctr/models/classification/vit/tensorflow.py +10 -4
  41. doctr/models/classification/zoo.py +55 -19
  42. doctr/models/detection/_utils/__init__.py +1 -0
  43. doctr/models/detection/_utils/base.py +66 -0
  44. doctr/models/detection/differentiable_binarization/base.py +4 -3
  45. doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
  46. doctr/models/detection/differentiable_binarization/tensorflow.py +34 -12
  47. doctr/models/detection/fast/base.py +6 -5
  48. doctr/models/detection/fast/pytorch.py +4 -4
  49. doctr/models/detection/fast/tensorflow.py +15 -12
  50. doctr/models/detection/linknet/base.py +4 -3
  51. doctr/models/detection/linknet/tensorflow.py +23 -11
  52. doctr/models/detection/predictor/pytorch.py +15 -1
  53. doctr/models/detection/predictor/tensorflow.py +17 -3
  54. doctr/models/detection/zoo.py +7 -2
  55. doctr/models/factory/hub.py +8 -18
  56. doctr/models/kie_predictor/base.py +13 -3
  57. doctr/models/kie_predictor/pytorch.py +45 -20
  58. doctr/models/kie_predictor/tensorflow.py +44 -17
  59. doctr/models/modules/layers/pytorch.py +2 -3
  60. doctr/models/modules/layers/tensorflow.py +6 -8
  61. doctr/models/modules/transformer/pytorch.py +2 -2
  62. doctr/models/modules/transformer/tensorflow.py +0 -2
  63. doctr/models/modules/vision_transformer/pytorch.py +1 -1
  64. doctr/models/modules/vision_transformer/tensorflow.py +1 -1
  65. doctr/models/predictor/base.py +97 -58
  66. doctr/models/predictor/pytorch.py +35 -20
  67. doctr/models/predictor/tensorflow.py +35 -18
  68. doctr/models/preprocessor/pytorch.py +4 -4
  69. doctr/models/preprocessor/tensorflow.py +3 -2
  70. doctr/models/recognition/crnn/tensorflow.py +8 -6
  71. doctr/models/recognition/master/pytorch.py +2 -2
  72. doctr/models/recognition/master/tensorflow.py +9 -4
  73. doctr/models/recognition/parseq/pytorch.py +4 -3
  74. doctr/models/recognition/parseq/tensorflow.py +14 -11
  75. doctr/models/recognition/sar/pytorch.py +7 -6
  76. doctr/models/recognition/sar/tensorflow.py +10 -12
  77. doctr/models/recognition/vitstr/pytorch.py +1 -1
  78. doctr/models/recognition/vitstr/tensorflow.py +9 -4
  79. doctr/models/recognition/zoo.py +1 -1
  80. doctr/models/utils/pytorch.py +1 -1
  81. doctr/models/utils/tensorflow.py +15 -15
  82. doctr/models/zoo.py +2 -2
  83. doctr/py.typed +0 -0
  84. doctr/transforms/functional/base.py +1 -1
  85. doctr/transforms/functional/pytorch.py +5 -5
  86. doctr/transforms/modules/base.py +37 -15
  87. doctr/transforms/modules/pytorch.py +73 -14
  88. doctr/transforms/modules/tensorflow.py +78 -19
  89. doctr/utils/fonts.py +7 -5
  90. doctr/utils/geometry.py +141 -31
  91. doctr/utils/metrics.py +34 -175
  92. doctr/utils/reconstitution.py +212 -0
  93. doctr/utils/visualization.py +5 -118
  94. doctr/version.py +1 -1
  95. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/METADATA +85 -81
  96. python_doctr-0.10.0.dist-info/RECORD +173 -0
  97. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/WHEEL +1 -1
  98. doctr/models/artefacts/__init__.py +0 -2
  99. doctr/models/artefacts/barcode.py +0 -74
  100. doctr/models/artefacts/face.py +0 -63
  101. doctr/models/obj_detection/__init__.py +0 -1
  102. doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
  103. doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
  104. python_doctr-0.8.1.dist-info/RECORD +0 -173
  105. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/LICENSE +0 -0
  106. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/top_level.txt +0 -0
  107. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/zip-safe +0 -0
doctr/datasets/sroie.py CHANGED
@@ -33,6 +33,7 @@ class SROIE(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
 
@@ -52,6 +53,7 @@ class SROIE(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         url, sha256, name = self.TRAIN if train else self.TEST
@@ -63,10 +65,16 @@ class SROIE(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train
 
         tmp_root = os.path.join(self.root, "images")
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32
 
         for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking SROIE", total=len(os.listdir(tmp_root))):
@@ -94,6 +102,8 @@ class SROIE(VisionDataset):
                 for crop, label in zip(crops, labels):
                     if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, coords))
             else:
                 self.data.append((img_path, dict(boxes=coords, labels=labels)))
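The same `detection_task` flag and the same mutual-exclusion check are added to SVHN, SVT, SynthText, and WILDRECEIPT in the diffs below (and, per the file list, to CORD, FUNSD, IC03, IC13, IIIT5K and IMGUR5K as well). A minimal usage sketch of the new mode, assuming the archive can be downloaded; the target shapes are inferred from the diff:

    from doctr.datasets import SROIE

    # Default: full targets, a dict with "boxes" and "labels" per image
    ds = SROIE(train=True, download=True)
    img, target = ds[0]

    # New in 0.10.0: detection-only targets, a numpy array of boxes per image
    ds_det = SROIE(train=True, download=True, detection_task=True)
    img, boxes = ds_det[0]

    # Both flags together raise a ValueError
    SROIE(train=True, download=True, recognition_task=True, detection_task=True)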
doctr/datasets/svhn.py CHANGED
@@ -32,6 +32,7 @@ class SVHN(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
 
@@ -52,6 +53,7 @@ class SVHN(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         url, sha256, name = self.TRAIN if train else self.TEST
@@ -63,8 +65,14 @@ class SVHN(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32
 
         tmp_root = os.path.join(self.root, "train" if train else "test")
@@ -122,6 +130,8 @@ class SVHN(VisionDataset):
                 for crop, label in zip(crops, label_targets):
                     if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_name, box_targets))
             else:
                 self.data.append((img_name, dict(boxes=box_targets, labels=label_targets)))
doctr/datasets/svt.py CHANGED
@@ -32,6 +32,7 @@ class SVT(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
 
@@ -43,6 +44,7 @@ class SVT(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
@@ -53,8 +55,14 @@ class SVT(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32
 
         # Load xml data
@@ -108,6 +116,8 @@ class SVT(VisionDataset):
                 for crop, label in zip(crops, labels):
                     if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((name.text, boxes))
             else:
                 self.data.append((name.text, dict(boxes=boxes, labels=labels)))
doctr/datasets/synthtext.py CHANGED
@@ -35,6 +35,7 @@ class SynthText(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
 
@@ -46,6 +47,7 @@ class SynthText(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
@@ -56,8 +58,14 @@ class SynthText(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32
 
         # Load mat data
@@ -111,6 +119,8 @@ class SynthText(VisionDataset):
                         tmp_img = Image.fromarray(crop)
                         tmp_img.save(os.path.join(reco_folder_path, f"{reco_images_counter}.png"))
                         reco_images_counter += 1
+            elif detection_task:
+                self.data.append((img_path[0], np.asarray(word_boxes, dtype=np_dtype)))
             else:
                 self.data.append((img_path[0], dict(boxes=np.asarray(word_boxes, dtype=np_dtype), labels=labels)))
doctr/datasets/utils.py CHANGED
@@ -169,8 +169,13 @@ def encode_sequences(
     return encoded_data
 
 
-def convert_target_to_relative(img: ImageTensor, target: Dict[str, Any]) -> Tuple[ImageTensor, Dict[str, Any]]:
-    target["boxes"] = convert_to_relative_coords(target["boxes"], get_img_shape(img))
+def convert_target_to_relative(
+    img: ImageTensor, target: Union[np.ndarray, Dict[str, Any]]
+) -> Tuple[ImageTensor, Union[Dict[str, Any], np.ndarray]]:
+    if isinstance(target, np.ndarray):
+        target = convert_to_relative_coords(target, get_img_shape(img))
+    else:
+        target["boxes"] = convert_to_relative_coords(target["boxes"], get_img_shape(img))
     return img, target
 
 
@@ -186,7 +191,8 @@ def crop_bboxes_from_image(img_path: Union[str, Path], geoms: np.ndarray) -> List[np.ndarray]:
     -------
         a list of cropped images
     """
-    img: np.ndarray = np.array(Image.open(img_path).convert("RGB"))
+    with Image.open(img_path) as pil_img:
+        img: np.ndarray = np.array(pil_img.convert("RGB"))
     # Polygon
     if geoms.ndim == 3 and geoms.shape[1:] == (4, 2):
         return extract_rcrops(img, geoms.astype(dtype=int))
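The widened signature lets the same pre-transform handle both the dict targets of the default mode and the bare box arrays of the new `detection_task` mode. A worked sketch, assuming the PyTorch backend so that `img` is a C x H x W tensor:

    import numpy as np
    import torch

    from doctr.datasets.utils import convert_target_to_relative

    img = torch.zeros((3, 100, 200))  # H=100, W=200
    boxes = np.array([[10, 20, 50, 40]], dtype=np.float32)  # absolute pixel coords

    # ndarray target (detection mode): the array itself is rescaled
    _, rel = convert_target_to_relative(img, boxes.copy())
    # rel -> [[0.05, 0.2, 0.25, 0.4]]

    # dict target (default mode): only target["boxes"] is rescaled
    _, rel_dict = convert_target_to_relative(img, {"boxes": boxes.copy(), "labels": ["word"]})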
doctr/datasets/vocabs.py CHANGED
@@ -17,9 +17,15 @@ VOCABS: Dict[str, str] = {
     "ancient_greek": "αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
     "arabic_letters": "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىي",
     "persian_letters": "پچڢڤگ",
-    "hindi_digits": "٠١٢٣٤٥٦٧٨٩",
+    "arabic_digits": "٠١٢٣٤٥٦٧٨٩",
     "arabic_diacritics": "ًٌٍَُِّْ",
     "arabic_punctuation": "؟؛«»—",
+    "hindi_letters": "अआइईउऊऋॠऌॡएऐओऔअंअःकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह",
+    "hindi_digits": "०१२३४५६७८९",
+    "hindi_punctuation": "।,?!:्ॐ॰॥॰",
+    "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
+    "bangla_digits": "০১২৩৪৫৬৭৮৯",
+    "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
 }
 
 VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
@@ -32,7 +38,7 @@ VOCABS["italian"] = VOCABS["english"] + "àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ"
 VOCABS["german"] = VOCABS["english"] + "äöüßÄÖÜẞ"
 VOCABS["arabic"] = (
     VOCABS["digits"]
-    + VOCABS["hindi_digits"]
+    + VOCABS["arabic_digits"]
     + VOCABS["arabic_letters"]
     + VOCABS["persian_letters"]
     + VOCABS["arabic_diacritics"]
@@ -48,10 +54,15 @@ VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
 VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
 VOCABS["vietnamese"] = (
     VOCABS["english"]
-    + "áàảạãăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵ"
-    + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
+    + "áàảạãăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵ"
+    + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
 )
 VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
+VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
+VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
+VOCABS["ukrainian"] = (
+    VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"
+)
 VOCABS["multilingual"] = "".join(
     dict.fromkeys(
         VOCABS["french"]
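Note the semantic shift here: the Eastern Arabic numerals formerly stored under `hindi_digits` move to the new `arabic_digits` key, and `hindi_digits` now holds Devanagari digits, which keeps `VOCABS["arabic"]` unchanged while making room for the new Hindi vocab. A quick sanity check of the composition, assuming `VOCABS` is imported from `doctr.datasets`:

    from doctr.datasets import VOCABS

    assert "٠" in VOCABS["arabic_digits"]  # Eastern Arabic zero, renamed key
    assert "०" in VOCABS["hindi_digits"]   # Devanagari zero, new meaning
    assert "٠" in VOCABS["arabic"]         # composed vocab is unaffected by the rename

    # Derived vocabs are plain string concatenation
    assert VOCABS["bangla"] == VOCABS["bangla_letters"] + VOCABS["bangla_digits"]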
doctr/datasets/wildreceipt.py CHANGED
@@ -40,6 +40,7 @@ class WILDRECEIPT(AbstractDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `AbstractDataset`.
     """
 
@@ -50,11 +51,19 @@ class WILDRECEIPT(AbstractDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
             img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
         )
+        # Task check
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         # File existence check
         if not os.path.exists(label_path) or not os.path.exists(img_folder):
             raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
@@ -62,7 +71,7 @@ class WILDRECEIPT(AbstractDataset):
         tmp_root = img_folder
         self.train = train
         np_dtype = np.float32
-        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
 
         with open(label_path, "r") as file:
             data = file.read()
@@ -100,6 +109,8 @@ class WILDRECEIPT(AbstractDataset):
                 for crop, label in zip(crops, list(text_targets)):
                     if label and " " not in label:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
             else:
                 self.data.append((
                     img_path,
doctr/file_utils.py CHANGED
@@ -5,21 +5,16 @@
 
 # Adapted from https://github.com/huggingface/transformers/blob/master/src/transformers/file_utils.py
 
+import importlib.metadata
 import importlib.util
 import logging
 import os
-import sys
+from typing import Optional
 
 CLASS_NAME: str = "words"
 
 
-if sys.version_info < (3, 8):  # pragma: no cover
-    import importlib_metadata
-else:
-    import importlib.metadata as importlib_metadata
-
-
-__all__ = ["is_tf_available", "is_torch_available", "CLASS_NAME"]
+__all__ = ["is_tf_available", "is_torch_available", "requires_package", "CLASS_NAME"]
 
 ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
 ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
@@ -32,14 +27,28 @@ if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VALUES:
     _torch_available = importlib.util.find_spec("torch") is not None
     if _torch_available:
         try:
-            _torch_version = importlib_metadata.version("torch")
+            _torch_version = importlib.metadata.version("torch")
             logging.info(f"PyTorch version {_torch_version} available.")
-        except importlib_metadata.PackageNotFoundError:  # pragma: no cover
+        except importlib.metadata.PackageNotFoundError:  # pragma: no cover
             _torch_available = False
 else:  # pragma: no cover
     logging.info("Disabling PyTorch because USE_TF is set")
     _torch_available = False
 
+# Compatibility fix to make sure tensorflow.keras stays at Keras 2
+if "TF_USE_LEGACY_KERAS" not in os.environ:
+    os.environ["TF_USE_LEGACY_KERAS"] = "1"
+
+elif os.environ["TF_USE_LEGACY_KERAS"] != "1":
+    raise ValueError(
+        "docTR is only compatible with Keras 2, but you have explicitly set `TF_USE_LEGACY_KERAS` to `0`. "
+    )
+
+
+def ensure_keras_v2() -> None:  # pragma: no cover
+    if not os.environ.get("TF_USE_LEGACY_KERAS") == "1":
+        os.environ["TF_USE_LEGACY_KERAS"] = "1"
+
 
 if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
     _tf_available = importlib.util.find_spec("tensorflow") is not None
@@ -59,9 +68,9 @@ if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
     # For the metadata, we have to look for both tensorflow and tensorflow-cpu
     for pkg in candidates:
         try:
-            _tf_version = importlib_metadata.version(pkg)
+            _tf_version = importlib.metadata.version(pkg)
             break
-        except importlib_metadata.PackageNotFoundError:
+        except importlib.metadata.PackageNotFoundError:
             pass
     _tf_available = _tf_version is not None
     if _tf_available:
@@ -70,6 +79,11 @@ if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
             _tf_available = False
         else:
             logging.info(f"TensorFlow version {_tf_version} available.")
+            ensure_keras_v2()
+            import tensorflow as tf
+
+            # Enable eager execution - this is required for some models to work properly
+            tf.config.run_functions_eagerly(True)
 else:  # pragma: no cover
     logging.info("Disabling Tensorflow because USE_TORCH is set")
     _tf_available = False
@@ -82,6 +96,25 @@ if not _torch_available and not _tf_available:  # pragma: no cover
     )
 
 
+def requires_package(name: str, extra_message: Optional[str] = None) -> None:  # pragma: no cover
+    """
+    package requirement helper
+
+    Args:
+    ----
+        name: name of the package
+        extra_message: additional message to display if the package is not found
+    """
+    try:
+        _pkg_version = importlib.metadata.version(name)
+        logging.info(f"{name} version {_pkg_version} available.")
+    except importlib.metadata.PackageNotFoundError:
+        raise ImportError(
+            f"\n\n{extra_message if extra_message is not None else ''} "
+            f"\nPlease install it with the following command: pip install {name}\n"
+        )
+
+
 def is_torch_available():
     """Whether PyTorch is installed."""
     return _torch_available
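Two things happen in this file: TensorFlow installs are pinned to Keras 2 via `TF_USE_LEGACY_KERAS` (with eager execution forced on), and the new `requires_package` helper converts a missing optional dependency into an `ImportError` with install instructions at call time rather than an import-time crash. A sketch of the intended call-site pattern; `show_something` is a hypothetical caller (the real ones appear in the elements.py diff below):

    from doctr.file_utils import requires_package

    def show_something():
        # raises ImportError with a pip hint if matplotlib is not installed
        requires_package("matplotlib", "`show_something()` requires matplotlib installed")
        import matplotlib.pyplot as plt  # safe to import past this point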
doctr/io/elements.py CHANGED
@@ -12,14 +12,19 @@ from xml.etree import ElementTree as ET
 from xml.etree.ElementTree import Element as ETElement
 from xml.etree.ElementTree import SubElement
 
-import matplotlib.pyplot as plt
 import numpy as np
 
 import doctr
+from doctr.file_utils import requires_package
 from doctr.utils.common_types import BoundingBox
 from doctr.utils.geometry import resolve_enclosing_bbox, resolve_enclosing_rbbox
+from doctr.utils.reconstitution import synthesize_kie_page, synthesize_page
 from doctr.utils.repr import NestedObject
-from doctr.utils.visualization import synthesize_kie_page, synthesize_page, visualize_kie_page, visualize_page
+
+try:  # optional dependency for visualization
+    from doctr.utils.visualization import visualize_kie_page, visualize_page
+except ModuleNotFoundError:
+    pass
 
 __all__ = ["Element", "Word", "Artefact", "Line", "Prediction", "Block", "Page", "KIEPage", "Document"]
 
@@ -67,16 +72,27 @@ class Word(Element):
         confidence: the confidence associated with the text prediction
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
             the page's size
+        objectness_score: the objectness score of the detection
+        crop_orientation: the general orientation of the crop in degrees and its confidence
     """
 
-    _exported_keys: List[str] = ["value", "confidence", "geometry"]
+    _exported_keys: List[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"]
     _children_names: List[str] = []
 
-    def __init__(self, value: str, confidence: float, geometry: Union[BoundingBox, np.ndarray]) -> None:
+    def __init__(
+        self,
+        value: str,
+        confidence: float,
+        geometry: Union[BoundingBox, np.ndarray],
+        objectness_score: float,
+        crop_orientation: Dict[str, Any],
+    ) -> None:
         super().__init__()
         self.value = value
         self.confidence = confidence
         self.geometry = geometry
+        self.objectness_score = objectness_score
+        self.crop_orientation = crop_orientation
 
     def render(self) -> str:
         """Renders the full text of the element"""
@@ -135,7 +151,7 @@ class Line(Element):
         all words in it.
     """
 
-    _exported_keys: List[str] = ["geometry"]
+    _exported_keys: List[str] = ["geometry", "objectness_score"]
     _children_names: List[str] = ["words"]
     words: List[Word] = []
 
@@ -143,15 +159,20 @@ class Line(Element):
         self,
         words: List[Word],
         geometry: Optional[Union[BoundingBox, np.ndarray]] = None,
+        objectness_score: Optional[float] = None,
     ) -> None:
+        # Compute the objectness score of the line
+        if objectness_score is None:
+            objectness_score = float(np.mean([w.objectness_score for w in words]))
         # Resolve the geometry using the smallest enclosing bounding box
         if geometry is None:
             # Check whether this is a rotated or straight box
             box_resolution_fn = resolve_enclosing_rbbox if len(words[0].geometry) == 4 else resolve_enclosing_bbox
-            geometry = box_resolution_fn([w.geometry for w in words])  # type: ignore[operator]
+            geometry = box_resolution_fn([w.geometry for w in words])  # type: ignore[misc]
 
         super().__init__(words=words)
         self.geometry = geometry
+        self.objectness_score = objectness_score
 
     def render(self) -> str:
         """Renders the full text of the element"""
@@ -189,7 +210,7 @@ class Block(Element):
         all lines and artefacts in it.
     """
 
-    _exported_keys: List[str] = ["geometry"]
+    _exported_keys: List[str] = ["geometry", "objectness_score"]
     _children_names: List[str] = ["lines", "artefacts"]
     lines: List[Line] = []
     artefacts: List[Artefact] = []
@@ -199,7 +220,11 @@ class Block(Element):
         lines: List[Line] = [],
         artefacts: List[Artefact] = [],
         geometry: Optional[Union[BoundingBox, np.ndarray]] = None,
+        objectness_score: Optional[float] = None,
     ) -> None:
+        # Compute the objectness score of the line
+        if objectness_score is None:
+            objectness_score = float(np.mean([w.objectness_score for line in lines for w in line.words]))
         # Resolve the geometry using the smallest enclosing bounding box
         if geometry is None:
             line_boxes = [word.geometry for line in lines for word in line.words]
@@ -207,10 +232,11 @@ class Block(Element):
             box_resolution_fn = (
                 resolve_enclosing_rbbox if isinstance(lines[0].geometry, np.ndarray) else resolve_enclosing_bbox
             )
-            geometry = box_resolution_fn(line_boxes + artefact_boxes)  # type: ignore[operator]
+            geometry = box_resolution_fn(line_boxes + artefact_boxes)  # type: ignore
 
         super().__init__(lines=lines, artefacts=artefacts)
         self.geometry = geometry
+        self.objectness_score = objectness_score
 
     def render(self, line_break: str = "\n") -> str:
         """Renders the full text of the element"""
@@ -274,12 +300,20 @@ class Page(Element):
             preserve_aspect_ratio: pass True if you passed True to the predictor
             **kwargs: additional keyword arguments passed to the matplotlib.pyplot.show method
         """
+        requires_package("matplotlib", "`.show()` requires matplotlib & mplcursors installed")
+        requires_package("mplcursors", "`.show()` requires matplotlib & mplcursors installed")
+        import matplotlib.pyplot as plt
+
         visualize_page(self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
         plt.show(**kwargs)
 
     def synthesize(self, **kwargs) -> np.ndarray:
         """Synthesize the page from the predictions
 
+        Args:
+        ----
+            **kwargs: keyword arguments passed to the `synthesize_page` method
+
         Returns
         -------
             synthesized page
@@ -449,6 +483,10 @@ class KIEPage(Element):
             preserve_aspect_ratio: pass True if you passed True to the predictor
             **kwargs: keyword arguments passed to the matplotlib.pyplot.show method
         """
+        requires_package("matplotlib", "`.show()` requires matplotlib & mplcursors installed")
+        requires_package("mplcursors", "`.show()` requires matplotlib & mplcursors installed")
+        import matplotlib.pyplot as plt
+
         visualize_kie_page(
             self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio
         )
@@ -459,7 +497,7 @@ class KIEPage(Element):
 
         Args:
         ----
-            **kwargs: keyword arguments passed to the matplotlib.pyplot.show method
+            **kwargs: keyword arguments passed to the `synthesize_kie_page` method
 
         Returns:
         -------
@@ -569,11 +607,15 @@
     def synthesize(self, **kwargs) -> List[np.ndarray]:
         """Synthesize all pages from their predictions
 
+        Args:
+        ----
+            **kwargs: keyword arguments passed to the `Page.synthesize` method
+
         Returns
         -------
             list of synthesized pages
         """
-        return [page.synthesize() for page in self.pages]
+        return [page.synthesize(**kwargs) for page in self.pages]
 
     def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
         """Export the document as XML (hOCR-format)
doctr/io/html.py CHANGED
@@ -5,8 +5,6 @@
 
 from typing import Any
 
-from weasyprint import HTML
-
 __all__ = ["read_html"]
 
 
@@ -25,4 +23,6 @@ def read_html(url: str, **kwargs: Any) -> bytes:
     -------
         decoded PDF file as a bytes stream
     """
+    from weasyprint import HTML
+
     return HTML(url, **kwargs).write_pdf()
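Deferring the weasyprint import into the function body makes it a lazy, optional dependency, so `import doctr` no longer fails when weasyprint is absent. Usage is unchanged, assuming weasyprint is installed:

    from doctr.io.html import read_html

    pdf_bytes = read_html("https://www.example.com")  # bytes of the rendered PDF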
doctr/io/image/pytorch.py CHANGED
@@ -16,7 +16,7 @@ from doctr.utils.common_types import AbstractPath
 __all__ = ["tensor_from_pil", "read_img_as_tensor", "decode_img_as_tensor", "tensor_from_numpy", "get_img_shape"]
 
 
-def tensor_from_pil(pil_img: Image, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+def tensor_from_pil(pil_img: Image.Image, dtype: torch.dtype = torch.float32) -> torch.Tensor:
     """Convert a PIL Image to a PyTorch tensor
 
     Args:
@@ -51,9 +51,8 @@ def read_img_as_tensor(img_path: AbstractPath, dtype: torch.dtype = torch.float32) -> torch.Tensor:
     if dtype not in (torch.uint8, torch.float16, torch.float32):
         raise ValueError("insupported value for dtype")
 
-    pil_img = Image.open(img_path, mode="r").convert("RGB")
-
-    return tensor_from_pil(pil_img, dtype)
+    with Image.open(img_path, mode="r") as pil_img:
+        return tensor_from_pil(pil_img.convert("RGB"), dtype)
 
 
 def decode_img_as_tensor(img_content: bytes, dtype: torch.dtype = torch.float32) -> torch.Tensor:
@@ -71,9 +70,8 @@ def decode_img_as_tensor(img_content: bytes, dtype: torch.dtype = torch.float32) -> torch.Tensor:
     if dtype not in (torch.uint8, torch.float16, torch.float32):
         raise ValueError("insupported value for dtype")
 
-    pil_img = Image.open(BytesIO(img_content), mode="r").convert("RGB")
-
-    return tensor_from_pil(pil_img, dtype)
+    with Image.open(BytesIO(img_content), mode="r") as pil_img:
+        return tensor_from_pil(pil_img.convert("RGB"), dtype)
 
 
 def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -> torch.Tensor:
@@ -106,4 +104,4 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -> torch.Tensor:
 
 def get_img_shape(img: torch.Tensor) -> Tuple[int, int]:
     """Get the shape of an image"""
-    return img.shape[-2:]
+    return img.shape[-2:]  # type: ignore[return-value]
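The move to `with Image.open(...)` closes the underlying file handle deterministically instead of relying on garbage collection; the returned tensor is unchanged. A sketch assuming the PyTorch backend and a placeholder image path:

    import torch
    from doctr.io import read_img_as_tensor

    img = read_img_as_tensor("sample.jpg", dtype=torch.float32)  # 3 x H x W, values in [0, 1]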
doctr/io/image/tensorflow.py CHANGED
@@ -15,7 +15,7 @@ from doctr.utils.common_types import AbstractPath
 __all__ = ["tensor_from_pil", "read_img_as_tensor", "decode_img_as_tensor", "tensor_from_numpy", "get_img_shape"]
 
 
-def tensor_from_pil(pil_img: Image, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
+def tensor_from_pil(pil_img: Image.Image, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
     """Convert a PIL Image to a TensorFlow tensor
 
     Args:
doctr/io/pdf.py CHANGED
@@ -38,5 +38,8 @@ def read_pdf(
         the list of pages decoded as numpy ndarray of shape H x W x C
     """
     # Rasterise pages to numpy ndarrays with pypdfium2
-    pdf = pdfium.PdfDocument(file, password=password, autoclose=True)
-    return [page.render(scale=scale, rev_byteorder=rgb_mode, **kwargs).to_numpy() for page in pdf]
+    pdf = pdfium.PdfDocument(file, password=password)
+    try:
+        return [page.render(scale=scale, rev_byteorder=rgb_mode, **kwargs).to_numpy() for page in pdf]
+    finally:
+        pdf.close()
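Replacing `autoclose=True` with an explicit try/finally guarantees the document handle is released even if rendering raises midway. The public behaviour is unchanged; a sketch with a placeholder path:

    from doctr.io.pdf import read_pdf

    pages = read_pdf("sample.pdf", scale=2)  # list of H x W x C uint8 arrays
    print(len(pages), pages[0].shape)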
doctr/io/reader.py CHANGED
@@ -8,6 +8,7 @@ from typing import List, Sequence, Union
 
 import numpy as np
 
+from doctr.file_utils import requires_package
 from doctr.utils.common_types import AbstractFile
 
 from .html import read_html
@@ -54,6 +55,11 @@ class DocumentFile:
         -------
             the list of pages decoded as numpy ndarray of shape H x W x 3
         """
+        requires_package(
+            "weasyprint",
+            "`.from_url` requires weasyprint installed.\n"
+            + "Installation instructions: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html#installation",
+        )
         pdf_stream = read_html(url)
         return cls.from_pdf(pdf_stream, **kwargs)
 
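With weasyprint optional, `DocumentFile.from_url` now checks for it lazily and points to the installation docs instead of breaking at import time. A sketch, assuming weasyprint is installed:

    from doctr.io import DocumentFile

    pages = DocumentFile.from_url("https://www.example.com")  # rendered to PDF, then rasterised
    print(len(pages), pages[0].shape)  # numpy arrays of shape H x W x 3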
doctr/models/__init__.py CHANGED
@@ -1,4 +1,3 @@
-from . import artefacts
 from .classification import *
 from .detection import *
 from .recognition import *