PyPI - python-doctr - Versions diffs - 0.9.0__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

python-doctr 0.9.0__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

doctr/datasets/cord.py +10 -1
doctr/datasets/funsd.py +11 -1
doctr/datasets/ic03.py +11 -1
doctr/datasets/ic13.py +10 -1
doctr/datasets/iiit5k.py +26 -16
doctr/datasets/imgur5k.py +10 -1
doctr/datasets/sroie.py +11 -1
doctr/datasets/svhn.py +11 -1
doctr/datasets/svt.py +11 -1
doctr/datasets/synthtext.py +11 -1
doctr/datasets/utils.py +7 -2
doctr/datasets/vocabs.py +6 -2
doctr/datasets/wildreceipt.py +12 -1
doctr/file_utils.py +19 -0
doctr/io/elements.py +12 -4
doctr/models/builder.py +2 -2
doctr/models/classification/magc_resnet/tensorflow.py +13 -6
doctr/models/classification/mobilenet/pytorch.py +2 -0
doctr/models/classification/mobilenet/tensorflow.py +14 -8
doctr/models/classification/predictor/pytorch.py +11 -7
doctr/models/classification/predictor/tensorflow.py +10 -6
doctr/models/classification/resnet/tensorflow.py +21 -8
doctr/models/classification/textnet/tensorflow.py +11 -5
doctr/models/classification/vgg/tensorflow.py +9 -3
doctr/models/classification/vit/tensorflow.py +10 -4
doctr/models/classification/zoo.py +22 -10
doctr/models/detection/differentiable_binarization/tensorflow.py +34 -12
doctr/models/detection/fast/tensorflow.py +14 -11
doctr/models/detection/linknet/tensorflow.py +23 -11
doctr/models/detection/predictor/tensorflow.py +2 -2
doctr/models/factory/hub.py +5 -6
doctr/models/kie_predictor/base.py +4 -0
doctr/models/kie_predictor/pytorch.py +4 -0
doctr/models/kie_predictor/tensorflow.py +8 -1
doctr/models/modules/transformer/tensorflow.py +0 -2
doctr/models/modules/vision_transformer/pytorch.py +1 -1
doctr/models/modules/vision_transformer/tensorflow.py +1 -1
doctr/models/predictor/base.py +24 -12
doctr/models/predictor/pytorch.py +4 -0
doctr/models/predictor/tensorflow.py +8 -1
doctr/models/preprocessor/tensorflow.py +1 -1
doctr/models/recognition/crnn/tensorflow.py +8 -6
doctr/models/recognition/master/tensorflow.py +9 -4
doctr/models/recognition/parseq/tensorflow.py +10 -8
doctr/models/recognition/sar/tensorflow.py +7 -3
doctr/models/recognition/vitstr/tensorflow.py +9 -4
doctr/models/utils/pytorch.py +1 -1
doctr/models/utils/tensorflow.py +15 -15
doctr/transforms/functional/pytorch.py +1 -1
doctr/transforms/modules/pytorch.py +7 -6
doctr/transforms/modules/tensorflow.py +15 -12
doctr/utils/geometry.py +106 -19
doctr/utils/metrics.py +1 -1
doctr/utils/reconstitution.py +151 -65
doctr/version.py +1 -1
{python_doctr-0.9.0.dist-info → python_doctr-0.10.0.dist-info}/METADATA +11 -11
{python_doctr-0.9.0.dist-info → python_doctr-0.10.0.dist-info}/RECORD +61 -61
{python_doctr-0.9.0.dist-info → python_doctr-0.10.0.dist-info}/WHEEL +1 -1
{python_doctr-0.9.0.dist-info → python_doctr-0.10.0.dist-info}/LICENSE +0 -0
{python_doctr-0.9.0.dist-info → python_doctr-0.10.0.dist-info}/top_level.txt +0 -0
{python_doctr-0.9.0.dist-info → python_doctr-0.10.0.dist-info}/zip-safe +0 -0

doctr/datasets/cord.py CHANGED Viewed

@@ -33,6 +33,7 @@ class CORD(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
@@ -53,6 +54,7 @@ class CORD(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         url, sha256, name = self.TRAIN if train else self.TEST
@@ -64,10 +66,15 @@ class CORD(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
         # List images
         tmp_root = os.path.join(self.root, "image")
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         self.train = train
         np_dtype = np.float32
         for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking CORD", total=len(os.listdir(tmp_root))):
@@ -109,6 +116,8 @@ class CORD(VisionDataset):
                 )
                 for crop, label in zip(crops, list(text_targets)):
                     self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
             else:
                 self.data.append((
                     img_path,

doctr/datasets/funsd.py CHANGED Viewed

@@ -33,6 +33,7 @@ class FUNSD(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
@@ -45,6 +46,7 @@ class FUNSD(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
@@ -55,6 +57,12 @@ class FUNSD(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
         self.train = train
         np_dtype = np.float32
@@ -63,7 +71,7 @@ class FUNSD(VisionDataset):
         # # List images
         tmp_root = os.path.join(self.root, subfolder, "images")
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking FUNSD", total=len(os.listdir(tmp_root))):
             # File existence check
             if not os.path.exists(os.path.join(tmp_root, img_path)):
@@ -100,6 +108,8 @@ class FUNSD(VisionDataset):
                     # filter labels with unknown characters
                     if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
             else:
                 self.data.append((
                     img_path,

doctr/datasets/ic03.py CHANGED Viewed

@@ -32,6 +32,7 @@ class IC03(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
@@ -51,6 +52,7 @@ class IC03(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         url, sha256, file_name = self.TRAIN if train else self.TEST
@@ -62,8 +64,14 @@ class IC03(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
         self.train = train
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32
         # Load xml data
@@ -117,6 +125,8 @@ class IC03(VisionDataset):
                     for crop, label in zip(crops, labels):
                         if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                             self.data.append((crop, label))
+                elif detection_task:
+                    self.data.append((name.text, boxes))
                 else:
                     self.data.append((name.text, dict(boxes=boxes, labels=labels)))

doctr/datasets/ic13.py CHANGED Viewed

@@ -38,6 +38,7 @@ class IC13(AbstractDataset):
         label_folder: folder with all annotation files for the images
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `AbstractDataset`.
     """
@@ -47,11 +48,17 @@ class IC13(AbstractDataset):
         label_folder: str,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
             img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
         # File existence check
         if not os.path.exists(label_folder) or not os.path.exists(img_folder):
@@ -59,7 +66,7 @@ class IC13(AbstractDataset):
                 f"unable to locate {label_folder if not os.path.exists(label_folder) else img_folder}"
             )
-        self.data: List[Tuple[Union[Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32
         img_names = os.listdir(img_folder)
@@ -95,5 +102,7 @@ class IC13(AbstractDataset):
                 crops = crop_bboxes_from_image(img_path=img_path, geoms=box_targets)
                 for crop, label in zip(crops, labels):
                     self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, box_targets))
             else:
                 self.data.append((img_path, dict(boxes=box_targets, labels=labels)))

doctr/datasets/iiit5k.py CHANGED Viewed

@@ -34,6 +34,7 @@ class IIIT5K(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
@@ -45,6 +46,7 @@ class IIIT5K(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
@@ -55,6 +57,12 @@ class IIIT5K(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
         self.train = train
         # Load mat data
@@ -62,7 +70,7 @@ class IIIT5K(VisionDataset):
         mat_file = "trainCharBound" if self.train else "testCharBound"
         mat_data = sio.loadmat(os.path.join(tmp_root, f"{mat_file}.mat"))[mat_file][0]
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32
         for img_path, label, box_targets in tqdm(iterable=mat_data, desc="Unpacking IIIT5K", total=len(mat_data)):
@@ -73,24 +81,26 @@ class IIIT5K(VisionDataset):
             if not os.path.exists(os.path.join(tmp_root, _raw_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, _raw_path)}")
+            if use_polygons:
+                # (x, y) coordinates of top left, top right, bottom right, bottom left corners
+                box_targets = [
+                    [
+                        [box[0], box[1]],
+                        [box[0] + box[2], box[1]],
+                        [box[0] + box[2], box[1] + box[3]],
+                        [box[0], box[1] + box[3]],
+                    ]
+                    for box in box_targets
+                ]
+            else:
+                # xmin, ymin, xmax, ymax
+                box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
             if recognition_task:
                 self.data.append((_raw_path, _raw_label))
+            elif detection_task:
+                self.data.append((_raw_path, np.asarray(box_targets, dtype=np_dtype)))
             else:
-                if use_polygons:
-                    # (x, y) coordinates of top left, top right, bottom right, bottom left corners
-                    box_targets = [
-                        [
-                            [box[0], box[1]],
-                            [box[0] + box[2], box[1]],
-                            [box[0] + box[2], box[1] + box[3]],
-                            [box[0], box[1] + box[3]],
-                        ]
-                        for box in box_targets
-                    ]
-                else:
-                    # xmin, ymin, xmax, ymax
-                    box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
                 # label are casted to list where each char corresponds to the character's bounding box
                 self.data.append((
                     _raw_path,

doctr/datasets/imgur5k.py CHANGED Viewed

@@ -46,6 +46,7 @@ class IMGUR5K(AbstractDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `AbstractDataset`.
     """
@@ -56,17 +57,23 @@ class IMGUR5K(AbstractDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
             img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
         # File existence check
         if not os.path.exists(label_path) or not os.path.exists(img_folder):
             raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
-        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         self.train = train
         np_dtype = np.float32
@@ -132,6 +139,8 @@ class IMGUR5K(AbstractDataset):
                                 tmp_img = Image.fromarray(crop)
                                 tmp_img.save(os.path.join(reco_folder_path, f"{reco_images_counter}.png"))
                                 reco_images_counter += 1
+                elif detection_task:
+                    self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
                 else:
                     self.data.append((img_path, dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=labels)))

doctr/datasets/sroie.py CHANGED Viewed

@@ -33,6 +33,7 @@ class SROIE(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
@@ -52,6 +53,7 @@ class SROIE(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         url, sha256, name = self.TRAIN if train else self.TEST
@@ -63,10 +65,16 @@ class SROIE(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
         self.train = train
         tmp_root = os.path.join(self.root, "images")
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32
         for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking SROIE", total=len(os.listdir(tmp_root))):
@@ -94,6 +102,8 @@ class SROIE(VisionDataset):
                 for crop, label in zip(crops, labels):
                     if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, coords))
             else:
                 self.data.append((img_path, dict(boxes=coords, labels=labels)))

doctr/datasets/svhn.py CHANGED Viewed

@@ -32,6 +32,7 @@ class SVHN(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
@@ -52,6 +53,7 @@ class SVHN(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         url, sha256, name = self.TRAIN if train else self.TEST
@@ -63,8 +65,14 @@ class SVHN(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
         self.train = train
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32
         tmp_root = os.path.join(self.root, "train" if train else "test")
@@ -122,6 +130,8 @@ class SVHN(VisionDataset):
                     for crop, label in zip(crops, label_targets):
                         if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                             self.data.append((crop, label))
+                elif detection_task:
+                    self.data.append((img_name, box_targets))
                 else:
                     self.data.append((img_name, dict(boxes=box_targets, labels=label_targets)))

doctr/datasets/svt.py CHANGED Viewed

@@ -32,6 +32,7 @@ class SVT(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
@@ -43,6 +44,7 @@ class SVT(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
@@ -53,8 +55,14 @@ class SVT(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
         self.train = train
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32
         # Load xml data
@@ -108,6 +116,8 @@ class SVT(VisionDataset):
                 for crop, label in zip(crops, labels):
                     if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((name.text, boxes))
             else:
                 self.data.append((name.text, dict(boxes=boxes, labels=labels)))

doctr/datasets/synthtext.py CHANGED Viewed

@@ -35,6 +35,7 @@ class SynthText(VisionDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
@@ -46,6 +47,7 @@ class SynthText(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
@@ -56,8 +58,14 @@ class SynthText(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
         self.train = train
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         np_dtype = np.float32
         # Load mat data
@@ -111,6 +119,8 @@ class SynthText(VisionDataset):
                             tmp_img = Image.fromarray(crop)
                             tmp_img.save(os.path.join(reco_folder_path, f"{reco_images_counter}.png"))
                             reco_images_counter += 1
+            elif detection_task:
+                self.data.append((img_path[0], np.asarray(word_boxes, dtype=np_dtype)))
             else:
                 self.data.append((img_path[0], dict(boxes=np.asarray(word_boxes, dtype=np_dtype), labels=labels)))

doctr/datasets/utils.py CHANGED Viewed

@@ -169,8 +169,13 @@ def encode_sequences(
     return encoded_data
-def convert_target_to_relative(img: ImageTensor, target: Dict[str, Any]) -> Tuple[ImageTensor, Dict[str, Any]]:
-    target["boxes"] = convert_to_relative_coords(target["boxes"], get_img_shape(img))
+def convert_target_to_relative(
+    img: ImageTensor, target: Union[np.ndarray, Dict[str, Any]]
+) -> Tuple[ImageTensor, Union[Dict[str, Any], np.ndarray]]:
+    if isinstance(target, np.ndarray):
+        target = convert_to_relative_coords(target, get_img_shape(img))
+    else:
+        target["boxes"] = convert_to_relative_coords(target["boxes"], get_img_shape(img))
     return img, target

doctr/datasets/vocabs.py CHANGED Viewed

@@ -25,6 +25,7 @@ VOCABS: Dict[str, str] = {
     "hindi_punctuation": "।,?!:्ॐ॰॥॰",
     "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
     "bangla_digits": "০১২৩৪৫৬৭৮৯",
+    "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
 }
 VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
@@ -53,12 +54,15 @@ VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
 VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
 VOCABS["vietnamese"] = (
     VOCABS["english"]
-    + "áàảạãăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵ"
-    + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
+    + "áàảạãăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵ"
+    + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
 )
 VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
 VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
 VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
+VOCABS["ukrainian"] = (
+    VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"
+)
 VOCABS["multilingual"] = "".join(
     dict.fromkeys(
         VOCABS["french"]

doctr/datasets/wildreceipt.py CHANGED Viewed

@@ -40,6 +40,7 @@ class WILDRECEIPT(AbstractDataset):
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `AbstractDataset`.
     """
@@ -50,11 +51,19 @@ class WILDRECEIPT(AbstractDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
             img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
         )
+        # Task check
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
         # File existence check
         if not os.path.exists(label_path) or not os.path.exists(img_folder):
             raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
@@ -62,7 +71,7 @@ class WILDRECEIPT(AbstractDataset):
         tmp_root = img_folder
         self.train = train
         np_dtype = np.float32
-        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
         with open(label_path, "r") as file:
             data = file.read()
@@ -100,6 +109,8 @@ class WILDRECEIPT(AbstractDataset):
                 for crop, label in zip(crops, list(text_targets)):
                     if label and " " not in label:
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
             else:
                 self.data.append((
                     img_path,

doctr/file_utils.py CHANGED Viewed

@@ -35,6 +35,20 @@ else:  # pragma: no cover
     logging.info("Disabling PyTorch because USE_TF is set")
     _torch_available = False
+# Compatibility fix to make sure tensorflow.keras stays at Keras 2
+if "TF_USE_LEGACY_KERAS" not in os.environ:
+    os.environ["TF_USE_LEGACY_KERAS"] = "1"
+elif os.environ["TF_USE_LEGACY_KERAS"] != "1":
+    raise ValueError(
+        "docTR is only compatible with Keras 2, but you have explicitly set `TF_USE_LEGACY_KERAS` to `0`. "
+    )
+def ensure_keras_v2() -> None:  # pragma: no cover
+    if not os.environ.get("TF_USE_LEGACY_KERAS") == "1":
+        os.environ["TF_USE_LEGACY_KERAS"] = "1"
 if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
     _tf_available = importlib.util.find_spec("tensorflow") is not None
@@ -65,6 +79,11 @@ if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VA
             _tf_available = False
         else:
             logging.info(f"TensorFlow version {_tf_version} available.")
+            ensure_keras_v2()
+            import tensorflow as tf
+            # Enable eager execution - this is required for some models to work properly
+            tf.config.run_functions_eagerly(True)
 else:  # pragma: no cover
     logging.info("Disabling Tensorflow because USE_TORCH is set")
     _tf_available = False

doctr/io/elements.py CHANGED Viewed

@@ -168,7 +168,7 @@ class Line(Element):
         if geometry is None:
             # Check whether this is a rotated or straight box
             box_resolution_fn = resolve_enclosing_rbbox if len(words[0].geometry) == 4 else resolve_enclosing_bbox
-            geometry = box_resolution_fn([w.geometry for w in words])  # type: ignore[operator]
+            geometry = box_resolution_fn([w.geometry for w in words])  # type: ignore[misc]
         super().__init__(words=words)
         self.geometry = geometry
@@ -232,7 +232,7 @@ class Block(Element):
             box_resolution_fn = (
                 resolve_enclosing_rbbox if isinstance(lines[0].geometry, np.ndarray) else resolve_enclosing_bbox
             )
-            geometry = box_resolution_fn(line_boxes + artefact_boxes)  # type: ignore[operator]
+            geometry = box_resolution_fn(line_boxes + artefact_boxes)  # type: ignore
         super().__init__(lines=lines, artefacts=artefacts)
         self.geometry = geometry
@@ -310,6 +310,10 @@ class Page(Element):
     def synthesize(self, **kwargs) -> np.ndarray:
         """Synthesize the page from the predictions
+        Args:
+        ----
+            **kwargs: keyword arguments passed to the `synthesize_page` method
         Returns
         -------
             synthesized page
@@ -493,7 +497,7 @@ class KIEPage(Element):
         Args:
         ----
-            **kwargs: keyword arguments passed to the matplotlib.pyplot.show method
+            **kwargs: keyword arguments passed to the `synthesize_kie_page` method
         Returns:
         -------
@@ -603,11 +607,15 @@ class Document(Element):
     def synthesize(self, **kwargs) -> List[np.ndarray]:
         """Synthesize all pages from their predictions
+        Args:
+        ----
+            **kwargs: keyword arguments passed to the `Page.synthesize` method
         Returns
         -------
             list of synthesized pages
         """
-        return [page.synthesize() for page in self.pages]
+        return [page.synthesize(**kwargs) for page in self.pages]
     def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
         """Export the document as XML (hOCR-format)

doctr/models/builder.py CHANGED Viewed

@@ -266,7 +266,7 @@ class DocumentBuilder(NestedObject):
                 Line([
                     Word(
                         *word_preds[idx],
-                        tuple([tuple(pt) for pt in boxes[idx].tolist()]),  # type: ignore[arg-type]
+                        tuple(tuple(pt) for pt in boxes[idx].tolist()),  # type: ignore[arg-type]
                         float(objectness_scores[idx]),
                         crop_orientations[idx],
                     )
@@ -500,7 +500,7 @@ class KIEDocumentBuilder(DocumentBuilder):
             Prediction(
                 value=word_preds[idx][0],
                 confidence=word_preds[idx][1],
-                geometry=tuple([tuple(pt) for pt in boxes[idx].tolist()]),  # type: ignore[arg-type]
+                geometry=tuple(tuple(pt) for pt in boxes[idx].tolist()),  # type: ignore[arg-type]
                 objectness_score=float(objectness_scores[idx]),
                 crop_orientation=crop_orientations[idx],
             )

python-doctr 0.9.0__py3-none-any.whl → 0.10.0__py3-none-any.whl

python-doctr 0.9.0__py3-none-any.whl → 0.10.0__py3-none-any.whl