deepdoctection-0.30-py3-none-any.whl → deepdoctection-0.31-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (74)
  1. deepdoctection/__init__.py +4 -2
  2. deepdoctection/analyzer/dd.py +6 -5
  3. deepdoctection/dataflow/base.py +0 -19
  4. deepdoctection/dataflow/custom.py +4 -3
  5. deepdoctection/dataflow/custom_serialize.py +14 -5
  6. deepdoctection/dataflow/parallel_map.py +12 -11
  7. deepdoctection/dataflow/serialize.py +5 -4
  8. deepdoctection/datapoint/annotation.py +33 -12
  9. deepdoctection/datapoint/box.py +1 -4
  10. deepdoctection/datapoint/convert.py +3 -1
  11. deepdoctection/datapoint/image.py +66 -29
  12. deepdoctection/datapoint/view.py +57 -25
  13. deepdoctection/datasets/adapter.py +1 -1
  14. deepdoctection/datasets/base.py +83 -10
  15. deepdoctection/datasets/dataflow_builder.py +1 -1
  16. deepdoctection/datasets/info.py +2 -2
  17. deepdoctection/datasets/instances/layouttest.py +2 -7
  18. deepdoctection/eval/accmetric.py +1 -1
  19. deepdoctection/eval/base.py +5 -4
  20. deepdoctection/eval/eval.py +2 -2
  21. deepdoctection/eval/tp_eval_callback.py +5 -4
  22. deepdoctection/extern/base.py +39 -13
  23. deepdoctection/extern/d2detect.py +164 -64
  24. deepdoctection/extern/deskew.py +32 -7
  25. deepdoctection/extern/doctrocr.py +227 -39
  26. deepdoctection/extern/fastlang.py +45 -7
  27. deepdoctection/extern/hfdetr.py +90 -33
  28. deepdoctection/extern/hflayoutlm.py +109 -22
  29. deepdoctection/extern/pdftext.py +2 -1
  30. deepdoctection/extern/pt/ptutils.py +3 -2
  31. deepdoctection/extern/tessocr.py +134 -22
  32. deepdoctection/extern/texocr.py +2 -0
  33. deepdoctection/extern/tp/tpcompat.py +4 -4
  34. deepdoctection/extern/tp/tpfrcnn/preproc.py +2 -7
  35. deepdoctection/extern/tpdetect.py +50 -23
  36. deepdoctection/mapper/d2struct.py +1 -1
  37. deepdoctection/mapper/hfstruct.py +1 -1
  38. deepdoctection/mapper/laylmstruct.py +1 -1
  39. deepdoctection/mapper/maputils.py +13 -2
  40. deepdoctection/mapper/prodigystruct.py +1 -1
  41. deepdoctection/mapper/pubstruct.py +10 -10
  42. deepdoctection/mapper/tpstruct.py +1 -1
  43. deepdoctection/pipe/anngen.py +35 -8
  44. deepdoctection/pipe/base.py +53 -19
  45. deepdoctection/pipe/cell.py +29 -8
  46. deepdoctection/pipe/common.py +12 -4
  47. deepdoctection/pipe/doctectionpipe.py +2 -2
  48. deepdoctection/pipe/language.py +3 -2
  49. deepdoctection/pipe/layout.py +3 -2
  50. deepdoctection/pipe/lm.py +2 -2
  51. deepdoctection/pipe/refine.py +18 -10
  52. deepdoctection/pipe/segment.py +21 -16
  53. deepdoctection/pipe/text.py +14 -8
  54. deepdoctection/pipe/transform.py +16 -9
  55. deepdoctection/train/d2_frcnn_train.py +15 -12
  56. deepdoctection/train/hf_detr_train.py +8 -6
  57. deepdoctection/train/hf_layoutlm_train.py +16 -11
  58. deepdoctection/utils/__init__.py +3 -0
  59. deepdoctection/utils/concurrency.py +1 -1
  60. deepdoctection/utils/context.py +2 -2
  61. deepdoctection/utils/env_info.py +55 -22
  62. deepdoctection/utils/error.py +84 -0
  63. deepdoctection/utils/file_utils.py +4 -15
  64. deepdoctection/utils/fs.py +7 -7
  65. deepdoctection/utils/pdf_utils.py +5 -4
  66. deepdoctection/utils/settings.py +5 -1
  67. deepdoctection/utils/transform.py +1 -1
  68. deepdoctection/utils/utils.py +0 -6
  69. deepdoctection/utils/viz.py +44 -2
  70. {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/METADATA +33 -58
  71. {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/RECORD +74 -73
  72. {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/WHEEL +1 -1
  73. {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/LICENSE +0 -0
  74. {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/top_level.txt +0 -0
@@ -28,6 +28,7 @@ import numpy as np
28
28
  from numpy import uint8
29
29
 
30
30
  from ..utils.detection_types import ImageType, JsonDict, Pathlike
31
+ from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDError
31
32
  from ..utils.identifier import get_uuid, is_uuid_like
32
33
  from ..utils.settings import ObjectTypes, get_type
33
34
  from .annotation import Annotation, BoundingBox, ImageAnnotation, SummaryAnnotation
@@ -108,7 +109,7 @@ class Image:
108
109
  """
109
110
  if self._image_id is not None:
110
111
  return self._image_id
111
- raise ValueError("image_id not set")
112
+ raise ImageError("image_id not set")
112
113
 
113
114
  @image_id.setter
114
115
  def image_id(self, input_id: str) -> None:
@@ -116,13 +117,13 @@ class Image:
116
117
  image_id setter
117
118
  """
118
119
  if self._image_id is not None:
119
- raise ValueError("image_id already defined and cannot be reset")
120
+ raise ImageError("image_id already defined and cannot be reset")
120
121
  if is_uuid_like(input_id):
121
122
  self._image_id = input_id
122
123
  elif isinstance(input_id, property):
123
124
  pass
124
125
  else:
125
- raise ValueError("image_id must be uuid3 string")
126
+ raise UUIDError("image_id must be uuid3 string")
126
127
 
127
128
  @property
128
129
  def image(self) -> Optional[ImageType]:
@@ -153,7 +154,7 @@ class Image:
153
154
  self._self_embedding()
154
155
  else:
155
156
  if not isinstance(image, np.ndarray):
156
- raise TypeError(f"Cannot load image is of type: {type(image)}")
157
+ raise ImageError(f"Cannot load image is of type: {type(image)}")
157
158
  self._image = image.astype(uint8)
158
159
  self.set_width_height(self._image.shape[1], self._image.shape[0])
159
160
  self._self_embedding()
@@ -248,7 +249,7 @@ class Image:
248
249
  width
249
250
  """
250
251
  if self._bbox is None:
251
- raise ValueError("Width not available. Call set_width_height first")
252
+ raise ImageError("Width not available. Call set_width_height first")
252
253
  return self._bbox.width
253
254
 
254
255
  @property
@@ -257,7 +258,7 @@ class Image:
257
258
  height
258
259
  """
259
260
  if self._bbox is None:
260
- raise ValueError("Height not available. Call set_width_height first")
261
+ raise ImageError("Height not available. Call set_width_height first")
261
262
  return self._bbox.height
262
263
 
263
264
  def set_width_height(self, width: float, height: float) -> None:
@@ -281,7 +282,7 @@ class Image:
281
282
  :param bounding_box: bounding box of this image in terms of the embedding image.
282
283
  """
283
284
  if not isinstance(bounding_box, BoundingBox):
284
- raise TypeError(f"Bounding box must be of type BoundingBox, is of type {type(bounding_box)}")
285
+ raise BoundingBoxError(f"Bounding box must be of type BoundingBox, is of type {type(bounding_box)}")
285
286
  self.embeddings[image_id] = bounding_box
286
287
 
287
288
  def get_embedding(self, image_id: str) -> BoundingBox:
@@ -307,14 +308,14 @@ class Image:
307
308
  :param annotation: image annotation to store
308
309
  """
309
310
  if not isinstance(annotation, ImageAnnotation):
310
- raise TypeError(
311
+ raise AnnotationError(
311
312
  f"Annotation must be of type ImageAnnotation: "
312
313
  f"{annotation.annotation_id} but is of type {str(type(annotation))}"
313
314
  )
314
315
  if annotation._annotation_id is None: # pylint: disable=W0212
315
316
  annotation.annotation_id = self.define_annotation_id(annotation)
316
317
  if annotation.annotation_id in self._annotation_ids:
317
- raise ValueError(f"Cannot dump annotation with already taken " f"id {annotation.annotation_id}")
318
+ raise ImageError(f"Cannot dump annotation with already taken " f"id {annotation.annotation_id}")
318
319
  self._annotation_ids.append(annotation.annotation_id)
319
320
  self.annotations.append(annotation)
320
321
 
@@ -322,7 +323,10 @@ class Image:
322
323
  self,
323
324
  category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
324
325
  annotation_ids: Optional[Union[str, Sequence[str]]] = None,
325
- annotation_types: Optional[Union[str, Sequence[str]]] = None,
326
+ service_id: Optional[Union[str, Sequence[str]]] = None,
327
+ model_id: Optional[Union[str, Sequence[str]]] = None,
328
+ session_ids: Optional[Union[str, Sequence[str]]] = None,
329
+ ignore_inactive: bool = True,
326
330
  ) -> List[ImageAnnotation]:
327
331
  """
328
332
  Selection of annotations from the annotation container. Filter conditions can be defined by specifying
@@ -333,47 +337,80 @@ class Image:
333
337
 
334
338
  :param category_names: A single name or list of names
335
339
  :param annotation_ids: A single id or list of ids
336
- :param annotation_types: A type name or list of type names.
340
+ :param service_id: A single service name or list of service names
341
+ :param model_id: A single model name or list of model names
342
+ :param session_ids: A single session id or list of session ids
343
+ :param ignore_inactive: If set to `True` only active annotations are returned.
344
+
337
345
  :return: A (possibly empty) list of Annotations
338
346
  """
339
347
 
340
- cat_names = [category_names] if isinstance(category_names, (ObjectTypes, str)) else category_names
341
- if cat_names is not None:
342
- cat_names = [get_type(cat_name) for cat_name in cat_names]
343
- ann_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
344
- ann_types = [annotation_types] if isinstance(annotation_types, str) else annotation_types
348
+ if category_names is not None:
349
+ category_names = (
350
+ [get_type(cat_name) for cat_name in category_names]
351
+ if isinstance(category_names, (list, set))
352
+ else [get_type(category_names)] # type:ignore
353
+ )
345
354
 
346
- anns = filter(lambda x: x.active, self.annotations)
355
+ ann_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
356
+ service_id = [service_id] if isinstance(service_id, str) else service_id
357
+ model_id = [model_id] if isinstance(model_id, str) else model_id
358
+ session_id = [session_ids] if isinstance(session_ids, str) else session_ids
347
359
 
348
- if ann_types is not None:
349
- for type_name in ann_types:
350
- anns = filter(lambda x: isinstance(x, eval(type_name)), anns) # pylint: disable=W0123, W0640
360
+ if ignore_inactive:
361
+ anns = filter(lambda x: x.active, self.annotations)
362
+ else:
363
+ anns = self.annotations # type:ignore
351
364
 
352
- if cat_names is not None:
353
- anns = filter(lambda x: x.category_name in cat_names, anns) # type:ignore
365
+ if category_names is not None:
366
+ anns = filter(lambda x: x.category_name in category_names, anns) # type:ignore
354
367
 
355
368
  if ann_ids is not None:
356
369
  anns = filter(lambda x: x.annotation_id in ann_ids, anns) # type:ignore
357
370
 
371
+ if service_id is not None:
372
+ anns = filter(lambda x: x.service_id in service_id, anns) # type:ignore
373
+
374
+ if model_id is not None:
375
+ anns = filter(lambda x: x.model_id in model_id, anns) # type:ignore
376
+
377
+ if session_id is not None:
378
+ anns = filter(lambda x: x.session_id in session_id, anns) # type:ignore
379
+
358
380
  return list(anns)
359
381
 
360
382
  def get_annotation_iter(
361
383
  self,
362
384
  category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
363
385
  annotation_ids: Optional[Union[str, Sequence[str]]] = None,
364
- annotation_types: Optional[Union[str, Sequence[str]]] = None,
386
+ service_id: Optional[Union[str, Sequence[str]]] = None,
387
+ model_id: Optional[Union[str, Sequence[str]]] = None,
388
+ session_ids: Optional[Union[str, Sequence[str]]] = None,
389
+ ignore_inactive: bool = True,
365
390
  ) -> Iterable[ImageAnnotation]:
366
391
  """
367
392
  Get annotation as an iterator. Same as `get_annotation` but returns an iterator instead of a list.
368
393
 
369
394
  :param category_names: A single name or list of names
370
395
  :param annotation_ids: A single id or list of ids
371
- :param annotation_types: A type name or list of type names.
396
+ :param service_id: A single service name or list of service names
397
+ :param model_id: A single model name or list of model names
398
+ :param session_ids: A single session id or list of session ids
399
+ :param ignore_inactive: If set to `True` only active annotations are returned.
372
400
 
373
401
  :return: A (possibly empty) list of annotations
374
402
  """
375
403
 
376
- return iter(self.get_annotation(category_names, annotation_ids, annotation_types))
404
+ return iter(
405
+ self.get_annotation(
406
+ category_names=category_names,
407
+ annotation_ids=annotation_ids,
408
+ service_id=service_id,
409
+ model_id=model_id,
410
+ session_ids=session_ids,
411
+ ignore_inactive=ignore_inactive,
412
+ )
413
+ )
377
414
 
378
415
  def as_dict(self) -> Dict[str, Any]:
379
416
  """
@@ -439,7 +476,7 @@ class Image:
439
476
  new_image = Image(file_name=self.file_name, location=self.location, external_id=annotation_id)
440
477
 
441
478
  if self._bbox is None or ann.bounding_box is None:
442
- raise ValueError(f"Bounding box for image and ImageAnnotation ({annotation_id}) must be set")
479
+ raise ImageError(f"Bounding box for image and ImageAnnotation ({annotation_id}) must be set")
443
480
 
444
481
  new_bounding_box = intersection_box(self._bbox, ann.bounding_box, self.width, self.height)
445
482
  if new_bounding_box.absolute_coords:
@@ -454,7 +491,7 @@ class Image:
454
491
  if crop_image and self.image is not None:
455
492
  new_image.image = crop_box_from_image(self.image, ann.bounding_box, self.width, self.height)
456
493
  elif crop_image and self.image is None:
457
- raise ValueError("crop_image = True requires self.image to be not None")
494
+ raise ImageError("crop_image = True requires self.image to be not None")
458
495
 
459
496
  ann.image = new_image
460
497
 
@@ -472,7 +509,7 @@ class Image:
472
509
 
473
510
  ann = self.get_annotation(annotation_ids=annotation_id)[0]
474
511
  if ann.image is None:
475
- raise ValueError("When adding sub images to ImageAnnotation then ImageAnnotation.image must not be None")
512
+ raise ImageError("When adding sub images to ImageAnnotation then ImageAnnotation.image must not be None")
476
513
  assert ann.bounding_box is not None
477
514
  box = ann.bounding_box.to_list("xyxy")
478
515
  proposals = self.get_annotation(category_names)
@@ -485,7 +522,7 @@ class Image:
485
522
  sub_images = self.get_annotation(annotation_ids=selected_ids.tolist())
486
523
  for sub_image in sub_images:
487
524
  if sub_image.image is None:
488
- raise ValueError(
525
+ raise ImageError(
489
526
  "When setting an embedding to ImageAnnotation then ImageAnnotation.image must not be None"
490
527
  )
491
528
  sub_image.image.set_embedding(
@@ -26,6 +26,7 @@ from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Typ
26
26
  import numpy as np
27
27
 
28
28
  from ..utils.detection_types import ImageType, JsonDict, Pathlike
29
+ from ..utils.error import AnnotationError, ImageError
29
30
  from ..utils.logger import LoggingRecord, logger
30
31
  from ..utils.settings import (
31
32
  CellType,
@@ -96,7 +97,7 @@ class ImageAnnotationBaseView(ImageAnnotation):
96
97
  interactive_imshow(np_image)
97
98
  return None
98
99
  return np_image
99
- raise ValueError(f"base_page.image is None for {self.annotation_id}")
100
+ raise AnnotationError(f"base_page.image is None for {self.annotation_id}")
100
101
 
101
102
  def __getattr__(self, item: str) -> Optional[Union[str, int, List[str]]]:
102
103
  """
@@ -115,7 +116,7 @@ class ImageAnnotationBaseView(ImageAnnotation):
115
116
  :return: value according to the logic described above
116
117
  """
117
118
  if item not in self.get_attribute_names():
118
- raise AttributeError(f"Attribute {item} is not supported for {type(self)}")
119
+ raise AnnotationError(f"Attribute {item} is not supported for {type(self)}")
119
120
  if item in self.sub_categories:
120
121
  sub_cat = self.get_sub_category(get_type(item))
121
122
  if item != sub_cat.category_name:
@@ -326,7 +327,7 @@ class Table(Layout):
326
327
  def text(self) -> str:
327
328
  try:
328
329
  return str(self)
329
- except TypeError:
330
+ except (TypeError, AnnotationError):
330
331
  return super().text
331
332
 
332
333
  @property
@@ -368,7 +369,7 @@ class Table(Layout):
368
369
  for cell in cells:
369
370
  all_words.extend(cell.get_ordered_words()) # type: ignore
370
371
  return all_words
371
- except TypeError:
372
+ except (TypeError, AnnotationError):
372
373
  return super().get_ordered_words()
373
374
 
374
375
 
@@ -452,40 +453,71 @@ class Page(Image):
452
453
  "page_number",
453
454
  }
454
455
 
455
- @no_type_check
456
- def get_annotation(
456
+ def get_annotation( # type: ignore
457
457
  self,
458
458
  category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
459
459
  annotation_ids: Optional[Union[str, Sequence[str]]] = None,
460
- annotation_types: Optional[Union[str, Sequence[str]]] = None,
460
+ service_id: Optional[Union[str, Sequence[str]]] = None,
461
+ model_id: Optional[Union[str, Sequence[str]]] = None,
462
+ session_ids: Optional[Union[str, Sequence[str]]] = None,
463
+ ignore_inactive: bool = True,
461
464
  ) -> List[ImageAnnotationBaseView]:
462
465
  """
466
+ Selection of annotations from the annotation container. Filter conditions can be defined by specifying
467
+ the annotation_id or the category name. (Since only image annotations are currently allowed in the container,
468
+ annotation_type is a redundant filter condition.) Only annotations that have active = 'True' are
469
+ returned. If more than one condition is provided, only annotations will be returned that satisfy all conditions.
470
+ If no condition is provided, it will return all active annotations.
471
+
463
472
  Identical to its base class method for having correct return types. If the base class changes, please
464
473
  change this method as well.
474
+
475
+ :param category_names: A single name or list of names
476
+ :param annotation_ids: A single id or list of ids
477
+ :param service_id: A single service name or list of service names
478
+ :param model_id: A single model name or list of model names
479
+ :param session_ids: A single session id or list of session ids
480
+ :param ignore_inactive: If set to `True` only active annotations are returned.
481
+
482
+ :return: A (possibly empty) list of Annotations
465
483
  """
466
- cat_names = [category_names] if isinstance(category_names, (ObjectTypes, str)) else category_names
467
- if cat_names is not None:
468
- cat_names = [get_type(cat_name) for cat_name in cat_names]
469
- ann_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
470
- ann_types = [annotation_types] if isinstance(annotation_types, str) else annotation_types
471
484
 
472
- anns = filter(lambda x: x.active, self.annotations)
485
+ if category_names is not None:
486
+ category_names = (
487
+ [get_type(cat_name) for cat_name in category_names]
488
+ if isinstance(category_names, list)
489
+ else [get_type(category_names)] # type:ignore
490
+ )
491
+ ann_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
492
+ service_id = [service_id] if isinstance(service_id, str) else service_id
493
+ model_id = [model_id] if isinstance(model_id, str) else model_id
494
+ session_id = [session_ids] if isinstance(session_ids, str) else session_ids
473
495
 
474
- if ann_types is not None:
475
- for type_name in ann_types:
476
- anns = filter(lambda x: isinstance(x, eval(type_name)), anns) # pylint: disable=W0123, W0640
496
+ if ignore_inactive:
497
+ anns = filter(lambda x: x.active, self.annotations)
498
+ else:
499
+ anns = self.annotations # type:ignore
477
500
 
478
- if cat_names is not None:
479
- anns = filter(lambda x: x.category_name in cat_names, anns)
501
+ if category_names is not None:
502
+ anns = filter(lambda x: x.category_name in category_names, anns) # type:ignore
480
503
 
481
504
  if ann_ids is not None:
482
- anns = filter(lambda x: x.annotation_id in ann_ids, anns)
505
+ anns = filter(lambda x: x.annotation_id in ann_ids, anns) # type:ignore
506
+
507
+ if service_id is not None:
508
+ anns = filter(lambda x: x.generating_service in service_id, anns) # type:ignore
509
+
510
+ if model_id is not None:
511
+ anns = filter(lambda x: x.generating_model in model_id, anns) # type:ignore
512
+
513
+ if session_id is not None:
514
+ anns = filter(lambda x: x.session_id in session_id, anns) # type:ignore
483
515
 
484
- return list(anns)
516
+ return list(anns) # type:ignore
485
517
 
486
518
  def __getattr__(self, item: str) -> Any:
487
519
  if item not in self.get_attribute_names():
488
- raise AttributeError(f"Attribute {item} is not supported for {type(self)}")
520
+ raise ImageError(f"Attribute {item} is not supported for {type(self)}")
489
521
  if self.summary is not None:
490
522
  if item in self.summary.sub_categories:
491
523
  sub_cat = self.summary.get_sub_category(get_type(item))
@@ -629,10 +661,10 @@ class Page(Image):
629
661
  """
630
662
  ann = self.get_annotation(annotation_ids=annotation_id)[0]
631
663
  if ann.category_name not in self.floating_text_block_categories:
632
- raise ValueError(
633
- f"Annotation {annotation_id} with category_name {ann.category_name} is not a floating text "
634
- f"block category. Cannot get context. Make sure to make this category a floating text "
635
- f"block"
664
+ raise ImageError(
665
+ f"Cannot get context. Make sure to parametrize this category to a floating text: "
666
+ f"annotation_id: {annotation_id},"
667
+ f"category_name: {ann.category_name}"
636
668
  )
637
669
  block_with_order = self._order("layouts")
638
670
  position = block_with_order.index(ann)
@@ -165,4 +165,4 @@ class DatasetAdapter(IterableDataset): # type: ignore
165
165
  return len(self.df)
166
166
 
167
167
  def __getitem__(self, item: Any) -> None:
168
- raise NotImplementedError
168
+ raise NotImplementedError()
@@ -18,17 +18,19 @@
18
18
  """
19
19
  Module for the base class of datasets.
20
20
  """
21
-
21
+ import json
22
22
  import os
23
23
  import pprint
24
24
  from abc import ABC, abstractmethod
25
25
  from collections import defaultdict
26
- from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Type, Union
26
+ from inspect import signature
27
+ from pathlib import Path
28
+ from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, Type, Union
27
29
 
28
30
  import numpy as np
29
31
 
30
32
  from ..dataflow import CacheData, ConcatData, CustomDataFromList, DataFlow
31
- from ..datapoint import Image
33
+ from ..datapoint.image import Image
32
34
  from ..utils.detection_types import Pathlike
33
35
  from ..utils.logger import LoggingRecord, logger
34
36
  from ..utils.settings import ObjectTypes, TypeOrStr, get_type
@@ -51,9 +53,11 @@ class DatasetBase(ABC):
51
53
  self._dataflow_builder.splits = self._dataset_info.splits
52
54
 
53
55
  if not self.dataset_available() and self.is_built_in():
54
- print(
55
- f"Dataset {self._dataset_info.name} not locally found. Please download at {self._dataset_info.url}"
56
- f" and place under {self._dataflow_builder.get_workdir()}"
56
+ logger.warning(
57
+ LoggingRecord(
58
+ f"Dataset {self._dataset_info.name} not locally found. Please download at {self._dataset_info.url}"
59
+ f" and place under {self._dataflow_builder.get_workdir()}"
60
+ )
57
61
  )
58
62
 
59
63
  @property
@@ -76,7 +80,7 @@ class DatasetBase(ABC):
76
80
  Construct the DatasetCategory object.
77
81
  """
78
82
 
79
- raise NotImplementedError
83
+ raise NotImplementedError()
80
84
 
81
85
  @classmethod
82
86
  @abstractmethod
@@ -85,7 +89,7 @@ class DatasetBase(ABC):
85
89
  Construct the DatasetInfo object.
86
90
  """
87
91
 
88
- raise NotImplementedError
92
+ raise NotImplementedError()
89
93
 
90
94
  @abstractmethod
91
95
  def _builder(self) -> DataFlowBaseBuilder:
@@ -93,7 +97,7 @@ class DatasetBase(ABC):
93
97
  Construct the DataFlowBaseBuilder object. It needs to be implemented in the derived class.
94
98
  """
95
99
 
96
- raise NotImplementedError
100
+ raise NotImplementedError()
97
101
 
98
102
  def dataset_available(self) -> bool:
99
103
  """
@@ -114,7 +118,7 @@ class DatasetBase(ABC):
114
118
 
115
119
  class _BuiltInDataset(DatasetBase, ABC):
116
120
  """
117
- Dataclass for built-in dataset. Do not use this it
121
+ Dataclass for built-in dataset. Do not use this
118
122
  """
119
123
 
120
124
  _name: Optional[str] = None
@@ -427,6 +431,11 @@ class CustomDataset(DatasetBase):
427
431
  else:
428
432
  self.init_sub_categories = init_sub_categories
429
433
  self.annotation_files = annotation_files
434
+ if signature(dataflow_builder.__init__).parameters.keys() != {"self", "location", "annotation_files"}:
435
+ raise TypeError(
436
+ "Dataflow builder must have the signature `def __init__(self, location: Pathlike, "
437
+ "annotation_files: Optional[Mapping[str, Union[str, Sequence[str]]]] = None):`"
438
+ )
430
439
  self.dataflow_builder = dataflow_builder(self.location, self.annotation_files)
431
440
  super().__init__()
432
441
 
@@ -438,3 +447,67 @@ class CustomDataset(DatasetBase):
438
447
 
439
448
  def _builder(self) -> DataFlowBaseBuilder:
440
449
  return self.dataflow_builder
450
+
451
+ @staticmethod
452
+ def from_dataset_card(file_path: str, dataflow_builder: Type[DataFlowBaseBuilder]) -> "CustomDataset":
453
+ """
454
+ This static method creates a CustomDataset instance from a dataset card.
455
+
456
+ A dataset card is a JSON file that contains metadata about the dataset such as its name, type, location,
457
+ initial categories, initial sub categories, and annotation files. The dataflow_builder parameter is a class
458
+ that inherits from DataFlowBaseBuilder and is used to build the dataflow for the dataset.
459
+
460
+ :param file_path: The path to the dataset card (JSON file).
461
+ :param dataflow_builder: The class used to build the dataflow for the dataset.
462
+ :return: A CustomDataset instance created from the dataset card.
463
+ """
464
+
465
+ with open(file_path, "r", encoding="UTF-8") as file:
466
+ meta_data = json.load(file)
467
+ meta_data["dataset_type"] = get_type(meta_data["dataset_type"])
468
+ meta_data["location"] = Path(meta_data["location"])
469
+ meta_data["init_categories"] = [get_type(cat) for cat in meta_data["init_categories"]]
470
+ meta_data["init_sub_categories"] = (
471
+ {
472
+ get_type(cat): {
473
+ get_type(sub_cat_key): [get_type(sub_cat_value) for sub_cat_value in sub_cat_values]
474
+ for sub_cat_key, sub_cat_values in sub_cats.items()
475
+ }
476
+ for cat, sub_cats in meta_data["init_sub_categories"].items()
477
+ }
478
+ if meta_data["init_sub_categories"] is not None
479
+ else None
480
+ )
481
+ return CustomDataset(**meta_data, dataflow_builder=dataflow_builder)
482
+
483
+ def as_dict(self) -> Mapping[str, Any]:
484
+ """
485
+ Return the meta-data of the dataset as a dictionary.
486
+
487
+ :return: A dictionary containing the meta-data of the dataset.
488
+ """
489
+ return {
490
+ "name": self.name,
491
+ "dataset_type": self.type,
492
+ "location": str(self.location),
493
+ "annotation_files": self.annotation_files,
494
+ "init_categories": [cat.value for cat in self.init_categories],
495
+ "init_sub_categories": {
496
+ cat.value: {
497
+ sub_cat_key.value: [sub_cat_value.value for sub_cat_value in sub_cat_values]
498
+ for sub_cat_key, sub_cat_values in sub_cats.items()
499
+ }
500
+ for cat, sub_cats in self.init_sub_categories.items()
501
+ }
502
+ if self.init_sub_categories is not None
503
+ else None,
504
+ }
505
+
506
+ def save_dataset_card(self, file_path: str) -> None:
507
+ """
508
+ Save the dataset card to a JSON file.
509
+
510
+ :param file_path: file_path
511
+ """
512
+ with open(file_path, "w", encoding="UTF-8") as file:
513
+ json.dump(self.as_dict(), file, indent=4)
@@ -110,7 +110,7 @@ class DataFlowBaseBuilder(ABC):
110
110
  :param kwargs: A custom set of arguments/values
111
111
  :return: dataflow
112
112
  """
113
- raise NotImplementedError
113
+ raise NotImplementedError()
114
114
 
115
115
  def get_annotation_file(self, split: str) -> str:
116
116
  """Get single annotation file."""
@@ -306,7 +306,7 @@ class DatasetCategories:
306
306
 
307
307
  _cat_to_sub_cat = {get_type(key): get_type(value) for key, value in cat_to_sub_cat.items()}
308
308
  if not self._allow_update:
309
- raise PermissionError("Replacing categories with sub categories is not allowed")
309
+ raise RuntimeWarning("Replacing categories with sub categories is not allowed")
310
310
  self._categories_update = self.init_categories
311
311
  categories = self.get_categories(name_as_key=True)
312
312
  cats_or_sub_cats = [
@@ -332,7 +332,7 @@ class DatasetCategories:
332
332
  """
333
333
 
334
334
  if not self._allow_update:
335
- raise PermissionError("Filtering categories is not allowed")
335
+ raise RuntimeWarning("Filtering categories is not allowed")
336
336
  if isinstance(categories, (ObjectTypes, str)):
337
337
  categories = [get_type(categories)]
338
338
  else:
@@ -49,12 +49,7 @@ _LICENSE = (
49
49
  " – Permissive – Version 1.0 License. Dr. Janis Meyer does not own the copyright of the images. \n"
50
50
  " Use of the images must abide by the PMC Open Access Subset Terms of Use."
51
51
  )
52
- _URL = [
53
- "https://www.googleapis.com/drive/v3/files/1ZD4Ef4gd2FIfp7vR8jbnrZeXD3gSWNqE?alt"
54
- "=media&key=AIzaSyDuoPG6naK-kRJikScR7cP_1sQBF1r3fWU",
55
- "https://www.googleapis.com/drive/v3/files/18HD62LFLa1iAmqffo4SyjuEQ32MzyNQ0?alt"
56
- "=media&key=AIzaSyDuoPG6naK-kRJikScR7cP_1sQBF1r3fWU",
57
- ]
52
+
58
53
  _SPLITS: Mapping[str, str] = {"test": "test", "predict": "predict"}
59
54
  _TYPE = DatasetType.object_detection
60
55
  _LOCATION = "testlayout"
@@ -77,7 +72,7 @@ class LayoutTest(_BuiltInDataset):
77
72
 
78
73
  @classmethod
79
74
  def _info(cls) -> DatasetInfo:
80
- return DatasetInfo(name=_NAME, description=_DESCRIPTION, license=_LICENSE, url=_URL, splits=_SPLITS, type=_TYPE)
75
+ return DatasetInfo(name=_NAME, description=_DESCRIPTION, license=_LICENSE, splits=_SPLITS, type=_TYPE)
81
76
 
82
77
  def _categories(self) -> DatasetCategories:
83
78
  return DatasetCategories(init_categories=_INIT_CATEGORIES)
@@ -87,7 +87,7 @@ def accuracy(label_gt: Sequence[int], label_predictions: Sequence[int], masks: O
87
87
  np_label_gt, np_label_pr = np.asarray(label_gt), np.asarray(label_predictions)
88
88
  if len(np_label_gt) != len(np_label_pr):
89
89
  raise ValueError(
90
- f"length of label_gt ({len(np_label_gt)}) and label_predictions" f" ({len(np_label_pr)}) must be equal"
90
+ f"length label_gt: {len(np_label_gt)}, length label_predictions: ({len(np_label_pr)}) but must be equal"
91
91
  )
92
92
  if masks is not None:
93
93
  np_label_gt, np_label_pr = _mask_some_gt_and_pr_labels(np_label_gt, np_label_pr, masks)
@@ -25,6 +25,7 @@ from typing import Any, Callable, List, Optional, Tuple
25
25
  from ..dataflow import DataFlow
26
26
  from ..datasets.info import DatasetCategories
27
27
  from ..utils.detection_types import JsonDict
28
+ from ..utils.error import DependencyError
28
29
  from ..utils.file_utils import Requirement
29
30
 
30
31
 
@@ -52,7 +53,7 @@ class MetricBase(ABC):
52
53
  requirements = cls.get_requirements()
53
54
  name = cls.__name__ if hasattr(cls, "__name__") else cls.__class__.__name__
54
55
  if not all(requirement[1] for requirement in requirements):
55
- raise ImportError(
56
+ raise DependencyError(
56
57
  "\n".join(
57
58
  [f"{name} has the following dependencies:"]
58
59
  + [requirement[2] for requirement in requirements if not requirement[1]]
@@ -66,7 +67,7 @@ class MetricBase(ABC):
66
67
  """
67
68
  Get a list of requirements for running the detector
68
69
  """
69
- raise NotImplementedError
70
+ raise NotImplementedError()
70
71
 
71
72
  @classmethod
72
73
  @abstractmethod
@@ -80,7 +81,7 @@ class MetricBase(ABC):
80
81
  :param dataflow_predictions: Dataflow with predictions.
81
82
  :param categories: DatasetCategories with respect to the underlying dataset.
82
83
  """
83
- raise NotImplementedError
84
+ raise NotImplementedError()
84
85
 
85
86
  @classmethod
86
87
  @abstractmethod
@@ -95,7 +96,7 @@ class MetricBase(ABC):
95
96
  :param dataflow_predictions: Dataflow with predictions.
96
97
  :param categories: DatasetCategories with respect to the underlying dataset.
97
98
  """
98
- raise NotImplementedError
99
+ raise NotImplementedError()
99
100
 
100
101
  @classmethod
101
102
  def result_list_to_dict(cls, results: List[JsonDict]) -> JsonDict:
@@ -171,7 +171,7 @@ class Evaluator:
171
171
  "metric has no attribute sub_cats and cannot be used for token classification datasets"
172
172
  )
173
173
  else:
174
- raise NotImplementedError
174
+ raise NotImplementedError()
175
175
 
176
176
  else:
177
177
  self.wandb_table_agent = None
@@ -271,7 +271,7 @@ class Evaluator:
271
271
  sub_cats_to_remove = meta_anns["sub_categories"]
272
272
  df_pr = MapData(df_pr, remove_cats(sub_categories=sub_cats_to_remove))
273
273
  else:
274
- raise NotImplementedError
274
+ raise NotImplementedError()
275
275
 
276
276
  return df_pr
277
277