PyPI - deepdoctection - Versions diffs - 0.33__py3-none-any.whl → 0.35__py3-none-any.whl - Mend

deepdoctection 0.33__py3-none-any.whl → 0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of deepdoctection might be problematic. Click here for more details.

Files changed (40)

deepdoctection/__init__.py +11 -12
deepdoctection/analyzer/__init__.py +1 -0
deepdoctection/analyzer/_config.py +150 -0
deepdoctection/analyzer/dd.py +42 -358
deepdoctection/analyzer/factory.py +522 -0
deepdoctection/configs/conf_dd_one.yaml +1 -0
deepdoctection/datapoint/annotation.py +41 -3
deepdoctection/datapoint/convert.py +6 -4
deepdoctection/datapoint/image.py +132 -46
deepdoctection/datapoint/view.py +2 -1
deepdoctection/datasets/base.py +1 -1
deepdoctection/datasets/instances/fintabnet.py +1 -1
deepdoctection/datasets/instances/xfund.py +29 -7
deepdoctection/eval/eval.py +7 -1
deepdoctection/extern/model.py +2 -1
deepdoctection/extern/pdftext.py +96 -5
deepdoctection/extern/tessocr.py +1 -0
deepdoctection/mapper/cats.py +11 -13
deepdoctection/mapper/cocostruct.py +6 -2
deepdoctection/mapper/d2struct.py +2 -1
deepdoctection/mapper/laylmstruct.py +1 -1
deepdoctection/mapper/match.py +31 -0
deepdoctection/mapper/misc.py +1 -1
deepdoctection/mapper/prodigystruct.py +1 -1
deepdoctection/pipe/anngen.py +27 -0
deepdoctection/pipe/base.py +23 -0
deepdoctection/pipe/common.py +123 -38
deepdoctection/pipe/segment.py +1 -1
deepdoctection/pipe/sub_layout.py +1 -1
deepdoctection/utils/env_info.py +31 -2
deepdoctection/utils/file_utils.py +19 -0
deepdoctection/utils/fs.py +27 -4
deepdoctection/utils/metacfg.py +12 -0
deepdoctection/utils/pdf_utils.py +114 -6
deepdoctection/utils/settings.py +3 -0
{deepdoctection-0.33.dist-info → deepdoctection-0.35.dist-info}/METADATA +20 -11
{deepdoctection-0.33.dist-info → deepdoctection-0.35.dist-info}/RECORD +40 -38
{deepdoctection-0.33.dist-info → deepdoctection-0.35.dist-info}/WHEEL +1 -1
{deepdoctection-0.33.dist-info → deepdoctection-0.35.dist-info}/LICENSE +0 -0
{deepdoctection-0.33.dist-info → deepdoctection-0.35.dist-info}/top_level.txt +0 -0

deepdoctection/datapoint/image.py CHANGED Viewed

@@ -21,10 +21,11 @@ Dataclass Image
 from __future__ import annotations
 import json
+from collections import defaultdict
 from dataclasses import dataclass, field
-from os import environ
+from os import environ, fspath
 from pathlib import Path
-from typing import Any, Iterable, Optional, Sequence, Union, no_type_check
+from typing import Any, Optional, Sequence, Union, no_type_check
 import numpy as np
 from numpy import uint8
@@ -33,7 +34,7 @@ from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDErr
 from ..utils.identifier import get_uuid, is_uuid_like
 from ..utils.settings import ObjectTypes, SummaryType, get_type
 from ..utils.types import ImageDict, PathLikeOrStr, PixelValues
-from .annotation import Annotation, BoundingBox, CategoryAnnotation, ImageAnnotation
+from .annotation import Annotation, AnnotationMap, BoundingBox, CategoryAnnotation, ImageAnnotation
 from .box import crop_box_from_image, global_to_local_coords, intersection_box
 from .convert import as_dict, convert_b64_to_np_array, convert_np_array_to_b64, convert_pdf_bytes_to_np_array_v2
@@ -303,6 +304,15 @@ class Image:
         return self.embeddings[image_id]
+    def remove_embedding(self, image_id: str) -> None:
+        """
+        Remove an embedding from the image.
+        :param image_id: uuid string of the embedding image
+        """
+        if image_id in self.embeddings:
+            self.embeddings.pop(image_id)
     def _self_embedding(self) -> None:
         if self._bbox is not None:
             self.set_embedding(self.image_id, self._bbox)
@@ -387,39 +397,6 @@ class Image:
         return list(anns)
-    def get_annotation_iter(
-        self,
-        category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
-        annotation_ids: Optional[Union[str, Sequence[str]]] = None,
-        service_id: Optional[Union[str, Sequence[str]]] = None,
-        model_id: Optional[Union[str, Sequence[str]]] = None,
-        session_ids: Optional[Union[str, Sequence[str]]] = None,
-        ignore_inactive: bool = True,
-    ) -> Iterable[ImageAnnotation]:
-        """
-        Get annotation as an iterator. Same as `get_annotation` but returns an iterator instead of a list.
-        :param category_names: A single name or list of names
-        :param annotation_ids: A single id or list of ids
-        :param service_id: A single service name or list of service names
-        :param model_id: A single model name or list of model names
-        :param session_ids: A single session id or list of session ids
-        :param ignore_inactive: If set to `True` only active annotations are returned.
-        :return: A (possibly empty) list of annotations
-        """
-        return iter(
-            self.get_annotation(
-                category_names=category_names,
-                annotation_ids=annotation_ids,
-                service_id=service_id,
-                model_id=model_id,
-                session_ids=session_ids,
-                ignore_inactive=ignore_inactive,
-            )
-        )
     def as_dict(self) -> dict[str, Any]:
         """
         Returns the full image dataclass as dict. Uses the custom `convert.as_dict` to disregard attributes
@@ -435,13 +412,22 @@ class Image:
             img_dict["_image"] = None
         return img_dict
+    def as_json(self) -> str:
+        """
+        Returns the full image dataclass as json string.
+        :return:  A json string.
+        """
+        return json.dumps(self.as_dict(), indent=4)
     @staticmethod
     def remove_keys() -> list[str]:
         """
         A list of attributes to suspend from as_dict creation.
         """
-        return ["_image"]
+        return ["_image", "_annotation_ids", "_category_name"]
     def define_annotation_id(self, annotation: Annotation) -> str:
         """
@@ -456,17 +442,79 @@ class Image:
         attributes_values = [str(getattr(annotation, attribute)) for attribute in attributes]
         return get_uuid(*attributes_values, str(self.image_id))
-    def remove(self, annotation: ImageAnnotation) -> None:
+    def remove(
+        self,
+        annotation_ids: Optional[Union[str, list[str]]] = None,
+        service_ids: Optional[Union[str, list[str]]] = None,
+    ) -> None:
         """
         Instead of removing consider deactivating annotations.
         Calls `List.remove`. Make sure, the element is in the list for otherwise a ValueError will be raised.
-        :param annotation: The annotation to remove
+        :param annotation_ids: The annotation to remove
+        :param service_ids: The service id to remove
         """
+        ann_id_to_annotation_maps = self.get_annotation_id_to_annotation_maps()
+        if annotation_ids is not None:
+            annotation_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
+            for ann_id in annotation_ids:
+                if ann_id not in ann_id_to_annotation_maps:
+                    raise ImageError(f"Annotation with id {ann_id} not found")
+                annotation_maps = ann_id_to_annotation_maps[ann_id]
+                for annotation_map in annotation_maps:
+                    self._remove_by_annotation_id(ann_id, annotation_map)
+        if service_ids is not None:
+            service_ids = [service_ids] if isinstance(service_ids, str) else service_ids
+            service_id_to_annotation_id = self.get_service_id_to_annotation_id()
+            for service_id in service_ids:
+                if service_id not in service_id_to_annotation_id:
+                    raise ImageError(f"Service id {service_id} not found")
+                annotation_ids = service_id_to_annotation_id[service_id]
+                for ann_id in annotation_ids:
+                    if ann_id not in ann_id_to_annotation_maps:
+                        raise ImageError(f"Annotation with id {ann_id} not found")
+                    annotation_maps = ann_id_to_annotation_maps[ann_id]
+                    for annotation_map in annotation_maps:
+                        self._remove_by_annotation_id(ann_id, annotation_map)
+    def _remove_by_annotation_id(self, annotation_id: str, location_dict: AnnotationMap) -> None:
+        image_annotation_id = location_dict.image_annotation_id
+        annotations = self.get_annotation(annotation_ids=image_annotation_id)
+        if not annotations:
+            return
+        # There can only be one annotation with a given id
+        annotation = annotations[0]
+        if (
+            location_dict.sub_category_key is None
+            and location_dict.relationship_key is None
+            and location_dict.summary_key is None
+        ):
+            self.annotations.remove(annotation)
+            self._annotation_ids.remove(annotation.annotation_id)
+        sub_category_key = location_dict.sub_category_key
+        if sub_category_key is not None:
+            annotation.remove_sub_category(sub_category_key)
-        self.annotations.remove(annotation)
-        self._annotation_ids.remove(annotation.annotation_id)
+        relationship_key = location_dict.relationship_key
+        if relationship_key is not None:
+            annotation.remove_relationship(relationship_key, annotation_id)
+        summary_key = location_dict.summary_key
+        if summary_key is not None:
+            if annotation.image is not None:
+                annotation.image.summary.remove_sub_category(summary_key)
     def image_ann_to_image(self, annotation_id: str, crop_image: bool = False) -> None:
         """
@@ -580,6 +628,7 @@ class Image:
         if summary_dict := kwargs.get("_summary", kwargs.get("summary")):
             image.summary = CategoryAnnotation.from_dict(**summary_dict)
             image.summary.category_name = SummaryType.SUMMARY
         return image
     @classmethod
@@ -645,7 +694,7 @@ class Image:
         highest_hierarchy_only: bool = False,
         path: Optional[PathLikeOrStr] = None,
         dry: bool = False,
-    ) -> Optional[ImageDict]:
+    ) -> Optional[Union[ImageDict, str]]:
         """
         Export image as dictionary. As numpy array cannot be serialized `image` values will be converted into
         base64 encodings.
@@ -664,21 +713,58 @@ class Image:
             path = path / self.image_id
         suffix = path.suffix
         if suffix:
-            path_json = path.as_posix().replace(suffix, ".json")
+            path_json = fspath(path).replace(suffix, ".json")
         else:
-            path_json = path.as_posix() + ".json"
+            path_json = fspath(path) + ".json"
         if highest_hierarchy_only:
             self.remove_image_from_lower_hierachy()
         export_dict = self.as_dict()
-        export_dict["location"] = str(export_dict["location"])
+        export_dict["location"] = fspath(export_dict["location"])
         if not image_to_json:
             export_dict["_image"] = None
         if dry:
             return export_dict
         with open(path_json, "w", encoding="UTF-8") as file:
             json.dump(export_dict, file, indent=2)
-        return None
+        return path_json
     def get_categories_from_current_state(self) -> set[str]:
         """Returns all active dumped categories"""
         return {ann.category_name for ann in self.get_annotation()}
+    def get_service_id_to_annotation_id(self) -> defaultdict[str, list[str]]:
+        """
+        Returns a dictionary with service ids as keys and lists of annotation ids that have been generated by the
+        service
+        :return: default with service ids as keys and lists of annotation ids as values
+        """
+        service_id_dict = defaultdict(list)
+        for ann in self.get_annotation():
+            if ann.service_id:
+                service_id_dict[ann.service_id].append(ann.annotation_id)
+            for sub_cat_key in ann.sub_categories:
+                sub_cat = ann.get_sub_category(sub_cat_key)
+                if sub_cat.service_id:
+                    service_id_dict[sub_cat.service_id].append(sub_cat.annotation_id)
+            if ann.image is not None:
+                for summary_cat_key in ann.image.summary:
+                    summary_cat = ann.get_summary(summary_cat_key)
+                    if summary_cat.service_id:
+                        service_id_dict[summary_cat.service_id].append(summary_cat.annotation_id)
+        return service_id_dict
+    def get_annotation_id_to_annotation_maps(self) -> defaultdict[str, list[AnnotationMap]]:
+        """
+        Returns a dictionary with annotation ids as keys and lists of AnnotationMap as values. The range of ids
+        is the union of all ImageAnnotation, CategoryAnnotation and ContainerAnnotation of the image.
+        :return: default dict with annotation ids as keys and lists of AnnotationMap as values
+        """
+        all_ann_id_dict = defaultdict(list)
+        for ann in self.get_annotation():
+            ann_id_dict = ann.get_annotation_map()
+            for key, val in ann_id_dict.items():
+                all_ann_id_dict[key].extend(val)
+        return all_ann_id_dict

deepdoctection/datapoint/view.py CHANGED Viewed

@@ -509,6 +509,7 @@ class Page(Image):
         "location",
         "document_id",
         "page_number",
+        "angle",
     }
     include_residual_text_container: bool = True
@@ -971,7 +972,7 @@ class Page(Image):
         highest_hierarchy_only: bool = False,
         path: Optional[PathLikeOrStr] = None,
         dry: bool = False,
-    ) -> Optional[ImageDict]:
+    ) -> Optional[Union[ImageDict, str]]:
         """
         Export image as dictionary. As numpy array cannot be serialized `image` values will be converted into
         base64 encodings.

deepdoctection/datasets/base.py CHANGED Viewed

@@ -451,7 +451,7 @@ class CustomDataset(DatasetBase):
         return self.dataflow_builder
     @staticmethod
-    def from_dataset_card(file_path: str, dataflow_builder: Type[DataFlowBaseBuilder]) -> CustomDataset:
+    def from_dataset_card(file_path: PathLikeOrStr, dataflow_builder: Type[DataFlowBaseBuilder]) -> CustomDataset:
         """
         This static method creates a CustomDataset instance from a dataset card.

deepdoctection/datasets/instances/fintabnet.py CHANGED Viewed

@@ -264,7 +264,7 @@ class FintabnetBuilder(DataFlowBaseBuilder):
                     add_summary=True,
                 ),
             )
-            df = MapData(df, lambda dp: [ann.image for ann in dp.get_annotation_iter(category_names=LayoutType.TABLE)])
+            df = MapData(df, lambda dp: [ann.image for ann in dp.get_annotation(category_names=LayoutType.TABLE)])
             df = FlattenData(df)
             df = MapData(df, lambda dp: dp[0])

deepdoctection/datasets/instances/xfund.py CHANGED Viewed

@@ -180,13 +180,35 @@ class XfundBuilder(DataFlowBaseBuilder):
             "answer": TokenClasses.ANSWER,
             "header": TokenClasses.HEADER,
         }
-        ner_token_to_id_mapping = self.categories.get_sub_categories(
-            categories=LayoutType.WORD,
-            sub_categories={LayoutType.WORD: [WordType.TOKEN_TAG, WordType.TAG, WordType.TOKEN_CLASS]},
-            keys=False,
-            values_as_dict=True,
-            name_as_key=True,
-        )
+        if LayoutType.WORD in self.categories.get_categories(filtered=True, name_as_key=True):
+            ner_token_to_id_mapping = self.categories.get_sub_categories(
+                categories=LayoutType.WORD,
+                sub_categories={LayoutType.WORD: [WordType.TOKEN_TAG, WordType.TAG, WordType.TOKEN_CLASS]},
+                keys=False,
+                values_as_dict=True,
+                name_as_key=True,
+            )
+        else:
+            ner_token_to_id_mapping = {
+                LayoutType.WORD: {
+                    WordType.TAG: {BioTag.BEGIN: 3, BioTag.INSIDE: 1, BioTag.OUTSIDE: 2},
+                    WordType.TOKEN_CLASS: {
+                        TokenClasses.ANSWER: 3,
+                        TokenClasses.HEADER: 4,
+                        TokenClasses.OTHER: 1,
+                        TokenClasses.QUESTION: 2,
+                    },
+                    WordType.TOKEN_TAG: {
+                        TokenClassWithTag.B_ANSWER: 1,
+                        TokenClassWithTag.B_HEADER: 2,
+                        TokenClassWithTag.B_QUESTION: 3,
+                        TokenClassWithTag.I_ANSWER: 4,
+                        TokenClassWithTag.I_HEADER: 5,
+                        TokenClassWithTag.I_QUESTION: 6,
+                        BioTag.OUTSIDE: 7,
+                    },
+                }
+            }
         df = MapData(
             df,
             xfund_to_image(

deepdoctection/eval/eval.py CHANGED Viewed

@@ -293,6 +293,8 @@ class Evaluator:
         show_words = kwargs.pop("show_words", False)
         show_token_class = kwargs.pop("show_token_class", True)
         ignore_default_token_class = kwargs.pop("ignore_default_token_class", False)
+        floating_text_block_categories = kwargs.pop("floating_text_block_categories", None)
+        include_residual_text_containers = kwargs.pop("include_residual_Text_containers", True)
         df_gt = self.dataset.dataflow.build(**kwargs)
         df_pr = self.dataset.dataflow.build(**kwargs)
@@ -301,7 +303,11 @@ class Evaluator:
         df_pr = MapData(df_pr, deepcopy)
         df_pr = self._clean_up_predict_dataflow_annotations(df_pr)
-        page_parsing_component = PageParsingService(text_container=LayoutType.WORD)
+        page_parsing_component = PageParsingService(
+            text_container=LayoutType.WORD,
+            floating_text_block_categories=floating_text_block_categories,  # type: ignore
+            include_residual_text_container=bool(include_residual_text_containers),
+        )
         df_gt = page_parsing_component.predict_dataflow(df_gt)
         if self.pipe_component:

deepdoctection/extern/model.py CHANGED Viewed

@@ -1051,7 +1051,8 @@ class ModelCatalog:
         with jsonlines.open(path) as reader:
             for obj in reader:
                 if not obj["name"] in ModelCatalog.CATALOG:
-                    obj["categories"] = {int(key): get_type(val) for key, val in obj["categories"].items()}
+                    categories = obj.get("categories") or {}
+                    obj["categories"] = {int(key): get_type(val) for key, val in categories.items()}
                     ModelCatalog.register(obj["name"], ModelProfile(**obj))
     @staticmethod

deepdoctection/extern/pdftext.py CHANGED Viewed

@@ -24,21 +24,25 @@ from typing import Optional
 from lazy_imports import try_import
 from ..utils.context import save_tmp_file
-from ..utils.file_utils import get_pdfplumber_requirement
+from ..utils.file_utils import get_pdfplumber_requirement, get_pypdfium2_requirement
 from ..utils.settings import LayoutType, ObjectTypes
 from ..utils.types import Requirement
 from .base import DetectionResult, ModelCategories, PdfMiner
-with try_import() as import_guard:
+with try_import() as pdfplumber_import_guard:
     from pdfplumber.pdf import PDF, Page
+with try_import() as pypdfmium_import_guard:
+    import pypdfium2.raw as pypdfium_c
+    from pypdfium2 import PdfDocument
-def _to_detect_result(word: dict[str, str]) -> DetectionResult:
+def _to_detect_result(word: dict[str, str], class_name: ObjectTypes) -> DetectionResult:
     return DetectionResult(
         box=[float(word["x0"]), float(word["top"]), float(word["x1"]), float(word["bottom"])],
         class_id=1,
         text=word["text"],
-        class_name=LayoutType.WORD,
+        class_name=class_name,
     )
@@ -49,6 +53,7 @@ class PdfPlumberTextDetector(PdfMiner):
         pdf_plumber = PdfPlumberTextDetector()
         df = SerializerPdfDoc.load("path/to/document.pdf")
+        df.reset_state()
         for dp in df:
             detection_results = pdf_plumber.predict(dp["pdf_bytes"])
@@ -61,6 +66,8 @@ class PdfPlumberTextDetector(PdfMiner):
         pipe = DoctectionPipe([text_extract])
         df = pipe.analyze(path="path/to/document.pdf")
+        df.reset_state()
         for dp in df:
             ...
@@ -87,7 +94,7 @@ class PdfPlumberTextDetector(PdfMiner):
                 self._page = PDF(fin).pages[0]
                 self._pdf_bytes = pdf_bytes
                 words = self._page.extract_words(x_tolerance=self.x_tolerance, y_tolerance=self.y_tolerance)
-        detect_results = list(map(_to_detect_result, words))
+        detect_results = [_to_detect_result(word, self.get_category_names()[0]) for word in words]
         return detect_results
     @classmethod
@@ -113,3 +120,87 @@ class PdfPlumberTextDetector(PdfMiner):
     def get_category_names(self) -> tuple[ObjectTypes, ...]:
         return self.categories.get_categories(as_dict=False)
+class Pdfmium2TextDetector(PdfMiner):
+    """
+    Text miner based on the pypdfium2 engine. It will return text on text line level and not on word level
+        pdfmium2 = Pdfmium2TextDetector()
+        df = SerializerPdfDoc.load("path/to/document.pdf")
+        df.reset_state()
+        for dp in df:
+            detection_results = pdfmium2.predict(dp["pdf_bytes"])
+    To use it in a more integrated way:
+        pdfmium2 = Pdfmium2TextDetector()
+        text_extract = TextExtractionService(pdfmium2)
+        pipe = DoctectionPipe([text_extract])
+        df = pipe.analyze(path="path/to/document.pdf")
+        df.reset_state()
+        for dp in df:
+            ...
+    """
+    def __init__(self) -> None:
+        self.name = "Pdfmium"
+        self.model_id = self.get_model_id()
+        self.categories = ModelCategories(init_categories={1: LayoutType.LINE})
+        self._page: Optional[Page] = None
+    def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
+        """
+        Call pypdfium2 and returns detected text as detection results
+        :param pdf_bytes: bytes of a single pdf page
+        :return: A list of DetectionResult
+        """
+        pdf = PdfDocument(pdf_bytes)
+        page = pdf.get_page(0)
+        text = page.get_textpage()
+        words = []
+        height = page.get_height()
+        for obj in page.get_objects((pypdfium_c.FPDF_PAGEOBJ_TEXT,)):
+            box = obj.get_pos()
+            if all(x > 0 for x in box):
+                words.append(
+                    {
+                        "text": text.get_text_bounded(*box),
+                        "x0": box[0],
+                        "x1": box[2],
+                        "top": height - box[3],
+                        "bottom": height - box[1],
+                    }
+                )
+        detect_results = [_to_detect_result(word, self.get_category_names()[0]) for word in words]
+        return detect_results
+    @classmethod
+    def get_requirements(cls) -> list[Requirement]:
+        return [get_pypdfium2_requirement()]
+    def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
+        """
+        Get the width and height of the full page
+        :param pdf_bytes: pdf_bytes generating the pdf
+        :return: width and height
+        """
+        if self._pdf_bytes == pdf_bytes and self._page is not None:
+            return self._page.bbox[2], self._page.bbox[3]  # pylint: disable=E1101
+        # if the pdf bytes is not equal to the cached pdf, will recalculate values
+        pdf = PdfDocument(pdf_bytes)
+        self._page = pdf.get_page(0)
+        self._pdf_bytes = pdf_bytes
+        if self._page is not None:
+            return self._page.get_width(), self._page.get_height()  # type: ignore
+        raise ValueError("Page not found")
+    def get_category_names(self) -> tuple[ObjectTypes, ...]:
+        return self.categories.get_categories(as_dict=False)

deepdoctection/extern/tessocr.py CHANGED Viewed

@@ -421,6 +421,7 @@ class TesseractRotationTransformer(ImageTransformer):
     def __init__(self) -> None:
         self.name = fspath(_TESS_PATH) + "-rotation"
         self.categories = ModelCategories(init_categories={1: PageType.ANGLE})
+        self.model_id = self.get_model_id()
     def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
         """

deepdoctection/mapper/cats.py CHANGED Viewed

@@ -23,7 +23,7 @@ builder method of a dataset.
 from collections import defaultdict
 from typing import Any, Literal, Mapping, Optional, Sequence, Union
-from ..datapoint.annotation import DEFAULT_CATEGORY_ID, CategoryAnnotation, ContainerAnnotation, ImageAnnotation
+from ..datapoint.annotation import DEFAULT_CATEGORY_ID, CategoryAnnotation, ContainerAnnotation
 from ..datapoint.image import Image
 from ..utils.settings import ObjectTypes, SummaryType, TypeOrStr, get_type
 from .maputils import LabelSummarizer, curry
@@ -49,7 +49,7 @@ def cat_to_sub_cat(
     if cat_to_sub_cat_dict is None:
         return dp
     cat_to_sub_cat_dict_obj_type = {get_type(key): get_type(value) for key, value in cat_to_sub_cat_dict.items()}
-    for ann in dp.get_annotation_iter(category_names=list(cat_to_sub_cat_dict_obj_type.keys())):
+    for ann in dp.get_annotation(category_names=list(cat_to_sub_cat_dict_obj_type.keys())):
         sub_cat_type = cat_to_sub_cat_dict_obj_type[get_type(ann.category_name)]
         sub_cat = ann.get_sub_category(sub_cat_type)
         if sub_cat:
@@ -88,13 +88,13 @@ def re_assign_cat_ids(
     :return: Image
     """
-    anns_to_remove: list[ImageAnnotation] = []
-    for ann in dp.get_annotation_iter():
+    ann_ids_to_remove: list[str] = []
+    for ann in dp.get_annotation():
         if categories_dict_name_as_key is not None:
             if ann.category_name in categories_dict_name_as_key:
                 ann.category_id = categories_dict_name_as_key[ann.category_name]
             else:
-                anns_to_remove.append(ann)
+                ann_ids_to_remove.append(ann.annotation_id)
         if cat_to_sub_cat_mapping:
             if ann.category_name in cat_to_sub_cat_mapping:
@@ -104,8 +104,7 @@ def re_assign_cat_ids(
                     sub_category = ann.get_sub_category(key)
                     sub_category.category_id = sub_cat_values_dict.get(sub_category.category_name, DEFAULT_CATEGORY_ID)
-    for ann in anns_to_remove:
-        dp.remove(ann)
+    dp.remove(annotation_ids=ann_ids_to_remove)
     return dp
@@ -249,7 +248,7 @@ def image_to_cat_id(
         raise ValueError(f"id_name_or_value must be in ('id', 'name', 'value') but is {id_name_or_value}")
     if category_names or sub_categories:
-        for ann in dp.get_annotation_iter():
+        for ann in dp.get_annotation():
             if ann.category_name in category_names:
                 cat_container[ann.category_name].append(ann.category_id)
             if ann.category_name in tmp_sub_category_names:
@@ -321,11 +320,11 @@ def remove_cats(
     if isinstance(summary_sub_categories, str):
         summary_sub_categories = [summary_sub_categories]
-    anns_to_remove = []
+    ann_ids_to_remove = []
-    for ann in dp.get_annotation_iter():
+    for ann in dp.get_annotation():
         if ann.category_name in category_names:
-            anns_to_remove.append(ann)
+            ann_ids_to_remove.append(ann.annotation_id)
         if ann.category_name in sub_categories.keys():
             sub_cats_to_remove = sub_categories[ann.category_name]
             if isinstance(sub_cats_to_remove, str):
@@ -339,8 +338,7 @@ def remove_cats(
             for relation in relationships_to_remove:
                 ann.remove_relationship(key=get_type(relation))
-    for ann in anns_to_remove:
-        dp.remove(ann)
+    dp.remove(annotation_ids=ann_ids_to_remove)
     if summary_sub_categories is not None:
         for sub_cat in summary_sub_categories:

deepdoctection/mapper/cocostruct.py CHANGED Viewed

@@ -129,7 +129,7 @@ def image_to_coco(dp: Image) -> tuple[JsonDict, list[JsonDict]]:
     img["height"] = dp.height
     img["file_name"] = dp.file_name
-    for img_ann in dp.get_annotation_iter():
+    for img_ann in dp.get_annotation():
         ann: JsonDict = {
             "id": int("".join([s for s in img_ann.annotation_id if s.isdigit()])),
             "image_id": img["id"],
@@ -139,7 +139,11 @@ def image_to_coco(dp: Image) -> tuple[JsonDict, list[JsonDict]]:
             ann["score"] = img_ann.score
         ann["iscrowd"] = 0
         bounding_box = img_ann.get_bounding_box(dp.image_id)
-        ann["area"] = bounding_box.area
+        ann["area"] = (
+            bounding_box.area
+            if bounding_box.absolute_coords
+            else bounding_box.transform(dp.width, dp.height, absolute_coords=True).area
+        )
         ann["bbox"] = bounding_box.to_list(mode="xywh")
         anns.append(ann)

deepdoctection/mapper/d2struct.py CHANGED Viewed

@@ -41,7 +41,7 @@ with try_import() as d2_import_guard:
     from detectron2.structures import BoxMode
 with try_import() as wb_import_guard:
-    from wandb import Classes
+    from wandb import Classes  # type: ignore
     from wandb import Image as Wbimage
@@ -189,6 +189,7 @@ def to_wandb_image(
         class_set = Classes([{"name": val, "id": key} for key, val in sub_categories.items()])
     else:
         class_set = Classes([{"name": val, "id": key} for key, val in categories.items()])
+        class_labels = dict(categories.items())
     for ann in anns:
         bounding_box = ann.get_bounding_box(dp.image_id)

deepdoctection/mapper/laylmstruct.py CHANGED Viewed

@@ -127,7 +127,7 @@ def image_to_raw_layoutlm_features(
     all_boxes = []
     all_labels: list[int] = []
-    anns = dp.get_annotation_iter(category_names=LayoutType.WORD)
+    anns = dp.get_annotation(category_names=LayoutType.WORD)
     word_id_to_segment_box = {}
     if segment_positions:

deepdoctection 0.33__py3-none-any.whl → 0.35__py3-none-any.whl

Potentially problematic release.

deepdoctection 0.33__py3-none-any.whl → 0.35__py3-none-any.whl