deepdoctection-0.33-py3-none-any.whl → deepdoctection-0.34-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic.
- deepdoctection/__init__.py +6 -3
- deepdoctection/analyzer/dd.py +39 -31
- deepdoctection/datapoint/annotation.py +40 -2
- deepdoctection/datapoint/image.py +117 -41
- deepdoctection/datapoint/view.py +1 -1
- deepdoctection/datasets/base.py +1 -1
- deepdoctection/datasets/instances/fintabnet.py +1 -1
- deepdoctection/datasets/instances/xfund.py +29 -7
- deepdoctection/eval/eval.py +7 -1
- deepdoctection/extern/model.py +2 -1
- deepdoctection/mapper/cats.py +11 -13
- deepdoctection/mapper/cocostruct.py +6 -2
- deepdoctection/mapper/d2struct.py +2 -1
- deepdoctection/mapper/laylmstruct.py +1 -1
- deepdoctection/mapper/match.py +31 -0
- deepdoctection/mapper/misc.py +1 -1
- deepdoctection/mapper/prodigystruct.py +1 -1
- deepdoctection/pipe/anngen.py +27 -0
- deepdoctection/pipe/base.py +23 -0
- deepdoctection/pipe/common.py +123 -38
- deepdoctection/pipe/segment.py +1 -1
- deepdoctection/pipe/sub_layout.py +1 -1
- deepdoctection/utils/env_info.py +1 -1
- deepdoctection/utils/fs.py +27 -4
- deepdoctection/utils/pdf_utils.py +28 -3
- deepdoctection/utils/settings.py +3 -0
- {deepdoctection-0.33.dist-info → deepdoctection-0.34.dist-info}/METADATA +4 -1
- {deepdoctection-0.33.dist-info → deepdoctection-0.34.dist-info}/RECORD +31 -31
- {deepdoctection-0.33.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
- {deepdoctection-0.33.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
- {deepdoctection-0.33.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
deepdoctection/mapper/cocostruct.py
CHANGED

@@ -129,7 +129,7 @@ def image_to_coco(dp: Image) -> tuple[JsonDict, list[JsonDict]]:
     img["height"] = dp.height
     img["file_name"] = dp.file_name

-    for img_ann in dp.
+    for img_ann in dp.get_annotation():
         ann: JsonDict = {
             "id": int("".join([s for s in img_ann.annotation_id if s.isdigit()])),
             "image_id": img["id"],

@@ -139,7 +139,11 @@ def image_to_coco(dp: Image) -> tuple[JsonDict, list[JsonDict]]:
         ann["score"] = img_ann.score
         ann["iscrowd"] = 0
         bounding_box = img_ann.get_bounding_box(dp.image_id)
-        ann["area"] =
+        ann["area"] = (
+            bounding_box.area
+            if bounding_box.absolute_coords
+            else bounding_box.transform(dp.width, dp.height, absolute_coords=True).area
+        )
         ann["bbox"] = bounding_box.to_list(mode="xywh")
         anns.append(ann)
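The second hunk reflects that COCO expects areas in absolute pixels: the box area is used directly only when the box already carries absolute coordinates, otherwise the box is first transformed with the page width and height. A minimal, self-contained sketch of that idea; the coco_area helper below is hypothetical and not the library's BoundingBox API.

def coco_area(box_xywh, absolute_coords, img_width, img_height):
    """Return a COCO-style 'area' for a box given as (x, y, w, h)."""
    x, y, w, h = box_xywh
    if absolute_coords:
        return w * h
    # relative coordinates in [0, 1] must be scaled to pixels first
    return (w * img_width) * (h * img_height)

# a relative 10% x 10% box on a 1000 x 800 page covers 8000 px^2, not 0.01
print(coco_area((0.1, 0.2, 0.1, 0.1), absolute_coords=False, img_width=1000, img_height=800))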
deepdoctection/mapper/d2struct.py
CHANGED

@@ -41,7 +41,7 @@ with try_import() as d2_import_guard:
     from detectron2.structures import BoxMode

 with try_import() as wb_import_guard:
-    from wandb import Classes
+    from wandb import Classes  # type: ignore
     from wandb import Image as Wbimage


@@ -189,6 +189,7 @@ def to_wandb_image(
         class_set = Classes([{"name": val, "id": key} for key, val in sub_categories.items()])
     else:
         class_set = Classes([{"name": val, "id": key} for key, val in categories.items()])
+    class_labels = dict(categories.items())

     for ann in anns:
         bounding_box = ann.get_bounding_box(dp.image_id)

deepdoctection/mapper/laylmstruct.py
CHANGED

@@ -127,7 +127,7 @@ def image_to_raw_layoutlm_features(
     all_boxes = []
     all_labels: list[int] = []

-    anns = dp.
+    anns = dp.get_annotation(category_names=LayoutType.WORD)

     word_id_to_segment_box = {}
     if segment_positions:
deepdoctection/mapper/match.py
CHANGED

@@ -23,6 +23,7 @@ from typing import Any, Literal, Optional, Sequence, Union

 import numpy as np
 from numpy.typing import NDArray
+from scipy.spatial import distance

 from ..datapoint.annotation import ImageAnnotation
 from ..datapoint.box import iou

@@ -164,3 +165,33 @@ def match_anns_by_intersection(
         return [], [], [], []

     return child_index, parent_index, child_anns, parent_anns
+
+
+def match_anns_by_distance(
+    dp: Image,
+    parent_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
+    child_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
+    parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
+    child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
+) -> list[tuple[ImageAnnotation, ImageAnnotation]]:
+    """
+    Generates pairs of parent and child annotations by calculating the euclidean distance between the centers of the
+    parent and child bounding boxes. It will return the closest child for each parent. Note, that a child can be
+    assigned multiple times to different parents.
+
+    :param dp: image datapoint
+    :param parent_ann_category_names: single str or list of category names
+    :param child_ann_category_names: single str or list of category names
+    :param parent_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other parent candi-
+                           dates which are not in the list.
+    :param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
+                          candidates which are not in the list.
+    :return:
+    """
+
+    parent_anns = dp.get_annotation(annotation_ids=parent_ann_ids, category_names=parent_ann_category_names)
+    child_anns = dp.get_annotation(annotation_ids=child_ann_ids, category_names=child_ann_category_names)
+    child_centers = [block.get_bounding_box(dp.image_id).center for block in child_anns]
+    parent_centers = [block.get_bounding_box(dp.image_id).center for block in parent_anns]
+    child_indices = distance.cdist(parent_centers, child_centers).argmin(axis=1)
+    return [(parent_anns[i], child_anns[j]) for i, j in enumerate(child_indices)]
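The core of match_anns_by_distance is a plain nearest-neighbour lookup over box centers. A standalone illustration with numpy and scipy only (no deepdoctection objects involved); it also shows why the same child can end up attached to several parents.

import numpy as np
from scipy.spatial import distance

parent_centers = np.array([[100.0, 100.0], [400.0, 120.0]])                 # e.g. figure centers
child_centers = np.array([[110.0, 180.0], [390.0, 200.0], [50.0, 600.0]])   # e.g. caption centers

# cdist builds the (num_parents, num_children) matrix of pairwise euclidean distances;
# argmin(axis=1) picks the closest child for every parent independently, so the same
# child index may be chosen by more than one parent.
child_indices = distance.cdist(parent_centers, child_centers).argmin(axis=1)

print([(parent, int(child)) for parent, child in enumerate(child_indices)])  # [(0, 0), (1, 1)]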
deepdoctection/mapper/misc.py
CHANGED

@@ -145,7 +145,7 @@ def image_ann_to_image(dp: Image, category_names: Union[str, list[str]], crop_image
     :return: Image
     """

-    img_anns = dp.
+    img_anns = dp.get_annotation(category_names=category_names)
     for ann in img_anns:
         dp.image_ann_to_image(annotation_id=ann.annotation_id, crop_image=crop_image)


deepdoctection/mapper/prodigystruct.py
CHANGED

@@ -163,7 +163,7 @@ def image_to_prodigy(dp: Image, category_names: Optional[Sequence[ObjectTypes]]
     output["image_id"] = dp.image_id

     spans = []
-    for ann in dp.
+    for ann in dp.get_annotation(category_names=category_names):
         bounding_box = ann.get_bounding_box(dp.image_id)
         if not bounding_box.absolute_coords:
             bounding_box = bounding_box.transform(dp.width, dp.height, absolute_coords=True)
deepdoctection/pipe/anngen.py
CHANGED

@@ -272,6 +272,33 @@ class DatapointManager:
             return None
         return cont_ann.annotation_id

+    def set_relationship_annotation(
+        self, relationship_name: ObjectTypes, target_annotation_id: str, annotation_id: str
+    ) -> Optional[str]:
+        """
+        Create a relationship annotation and dump it to the target annotation.
+
+        :param relationship_name: The relationship key
+        :param target_annotation_id: Annotation_id of the parent `ImageAnnotation`
+        :param annotation_id: The annotation_id to dump the relationship to
+
+        :return: Annotation_id of the parent `ImageAnnotation` for references if the dumpy has been successful
+        """
+        self.assert_datapoint_passed()
+        with MappingContextManager(
+            dp_name=self.datapoint.file_name,
+            filter_level="annotation",
+            relationship_annotation={
+                "relationship_name": relationship_name.value,
+                "target_annotation_id": target_annotation_id,
+                "annotation_id": annotation_id,
+            },
+        ) as annotation_context:
+            self._cache_anns[target_annotation_id].dump_relationship(relationship_name, annotation_id)
+        if annotation_context.context_error:
+            return None
+        return target_annotation_id
+
     def set_summary_annotation(
         self,
         summary_key: ObjectTypes,
deepdoctection/pipe/base.py
CHANGED

@@ -163,6 +163,29 @@ class PipelineComponent(ABC):
             return True
         return False

+    def _undo(self, dp: Image) -> Image:
+        """
+        Undo the processing of the pipeline component. It will remove `ImageAnnotation`, `CategoryAnnotation` and
+        `ContainerAnnotation` with the service_id of the pipeline component.
+        """
+        if self.timer_on:
+            with timed_operation(self.__class__.__name__):
+                self.dp_manager.datapoint = dp
+                dp.remove(service_ids=self.service_id)
+        else:
+            self.dp_manager.datapoint = dp
+            dp.remove(service_ids=self.service_id)
+        return self.dp_manager.datapoint
+
+    def undo(self, df: DataFlow) -> DataFlow:
+        """
+        Mapping a datapoint via `_undo` within a dataflow pipeline
+
+        :param df: An input dataflow of Images
+        :return: A output dataflow of Images
+        """
+        return MapData(df, self._undo)
+

 class Pipeline(ABC):
     """
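Conceptually, _undo strips everything a component wrote by filtering on its service_id (the real code delegates to Image.remove(service_ids=...)). A toy, self-contained sketch of that filtering idea; the Ann dataclass and the service names below are hypothetical stand-ins for the library's annotation objects.

from dataclasses import dataclass

@dataclass
class Ann:
    annotation_id: str
    service_id: str  # records which pipeline component produced the annotation

anns = [Ann("a1", "layout"), Ann("a2", "matching"), Ann("a3", "layout")]

def undo(annotations, service_id):
    """Keep only annotations that were not produced by the given service."""
    return [a for a in annotations if a.service_id != service_id]

print([a.annotation_id for a in undo(anns, "layout")])  # ['a2']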
deepdoctection/pipe/common.py
CHANGED

@@ -29,8 +29,7 @@ import numpy as np
 from ..dataflow import DataFlow, MapData
 from ..datapoint.image import Image
 from ..datapoint.view import IMAGE_DEFAULTS, Page
-from ..mapper.
-from ..mapper.match import match_anns_by_intersection
+from ..mapper.match import match_anns_by_distance, match_anns_by_intersection
 from ..mapper.misc import to_image
 from ..utils.settings import LayoutType, ObjectTypes, Relationships, TypeOrStr, get_type
 from .base import MetaAnnotation, PipelineComponent

@@ -76,21 +75,23 @@ class ImageCroppingService(PipelineComponent):
         pass


-
-class MatchingService(PipelineComponent):
+class IntersectionMatcher:
     """
-    Objects of two object classes can be assigned to one another by determining their pairwise
-    a limit, a relation is created between them.
-    The parent object class (based on its category) and the child object class are defined for the service.
-    relation is created in the parent class if the conditions are met.
+    Objects of two object classes can be assigned to one another by determining their pairwise intersection. If this is
+    above a limit, a relation is created between them.
+    The parent object class (based on its category) and the child object class are defined for the service.

     Either `iou` (intersection-over-union) or `ioa` (intersection-over-area) can be selected as the matching rule.

         # the following will assign word annotations to text and title annotation, provided that their ioa-threshold
         # is above 0.7. words below that threshold will not be assigned.

-
-
+        matcher = IntersectionMatcher(matching_rule="ioa", threshold=0.7)
+
+        match_service = MatchingService(parent_categories=["text","title"],
+                                        child_categories="word",
+                                        matcher=matcher,
+                                        relationship_key=Relationships.CHILD)

         # Assigning means that text and title annotation will receive a relationship called "CHILD" which is a list
         of annotation ids of mapped words.

@@ -98,16 +99,12 @@ class MatchingService(PipelineComponent):

     def __init__(
         self,
-        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
-        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
         matching_rule: Literal["iou", "ioa"],
         threshold: float,
         use_weighted_intersections: bool = False,
         max_parent_only: bool = False,
     ) -> None:
         """
-        :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
-        :param child_categories: list of categories to be used for a child class.
         :param matching_rule: "iou" or "ioa"
         :param threshold: iou/ioa threshold. Value between [0,1]
         :param use_weighted_intersections: This is currently only implemented for matching_rule 'ioa'. Instead of using

@@ -115,7 +112,105 @@ class MatchingService(PipelineComponent):
               that intersections with more cells will likely decrease the ioa value. By
               multiplying the ioa with the number of all intersection for each child this
               value calibrate the ioa.
-        :param max_parent_only: Will assign to each child at most one parent with maximum ioa
+        :param max_parent_only: Will assign to each child at most one parent with maximum ioa"""
+
+        if matching_rule not in ("iou", "ioa"):
+            raise ValueError("segment rule must be either iou or ioa")
+        self.matching_rule = matching_rule
+        self.threshold = threshold
+        self.use_weighted_intersections = use_weighted_intersections
+        self.max_parent_only = max_parent_only
+
+    def match(
+        self,
+        dp: Image,
+        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+    ) -> list[tuple[str, str]]:
+        """
+        The matching algorithm
+
+        :param dp: datapoint image
+        :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
+        :param child_categories: list of categories to be used for a child class.
+
+        :return: A list of tuples with parent and child annotation ids
+        """
+        child_index, parent_index, child_anns, parent_anns = match_anns_by_intersection(
+            dp,
+            parent_ann_category_names=parent_categories,
+            child_ann_category_names=child_categories,
+            matching_rule=self.matching_rule,
+            threshold=self.threshold,
+            use_weighted_intersections=self.use_weighted_intersections,
+            max_parent_only=self.max_parent_only,
+        )
+
+        matched_child_anns = np.take(child_anns, child_index)  # type: ignore
+        matched_parent_anns = np.take(parent_anns, parent_index)  # type: ignore
+
+        all_parent_child_relations = []
+        for idx, parent in enumerate(matched_parent_anns):
+            all_parent_child_relations.append((parent.annotation_id, matched_child_anns[idx].annotation_id))
+
+        return all_parent_child_relations
+
+
+class NeighbourMatcher:
+    """
+    Objects of two object classes can be assigned to one another by determining their pairwise distance.
+
+        # the following will assign caption annotations to figure annotation
+
+        matcher = NeighbourMatcher()
+
+        match_service = MatchingService(parent_categories=["figure"],
+                                        child_categories="caption",
+                                        matcher=matcher,
+                                        relationship_key=Relationships.LAYOUT_LINK)
+
+    """
+
+    def match(
+        self,
+        dp: Image,
+        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+    ) -> list[tuple[str, str]]:
+        """
+        The matching algorithm
+
+        :param dp: datapoint image
+        :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
+        :param child_categories: list of categories to be used for a child class.
+
+        :return: A list of tuples with parent and child annotation ids
+        """
+
+        return [
+            (pair[0].annotation_id, pair[1].annotation_id)
+            for pair in match_anns_by_distance(dp, parent_categories, child_categories)
+        ]
+
+
+@pipeline_component_registry.register("MatchingService")
+class MatchingService(PipelineComponent):
+    """
+    A service to match annotations of two categories by intersection or distance. The matched annotations will be
+    assigned a relationship. The parent category will receive a relationship to the child category.
+    """
+
+    def __init__(
+        self,
+        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        matcher: Union[IntersectionMatcher, NeighbourMatcher],
+        relationship_key: Relationships,
+    ) -> None:
+        """
+        :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
+        :param child_categories: list of categories to be used for a child class.
+
         """
         self.parent_categories = (
             (get_type(parent_categories),)

@@ -127,13 +222,8 @@ class MatchingService(PipelineComponent):
             if isinstance(child_categories, str)
             else (tuple(get_type(category_name) for category_name in child_categories))
         )
-
-
-
-        self.matching_rule = matching_rule
-        self.threshold = threshold
-        self.use_weighted_intersections = use_weighted_intersections
-        self.max_parent_only = max_parent_only
+        self.matcher = matcher
+        self.relationship_key = relationship_key
         super().__init__("matching")

     def serve(self, dp: Image) -> None:

@@ -143,24 +233,14 @@ class MatchingService(PipelineComponent):

         :param dp: datapoint image
         """
-        child_index, parent_index, child_anns, parent_anns = match_anns_by_intersection(
-            dp,
-            parent_ann_category_names=self.parent_categories,
-            child_ann_category_names=self.child_categories,
-            matching_rule=self.matching_rule,
-            threshold=self.threshold,
-            use_weighted_intersections=self.use_weighted_intersections,
-            max_parent_only=self.max_parent_only,
-        )

-
-
-
-
-            parent.dump_relationship(Relationships.CHILD, matched_child_anns[idx].annotation_id)
+        matched_pairs = self.matcher.match(dp, self.parent_categories, self.child_categories)
+
+        for pair in matched_pairs:
+            self.dp_manager.set_relationship_annotation(self.relationship_key, pair[0], pair[1])

     def clone(self) -> PipelineComponent:
-        return self.__class__(self.parent_categories, self.child_categories, self.
+        return self.__class__(self.parent_categories, self.child_categories, self.matcher, self.relationship_key)

     def get_meta_annotation(self) -> MetaAnnotation:
         return MetaAnnotation(

@@ -215,7 +295,12 @@ class PageParsingService(PipelineComponent):
         :param dp: Image
         :return: Page
         """
-        return Page.from_image(
+        return Page.from_image(
+            dp,
+            text_container=self.text_container,
+            floating_text_block_categories=self.floating_text_block_categories,
+            include_residual_text_container=self.include_residual_text_container,
+        )

     def _init_sanity_checks(self) -> None:
         assert self.text_container in (
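Putting the refactor together: the matching logic now lives in a matcher object that is handed to MatchingService along with the relationship to write. The configuration below is assembled from the two docstring examples in this diff; it assumes deepdoctection 0.34 is installed and imports the classes from the module paths shown above.

from deepdoctection.pipe.common import IntersectionMatcher, MatchingService, NeighbourMatcher
from deepdoctection.utils.settings import Relationships

# words are assigned to text/title blocks when their ioa is above 0.7
word_matcher = IntersectionMatcher(matching_rule="ioa", threshold=0.7)
word_service = MatchingService(
    parent_categories=["text", "title"],
    child_categories="word",
    matcher=word_matcher,
    relationship_key=Relationships.CHILD,
)

# captions are linked to their nearest figure by center distance
caption_service = MatchingService(
    parent_categories=["figure"],
    child_categories="caption",
    matcher=NeighbourMatcher(),
    relationship_key=Relationships.LAYOUT_LINK,
)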
deepdoctection/pipe/segment.py
CHANGED

@@ -372,7 +372,7 @@ def stretch_items(
     :param remove_iou_threshold_cols: iou threshold for removing overlapping columns
     :return: An Image
     """
-    table_anns = dp.
+    table_anns = dp.get_annotation(category_names=table_name)

     for table in table_anns:
         dp = stretch_item_per_table(dp, table, row_name, col_name, remove_iou_threshold_rows, remove_iou_threshold_cols)

deepdoctection/pipe/sub_layout.py
CHANGED

@@ -190,7 +190,7 @@ class SubImageLayoutService(PipelineComponent):
            - Optionally invoke the DetectResultGenerator
            - Generate ImageAnnotations and dump to parent image and sub image.
         """
-        sub_image_anns = dp.
+        sub_image_anns = dp.get_annotation(category_names=self.sub_image_name)
         for sub_image_ann in sub_image_anns:
             np_image = self.prepare_np_image(sub_image_ann)
             detect_result_list = self.predictor.predict(np_image)
deepdoctection/utils/env_info.py
CHANGED

@@ -176,7 +176,7 @@ def collect_installed_dependencies(data: KeyValEnvInfos) -> KeyValEnvInfos:
         data.append(("Pycocotools", "None"))

     if scipy_available():
-        import scipy
+        import scipy

         data.append(("Scipy", scipy.__version__))
     else:
deepdoctection/utils/fs.py
CHANGED

@@ -25,6 +25,7 @@ import os
 from base64 import b64encode
 from io import BytesIO
 from pathlib import Path
+from shutil import copyfile
 from typing import Callable, Literal, Optional, Protocol, Union, overload
 from urllib.request import urlretrieve

@@ -50,6 +51,7 @@ __all__ = [
     "get_configs_dir_path",
     "get_weights_dir_path",
     "get_dataset_dir_path",
+    "maybe_copy_config_to_cache",
 ]


@@ -254,34 +256,55 @@ def load_json(path_ann: PathLikeOrStr) -> JsonDict:
     return json_dict


-def get_package_path() ->
+def get_package_path() -> Path:
     """
     :return: full base path of this package
     """
     return PATH


-def get_weights_dir_path() ->
+def get_weights_dir_path() -> Path:
     """
     :return: full base path to the model dir
     """
     return MODEL_DIR


-def get_configs_dir_path() ->
+def get_configs_dir_path() -> Path:
     """
     :return: full base path to the configs dir
     """
     return CONFIGS


-def get_dataset_dir_path() ->
+def get_dataset_dir_path() -> Path:
     """
     :return: full base path to the dataset dir
     """
     return DATASET_DIR


+def maybe_copy_config_to_cache(
+    package_path: PathLikeOrStr, configs_dir_path: PathLikeOrStr, file_name: str, force_copy: bool = True
+) -> str:
+    """
+    Initial copying of various files
+    :param package_path: base path to directory of source file `file_name`
+    :param configs_dir_path: base path to target directory
+    :param file_name: file to copy
+    :param force_copy: If file is already in target directory, will re-copy the file
+
+    :return: path to the copied file_name
+    """
+
+    absolute_path_source = os.path.join(package_path, file_name)
+    absolute_path = os.path.join(configs_dir_path, os.path.join(os.path.split(file_name)[1]))
+    mkdir_p(os.path.split(absolute_path)[0])
+    if not os.path.isfile(absolute_path) or force_copy:
+        copyfile(absolute_path_source, absolute_path)
+    return absolute_path
+
+
 @deprecated("Use pathlib operations instead", "2022-06-08")
 def sub_path(anchor_dir: PathLikeOrStr, *paths: PathLikeOrStr) -> PathLikeOrStr:
     """
|
|
|
107
107
|
)
|
|
108
108
|
sys.exit()
|
|
109
109
|
|
|
110
|
-
|
|
111
|
-
return file_reader
|
|
110
|
+
return PdfReader(os.fspath(path))
|
|
112
111
|
|
|
113
112
|
|
|
114
113
|
def get_pdf_file_writer() -> PdfWriter:
|
|
@@ -125,12 +124,24 @@ class PDFStreamer:
|
|
|
125
124
|
|
|
126
125
|
**Example:**
|
|
127
126
|
|
|
128
|
-
|
|
127
|
+
# Building a Dataflow with a PDFStreamer
|
|
128
|
+
df = dataflow.DataFromIterable(PDFStreamer(path=path))
|
|
129
129
|
df.reset_state()
|
|
130
130
|
|
|
131
131
|
for page in df:
|
|
132
132
|
... # do whatever you like
|
|
133
133
|
|
|
134
|
+
# Something else you can do:
|
|
135
|
+
streamer = PDFStreamer(path=path)
|
|
136
|
+
pages = len(streamer) # get the number of pages
|
|
137
|
+
random_int = random.sample(range(0, pages), 2) # select some pages
|
|
138
|
+
for ran in random_int:
|
|
139
|
+
pdf_bytes = streamer[ran] # get the page bytes directly
|
|
140
|
+
|
|
141
|
+
streamer.close() # Do not forget to close the streamer, otherwise the file will never be closed and might
|
|
142
|
+
# cause memory leaks if you open many files.
|
|
143
|
+
|
|
144
|
+
|
|
134
145
|
"""
|
|
135
146
|
|
|
136
147
|
def __init__(self, path: PathLikeOrStr) -> None:
|
|
@@ -150,6 +161,20 @@ class PDFStreamer:
|
|
|
150
161
|
writer.add_page(self.file_reader.pages[k])
|
|
151
162
|
writer.write(buffer)
|
|
152
163
|
yield buffer.getvalue(), k
|
|
164
|
+
self.file_reader.close()
|
|
165
|
+
|
|
166
|
+
def __getitem__(self, index: int) -> bytes:
|
|
167
|
+
buffer = BytesIO()
|
|
168
|
+
writer = get_pdf_file_writer()
|
|
169
|
+
writer.add_page(self.file_reader.pages[index])
|
|
170
|
+
writer.write(buffer)
|
|
171
|
+
return buffer.getvalue()
|
|
172
|
+
|
|
173
|
+
def close(self) -> None:
|
|
174
|
+
"""
|
|
175
|
+
Close the file reader
|
|
176
|
+
"""
|
|
177
|
+
self.file_reader.close()
|
|
153
178
|
|
|
154
179
|
|
|
155
180
|
# The following functions are modified versions from the Python poppler wrapper
|
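The docstring above already demonstrates random access and close(); a small variant of that example makes the clean-up robust by closing in a finally block. Here "sample.pdf" is a placeholder for any PDF on disk.

from deepdoctection.utils.pdf_utils import PDFStreamer

streamer = PDFStreamer(path="sample.pdf")  # placeholder path
try:
    print(len(streamer))       # number of pages
    first_page = streamer[0]   # new random access via __getitem__, returns the page as pdf bytes
    print(len(first_page))
finally:
    streamer.close()           # new explicit close(), releases the underlying file reader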
deepdoctection/utils/settings.py
CHANGED

@@ -101,6 +101,7 @@ class DocumentType(ObjectTypes):
     GOVERNMENT_TENDERS = "government_tenders"
     MANUALS = "manuals"
     PATENTS = "patents"
+    MARK = "mark"


 @object_types_registry.register("LayoutType")

@@ -130,6 +131,7 @@ class LayoutType(ObjectTypes):
     BACKGROUND = "background"
     PAGE_NUMBER = "page_number"
     KEY_VALUE_AREA = "key_value_area"
+    LIST_ITEM = "list_item"


 @object_types_registry.register("TableType")

@@ -221,6 +223,7 @@ class Relationships(ObjectTypes):
     CHILD = "child"
     READING_ORDER = "reading_order"
     SEMANTIC_ENTITY_LINK = "semantic_entity_link"
+    LAYOUT_LINK = "layout_link"


 @object_types_registry.register("Languages")

{deepdoctection-0.33.dist-info → deepdoctection-0.34.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deepdoctection
-Version: 0.
+Version: 0.34
 Summary: Repository for Document AI
 Home-page: https://github.com/deepdoctection/deepdoctection
 Author: Dr. Janis Meyer

@@ -29,6 +29,7 @@ Requires-Dist: Pillow >=10.0.0
 Requires-Dist: pypdf >=3.16.0
 Requires-Dist: pyyaml >=6.0.1
 Requires-Dist: pyzmq >=16
+Requires-Dist: scipy >=1.13.1
 Requires-Dist: termcolor >=1.1
 Requires-Dist: tabulate >=0.7.7
 Requires-Dist: tqdm ==4.64.0

@@ -74,6 +75,7 @@ Requires-Dist: Pillow >=10.0.0 ; extra == 'pt'
 Requires-Dist: pypdf >=3.16.0 ; extra == 'pt'
 Requires-Dist: pyyaml >=6.0.1 ; extra == 'pt'
 Requires-Dist: pyzmq >=16 ; extra == 'pt'
+Requires-Dist: scipy >=1.13.1 ; extra == 'pt'
 Requires-Dist: termcolor >=1.1 ; extra == 'pt'
 Requires-Dist: tabulate >=0.7.7 ; extra == 'pt'
 Requires-Dist: tqdm ==4.64.0 ; extra == 'pt'

@@ -105,6 +107,7 @@ Requires-Dist: Pillow >=10.0.0 ; extra == 'tf'
 Requires-Dist: pypdf >=3.16.0 ; extra == 'tf'
 Requires-Dist: pyyaml >=6.0.1 ; extra == 'tf'
 Requires-Dist: pyzmq >=16 ; extra == 'tf'
+Requires-Dist: scipy >=1.13.1 ; extra == 'tf'
 Requires-Dist: termcolor >=1.1 ; extra == 'tf'
 Requires-Dist: tabulate >=0.7.7 ; extra == 'tf'
 Requires-Dist: tqdm ==4.64.0 ; extra == 'tf'