PyPI - deepdoctection - Versions diffs - 0.39.7__py3-none-any.whl → 0.40.0__py3-none-any.whl - Mend

deepdoctection-0.39.7-py3-none-any.whl → deepdoctection-0.40.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of deepdoctection might be problematic. Click here for more details.

Files changed (19)

deepdoctection/__init__.py +2 -1
deepdoctection/analyzer/_config.py +0 -1
deepdoctection/analyzer/factory.py +34 -13
deepdoctection/datapoint/image.py +5 -5
deepdoctection/datapoint/view.py +5 -5
deepdoctection/mapper/match.py +28 -8
deepdoctection/pipe/anngen.py +1 -25
deepdoctection/pipe/common.py +91 -38
deepdoctection/pipe/layout.py +26 -13
deepdoctection/pipe/order.py +6 -22
deepdoctection/pipe/segment.py +36 -43
deepdoctection/pipe/sub_layout.py +1 -10
deepdoctection/pipe/text.py +5 -14
deepdoctection/train/hf_detr_train.py +1 -0
{deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/METADATA +1 -1
{deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/RECORD +19 -19
{deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/WHEEL +1 -1
{deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/licenses/LICENSE +0 -0
{deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/top_level.txt +0 -0

deepdoctection/__init__.py CHANGED Viewed

@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
 # pylint: enable=wrong-import-position
-__version__ = "0.39.7"
+__version__ = "0.40.0"
 _IMPORT_STRUCTURE = {
     "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -260,6 +260,7 @@ _IMPORT_STRUCTURE = {
         "ImageCroppingService",
         "IntersectionMatcher",
         "NeighbourMatcher",
+        "FamilyCompound",
         "MatchingService",
         "PageParsingService",
         "AnnotationNmsService",

deepdoctection/analyzer/_config.py CHANGED Viewed

@@ -72,7 +72,6 @@ cfg.SEGMENTATION.THRESHOLD_COLS = 0.4
 cfg.SEGMENTATION.FULL_TABLE_TILING = True
 cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_ROWS = 0.001
 cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_COLS = 0.001
-cfg.SEGMENTATION.CELL_CATEGORY_ID = 12
 cfg.SEGMENTATION.TABLE_NAME = LayoutType.TABLE
 cfg.SEGMENTATION.PUBTABLES_CELL_NAMES = [
     CellType.SPANNING,

deepdoctection/analyzer/factory.py CHANGED Viewed

@@ -35,13 +35,14 @@ from ..extern.tpdetect import TPFrcnnDetector
 from ..pipe.base import PipelineComponent
 from ..pipe.common import (
     AnnotationNmsService,
+    FamilyCompound,
     IntersectionMatcher,
     MatchingService,
     NeighbourMatcher,
     PageParsingService,
 )
 from ..pipe.doctectionpipe import DoctectionPipe
-from ..pipe.layout import ImageLayoutService
+from ..pipe.layout import ImageLayoutService, skip_if_category_or_service_extracted
 from ..pipe.order import TextOrderService
 from ..pipe.refine import TableSegmentationRefinementService
 from ..pipe.segment import PubtablesSegmentationService, TableSegmentationService
@@ -284,7 +285,6 @@ class ServiceFactory:
         return SubImageLayoutService(
             sub_image_detector=detector,
             sub_image_names=[LayoutType.TABLE, LayoutType.TABLE_ROTATED],
-            category_id_mapping=None,
             detect_result_generator=detect_result_generator,
             padder=padder,
         )
@@ -405,7 +405,6 @@ class ServiceFactory:
                 tile_table_with_items=config.SEGMENTATION.FULL_TABLE_TILING,
                 remove_iou_threshold_rows=config.SEGMENTATION.REMOVE_IOU_THRESHOLD_ROWS,
                 remove_iou_threshold_cols=config.SEGMENTATION.REMOVE_IOU_THRESHOLD_COLS,
-                cell_class_id=config.SEGMENTATION.CELL_CATEGORY_ID,
                 table_name=config.SEGMENTATION.TABLE_NAME,
                 cell_names=config.SEGMENTATION.PUBTABLES_CELL_NAMES,
                 spanning_cell_names=config.SEGMENTATION.PUBTABLES_SPANNING_CELL_NAMES,
@@ -516,6 +515,15 @@ class ServiceFactory:
         """
         return ServiceFactory._build_pdf_miner_text_service(detector)
+    @staticmethod
+    def _build_doctr_word_detector_service(detector: DoctrTextlineDetector) -> ImageLayoutService:
+        """Building a Doctr word detector service
+        :param detector: DoctrTextlineDetector
+        :return: ImageLayoutService
+        """
+        return ImageLayoutService(layout_detector=detector, to_image=True, crop_image=True)
     @staticmethod
     def build_doctr_word_detector_service(detector: DoctrTextlineDetector) -> ImageLayoutService:
         """Building a Doctr word detector service
@@ -523,9 +531,7 @@ class ServiceFactory:
         :param detector: DoctrTextlineDetector
         :return: ImageLayoutService
         """
-        return ImageLayoutService(
-            layout_detector=detector, to_image=True, crop_image=True, skip_if_layout_extracted=True
-        )
+        return ServiceFactory._build_doctr_word_detector_service(detector)
     @staticmethod
     def _build_text_extraction_service(
@@ -539,7 +545,6 @@ class ServiceFactory:
         """
         return TextExtractionService(
             detector,
-            skip_if_text_extracted=config.USE_PDF_MINER,
             extract_from_roi=config.TEXT_CONTAINER if config.OCR.USE_DOCTR else None,
         )
@@ -567,11 +572,16 @@ class ServiceFactory:
             threshold=config.WORD_MATCHING.THRESHOLD,
             max_parent_only=config.WORD_MATCHING.MAX_PARENT_ONLY,
         )
+        family_compounds = [
+            FamilyCompound(
+                parent_categories=config.WORD_MATCHING.PARENTAL_CATEGORIES,
+                child_categories=config.TEXT_CONTAINER,
+                relationship_key=Relationships.CHILD,
+            )
+        ]
         return MatchingService(
-            parent_categories=config.WORD_MATCHING.PARENTAL_CATEGORIES,
-            child_categories=config.TEXT_CONTAINER,
+            family_compounds=family_compounds,
             matcher=matcher,
-            relationship_key=Relationships.CHILD,
         )
     @staticmethod
@@ -591,11 +601,16 @@ class ServiceFactory:
         :return: MatchingService
         """
         neighbor_matcher = NeighbourMatcher()
+        family_compounds = [
+            FamilyCompound(
+                parent_categories=config.LAYOUT_LINK.PARENTAL_CATEGORIES,
+                child_categories=config.LAYOUT_LINK.CHILD_CATEGORIES,
+                relationship_key=Relationships.LAYOUT_LINK,
+            )
+        ]
         return MatchingService(
-            parent_categories=config.LAYOUT_LINK.PARENTAL_CATEGORIES,
-            child_categories=config.LAYOUT_LINK.CHILD_CATEGORIES,
+            family_compounds=family_compounds,
             matcher=neighbor_matcher,
-            relationship_key=Relationships.LAYOUT_LINK,
         )
     @staticmethod
@@ -699,9 +714,11 @@ class ServiceFactory:
                 table_refinement_service = ServiceFactory.build_table_refinement_service(config)
                 pipe_component_list.append(table_refinement_service)
+        d_text_service_id = ""
         if config.USE_PDF_MINER:
             pdf_miner = ServiceFactory.build_pdf_text_detector(config)
             d_text = ServiceFactory.build_pdf_miner_text_service(pdf_miner)
+            d_text_service_id = d_text.service_id
             pipe_component_list.append(d_text)
         # setup ocr
@@ -710,10 +727,14 @@ class ServiceFactory:
             if config.OCR.USE_DOCTR:
                 word_detector = ServiceFactory.build_doctr_word_detector(config)
                 word_service = ServiceFactory.build_doctr_word_detector_service(word_detector)
+                word_service.set_inbound_filter(skip_if_category_or_service_extracted(service_ids=d_text_service_id))
                 pipe_component_list.append(word_service)
             ocr_detector = ServiceFactory.build_ocr_detector(config)
             text_extraction_service = ServiceFactory.build_text_extraction_service(config, ocr_detector)
+            text_extraction_service.set_inbound_filter(
+                skip_if_category_or_service_extracted(service_ids=d_text_service_id)
+            )
             pipe_component_list.append(text_extraction_service)
         if config.USE_PDF_MINER or config.USE_OCR:

deepdoctection/datapoint/image.py CHANGED Viewed

@@ -342,7 +342,7 @@ class Image:
         self,
         category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
         annotation_ids: Optional[Union[str, Sequence[str]]] = None,
-        service_id: Optional[Union[str, Sequence[str]]] = None,
+        service_ids: Optional[Union[str, Sequence[str]]] = None,
         model_id: Optional[Union[str, Sequence[str]]] = None,
         session_ids: Optional[Union[str, Sequence[str]]] = None,
         ignore_inactive: bool = True,
@@ -356,7 +356,7 @@ class Image:
         :param category_names: A single name or list of names
         :param annotation_ids: A single id or list of ids
-        :param service_id: A single service name or list of service names
+        :param service_ids: A single service name or list of service names
         :param model_id: A single model name or list of model names
         :param session_ids: A single session id or list of session ids
         :param ignore_inactive: If set to `True` only active annotations are returned.
@@ -372,7 +372,7 @@ class Image:
             )
         ann_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
-        service_id = [service_id] if isinstance(service_id, str) else service_id
+        service_ids = [service_ids] if isinstance(service_ids, str) else service_ids
         model_id = [model_id] if isinstance(model_id, str) else model_id
         session_id = [session_ids] if isinstance(session_ids, str) else session_ids
@@ -387,8 +387,8 @@ class Image:
         if ann_ids is not None:
             anns = filter(lambda x: x.annotation_id in ann_ids, anns)
-        if service_id is not None:
-            anns = filter(lambda x: x.service_id in service_id, anns)
+        if service_ids is not None:
+            anns = filter(lambda x: x.service_id in service_ids, anns)
         if model_id is not None:
             anns = filter(lambda x: x.model_id in model_id, anns)

deepdoctection/datapoint/view.py CHANGED Viewed

@@ -659,7 +659,7 @@ class Page(Image):
         self,
         category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
         annotation_ids: Optional[Union[str, Sequence[str]]] = None,
-        service_id: Optional[Union[str, Sequence[str]]] = None,
+        service_ids: Optional[Union[str, Sequence[str]]] = None,
         model_id: Optional[Union[str, Sequence[str]]] = None,
         session_ids: Optional[Union[str, Sequence[str]]] = None,
         ignore_inactive: bool = True,
@@ -676,7 +676,7 @@ class Page(Image):
         :param category_names: A single name or list of names
         :param annotation_ids: A single id or list of ids
-        :param service_id: A single service name or list of service names
+        :param service_ids: A single service name or list of service names
         :param model_id: A single model name or list of model names
         :param session_ids: A single session id or list of session ids
         :param ignore_inactive: If set to `True` only active annotations are returned.
@@ -691,7 +691,7 @@ class Page(Image):
                 else tuple(get_type(cat_name) for cat_name in category_names)
             )
         ann_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
-        service_id = [service_id] if isinstance(service_id, str) else service_id
+        service_ids = [service_ids] if isinstance(service_ids, str) else service_ids
         model_id = [model_id] if isinstance(model_id, str) else model_id
         session_id = [session_ids] if isinstance(session_ids, str) else session_ids
@@ -706,8 +706,8 @@ class Page(Image):
         if ann_ids is not None:
             anns = filter(lambda x: x.annotation_id in ann_ids, anns)
-        if service_id is not None:
-            anns = filter(lambda x: x.generating_service in service_id, anns)
+        if service_ids is not None:
+            anns = filter(lambda x: x.generating_service in service_ids, anns)
         if model_id is not None:
             anns = filter(lambda x: x.generating_model in model_id, anns)

deepdoctection/mapper/match.py CHANGED Viewed

@@ -34,13 +34,15 @@ from ..utils.settings import TypeOrStr
 def match_anns_by_intersection(
     dp: Image,
-    parent_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
-    child_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
     matching_rule: Literal["iou", "ioa"],
     threshold: float,
     use_weighted_intersections: bool = False,
+    parent_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+    child_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
     parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
     child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
+    parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+    child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
     max_parent_only: bool = False,
 ) -> tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
     """
@@ -87,13 +89,19 @@ def match_anns_by_intersection(
                            dates which are not in the list.
     :param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
                           candidates which are not in the list.
+    :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                   parent candidates which are not in the list.
+    :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                  children candidates which are not in the list.
     :param max_parent_only: Will assign to each child at most one parent with maximum ioa
     :return: child indices, parent indices (see Example), list of parent ids and list of children ids.
     """
     assert matching_rule in ["iou", "ioa"], "matching rule must be either iou or ioa"
-    child_anns = dp.get_annotation(annotation_ids=child_ann_ids, category_names=child_ann_category_names)
+    child_anns = dp.get_annotation(
+        annotation_ids=child_ann_ids, category_names=child_ann_category_names, service_ids=child_ann_service_ids
+    )
     child_ann_boxes = np.array(
         [
             ann.get_bounding_box(dp.image_id).transform(dp.width, dp.height, absolute_coords=True).to_list(mode="xyxy")
@@ -101,7 +109,9 @@ def match_anns_by_intersection(
         ]
     )
-    parent_anns = dp.get_annotation(annotation_ids=parent_ann_ids, category_names=parent_ann_category_names)
+    parent_anns = dp.get_annotation(
+        annotation_ids=parent_ann_ids, category_names=parent_ann_category_names, service_ids=parent_ann_service_ids
+    )
     parent_ann_boxes = np.array(
         [
             ann.get_bounding_box(dp.image_id).transform(dp.width, dp.height, absolute_coords=True).to_list(mode="xyxy")
@@ -147,10 +157,12 @@ def match_anns_by_intersection(
 def match_anns_by_distance(
     dp: Image,
-    parent_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
-    child_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
+    parent_ann_category_names:  Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
+    child_ann_category_names:  Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
     parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
     child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
+    parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+    child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
 ) -> list[tuple[ImageAnnotation, ImageAnnotation]]:
     """
     Generates pairs of parent and child annotations by calculating the euclidean distance between the centers of the
@@ -164,11 +176,19 @@ def match_anns_by_distance(
                            dates which are not in the list.
     :param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
                           candidates which are not in the list.
+    :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                   parent candidates which are not in the list.
+    :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                  children candidates which are not in the list.
     :return:
     """
-    parent_anns = dp.get_annotation(annotation_ids=parent_ann_ids, category_names=parent_ann_category_names)
-    child_anns = dp.get_annotation(annotation_ids=child_ann_ids, category_names=child_ann_category_names)
+    parent_anns = dp.get_annotation(
+        annotation_ids=parent_ann_ids, category_names=parent_ann_category_names, service_ids=parent_ann_service_ids
+    )
+    child_anns = dp.get_annotation(
+        annotation_ids=child_ann_ids, category_names=child_ann_category_names, service_ids=child_ann_service_ids
+    )
     child_centers = [block.get_bounding_box(dp.image_id).center for block in child_anns]
     parent_centers = [block.get_bounding_box(dp.image_id).center for block in parent_anns]
     if child_centers and parent_centers:

deepdoctection/pipe/anngen.py CHANGED Viewed

@@ -75,27 +75,6 @@ class DatapointManager:
         """
         assert self.datapoint_is_passed, "Pass datapoint to  DatapointManager before creating anns"
-    def maybe_map_category_id(self, category_id: Union[str, int]) -> int:
-        """
-        Maps categories if a category id mapping is provided in `__init__`.
-        :param category_id: category id via integer or string.
-        :return: mapped category id
-        """
-        if self.category_id_mapping is None:
-            return int(category_id)
-        return self.category_id_mapping[int(category_id)]
-    def set_category_id_mapping(self, category_id_mapping: Mapping[int, int]) -> None:
-        """
-        In many cases the category ids sent back from a model have to be modified. Pass a mapping from model
-        category ids to target annotation category ids.
-        :param category_id_mapping: A mapping of model category ids (sent from DetectionResult) to category ids (saved
-                                    in annotations)
-        """
-        self.category_id_mapping = category_id_mapping
     def set_image_annotation(
         self,
         detect_result: DetectionResult,
@@ -127,13 +106,10 @@ class DatapointManager:
         :return: the annotation_id of the generated image annotation
         """
         self.assert_datapoint_passed()
-        if detect_result.class_id is None:
-            raise ValueError("class_id of detect_result cannot be None")
         if not isinstance(detect_result.box, (list, np.ndarray)):
             raise TypeError(
                 f"detect_result.box must be of type list or np.ndarray, but is of type {(type(detect_result.box))}"
             )
-        detect_result.class_id = self.maybe_map_category_id(detect_result.class_id)
         with MappingContextManager(
             dp_name=self.datapoint.file_name, filter_level="annotation", detect_result=asdict(detect_result)
         ) as annotation_context:
@@ -155,7 +131,7 @@ class DatapointManager:
             ann = ImageAnnotation(
                 category_name=detect_result.class_name,
                 bounding_box=box,
-                category_id=detect_result.class_id,
+                category_id=detect_result.class_id if detect_result.class_id is not None else DEFAULT_CATEGORY_ID,
                 score=detect_result.score,
                 service_id=self.service_id,
                 model_id=self.model_id,

deepdoctection/pipe/common.py CHANGED Viewed

@@ -22,6 +22,7 @@ from __future__ import annotations
 import os
 from copy import deepcopy
+from dataclasses import dataclass, field
 from typing import Literal, Mapping, Optional, Sequence, Union
 import numpy as np
@@ -49,24 +50,30 @@ class ImageCroppingService(PipelineComponent):
     generally not stored.
     """
-    def __init__(self, category_names: Union[TypeOrStr, Sequence[TypeOrStr]]):
+    def __init__(
+        self, category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+             service_ids: Optional[Sequence[str]] = None
+    ) -> None:
         """
         :param category_names: A single name or a list of category names to crop
         """
-        self.category_names = (
-            (category_names,)
-            if isinstance(category_names, str)
-            else tuple(get_type(category_name) for category_name in category_names)
-        )
+        if category_names is None:
+            self.category_names = None
+        else:
+            self.category_names = (
+                (category_names,)
+                if isinstance(category_names, str)
+                else tuple(get_type(category_name) for category_name in category_names)
+            )
+        self.service_ids = service_ids
         super().__init__("image_crop")
     def serve(self, dp: Image) -> None:
-        for ann in dp.get_annotation(category_names=self.category_names):
+        for ann in dp.get_annotation(category_names=self.category_names, service_ids=self.service_ids):
             dp.image_ann_to_image(ann.annotation_id, crop_image=True)
     def clone(self) -> ImageCroppingService:
-        return self.__class__(self.category_names)
+        return self.__class__(self.category_names, self.service_ids)
     def get_meta_annotation(self) -> MetaAnnotation:
         return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=())
@@ -124,8 +131,10 @@ class IntersectionMatcher:
     def match(
         self,
         dp: Image,
-        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
-        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+        child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
     ) -> list[tuple[str, str]]:
         """
         The matching algorithm
@@ -133,6 +142,10 @@ class IntersectionMatcher:
         :param dp: datapoint image
         :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
         :param child_categories: list of categories to be used for a child class.
+        :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                        parent candidates which are not in the list.
+        :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                        children candidates which are not in the list.
         :return: A list of tuples with parent and child annotation ids
         """
@@ -144,6 +157,8 @@ class IntersectionMatcher:
             threshold=self.threshold,
             use_weighted_intersections=self.use_weighted_intersections,
             max_parent_only=self.max_parent_only,
+            parent_ann_service_ids=parent_ann_service_ids,
+            child_ann_service_ids=child_ann_service_ids,
         )
         matched_child_anns = np.take(child_anns, child_index)  # type: ignore
@@ -174,8 +189,10 @@ class NeighbourMatcher:
     def match(
         self,
         dp: Image,
-        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
-        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+        child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
     ) -> list[tuple[str, str]]:
         """
         The matching algorithm
@@ -183,16 +200,54 @@ class NeighbourMatcher:
         :param dp: datapoint image
         :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
         :param child_categories: list of categories to be used for a child class.
+        :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                        parent candidates which are not in the list.
+        :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                        children candidates which are not in the list.
         :return: A list of tuples with parent and child annotation ids
         """
         return [
             (pair[0].annotation_id, pair[1].annotation_id)
-            for pair in match_anns_by_distance(dp, parent_categories, child_categories)
+            for pair in match_anns_by_distance(
+                dp,
+                parent_ann_category_names=parent_categories,
+                child_ann_category_names=child_categories,
+                parent_ann_service_ids=parent_ann_service_ids,
+                child_ann_service_ids=child_ann_service_ids,
+            )
         ]
+@dataclass
+class FamilyCompound:
+    """
+    A family compound is a set of parent and child categories that are related by a relationship key. The parent
+    categories will receive a relationship to the child categories.
+    """
+    relationship_key: Relationships
+    parent_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
+    child_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
+    parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
+    child_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
+    def __post_init__(self) -> None:
+        if isinstance(self.parent_categories, str):
+            self.parent_categories = (get_type(self.parent_categories),)
+        elif self.parent_categories is not None:
+            self.parent_categories = tuple(get_type(parent) for parent in self.parent_categories)
+        if isinstance(self.child_categories, str):
+            self.child_categories = (get_type(self.child_categories),)
+        elif self.child_categories is not None:
+            self.child_categories = tuple(get_type(child) for child in self.child_categories)
+        if isinstance(self.parent_ann_service_ids, str):
+            self.parent_ann_service_ids = (self.parent_ann_service_ids,)
+        if isinstance(self.child_ann_service_ids, str):
+            self.child_ann_service_ids = (self.child_ann_service_ids,)
 @pipeline_component_registry.register("MatchingService")
 class MatchingService(PipelineComponent):
     """
@@ -202,28 +257,15 @@ class MatchingService(PipelineComponent):
     def __init__(
         self,
-        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
-        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        family_compounds: Sequence[FamilyCompound],
         matcher: Union[IntersectionMatcher, NeighbourMatcher],
-        relationship_key: Relationships,
     ) -> None:
         """
-        :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
-        :param child_categories: list of categories to be used for a child class.
+        :param family_compounds: A list of FamilyCompounds
+        :param matcher: A matcher object
         """
-        self.parent_categories = (
-            (get_type(parent_categories),)
-            if isinstance(parent_categories, str)
-            else tuple(get_type(category_name) for category_name in parent_categories)
-        )
-        self.child_categories = (
-            (get_type(child_categories),)
-            if isinstance(child_categories, str)
-            else (tuple(get_type(category_name) for category_name in child_categories))
-        )
+        self.family_compounds = family_compounds
         self.matcher = matcher
-        self.relationship_key = relationship_key
         super().__init__("matching")
     def serve(self, dp: Image) -> None:
@@ -233,20 +275,31 @@ class MatchingService(PipelineComponent):
         :param dp: datapoint image
         """
-        matched_pairs = self.matcher.match(dp, self.parent_categories, self.child_categories)
-        for pair in matched_pairs:
-            self.dp_manager.set_relationship_annotation(self.relationship_key, pair[0], pair[1])
+        for family_compound in self.family_compounds:
+            matched_pairs = self.matcher.match(
+                dp,
+                parent_categories=family_compound.parent_categories,
+                child_categories=family_compound.child_categories,
+                parent_ann_service_ids=family_compound.parent_ann_service_ids,
+                child_ann_service_ids=family_compound.child_ann_service_ids,
+            )
+            for pair in matched_pairs:
+                self.dp_manager.set_relationship_annotation(family_compound.relationship_key, pair[0], pair[1])
     def clone(self) -> PipelineComponent:
-        return self.__class__(self.parent_categories, self.child_categories, self.matcher, self.relationship_key)
+        return self.__class__(self.family_compounds, self.matcher)
     def get_meta_annotation(self) -> MetaAnnotation:
+        relationships: dict[ObjectTypes, set[ObjectTypes]] = {}
+        for family_compound in self.family_compounds:
+            if family_compound.parent_categories is not None:
+                for parent_category in family_compound.parent_categories:
+                    relationships[parent_category] = {family_compound.relationship_key}  # type: ignore
         return MetaAnnotation(
             image_annotations=(),
             sub_categories={},
-            relationships={parent: {Relationships.CHILD} for parent in self.parent_categories},
+            relationships=relationships,
             summaries=(),
         )

deepdoctection/pipe/layout.py CHANGED Viewed

@@ -20,18 +20,41 @@ Module for layout pipeline component
 """
 from __future__ import annotations
-from typing import Optional
+from typing import Optional, Sequence, Union
 import numpy as np
 from ..datapoint.image import Image
 from ..extern.base import ObjectDetector, PdfMiner
+from ..mapper.misc import curry
 from ..utils.error import ImageError
+from ..utils.settings import ObjectTypes
 from ..utils.transform import PadTransform
 from .base import MetaAnnotation, PipelineComponent
 from .registry import pipeline_component_registry
+@curry
+def skip_if_category_or_service_extracted(
+    dp: Image,
+    category_names: Optional[Union[str, Sequence[ObjectTypes]]] = None,
+    service_ids: Optional[Union[str, Sequence[str]]] = None,
+) -> bool:
+    """
+    Skip the processing of the pipeline component if the category or service is already extracted.
+    **Example**
+        detector = # some detector
+        item_component = ImageLayoutService(detector)
+        item_component.set_inbound_filter(skip_if_category_or_service_extracted(detector.get_categories(as_dict=False)))
+    """
+    if dp.get_annotation(category_names=category_names, service_ids=service_ids):
+        return True
+    return False
 @pipeline_component_registry.register("ImageLayoutService")
 class ImageLayoutService(PipelineComponent):
     """
@@ -45,7 +68,7 @@ class ImageLayoutService(PipelineComponent):
     **Example**
-            d_items = TPFrcnnDetector(item_config_path, item_weights_path, {"1": "ROW", "2": "COLUMNS"})
+            d_items = TPFrcnnDetector(item_config_path, item_weights_path, {1: 'row', 2: 'column'})
             item_component = ImageLayoutService(d_items)
     """
@@ -55,7 +78,6 @@ class ImageLayoutService(PipelineComponent):
         to_image: bool = False,
         crop_image: bool = False,
         padder: Optional[PadTransform] = None,
-        skip_if_layout_extracted: bool = False,
     ):
         """
         :param layout_detector: object detector
@@ -65,23 +87,14 @@ class ImageLayoutService(PipelineComponent):
                            to its bounding box and populate the resulting sub image to
                            `ImageAnnotation.image.image`.
         :param padder: If not `None`, will apply the padder to the image before prediction and inverse apply the padder
-        :param skip_if_layout_extracted: When `True` will check, if there are already `ImageAnnotation` of a category
-                                         available that will be predicted by the `layout_detector`. If yes, will skip
-                                         the prediction process.
         """
         self.to_image = to_image
         self.crop_image = crop_image
         self.padder = padder
-        self.skip_if_layout_extracted = skip_if_layout_extracted
         self.predictor = layout_detector
         super().__init__(self._get_name(layout_detector.name), self.predictor.model_id)
     def serve(self, dp: Image) -> None:
-        if self.skip_if_layout_extracted:
-            categories = self.predictor.get_category_names()
-            anns = dp.get_annotation(category_names=categories)
-            if anns:
-                return
         if dp.image is None:
             raise ImageError("image cannot be None")
         np_image = dp.image
@@ -117,7 +130,7 @@ class ImageLayoutService(PipelineComponent):
             padder_clone = self.padder.clone()
         if not isinstance(predictor, ObjectDetector):
             raise TypeError(f"predictor must be of type ObjectDetector, but is of type {type(predictor)}")
-        return self.__class__(predictor, self.to_image, self.crop_image, padder_clone, self.skip_if_layout_extracted)
+        return self.__class__(predictor, self.to_image, self.crop_image, padder_clone)
     def clear_predictor(self) -> None:
         self.predictor.clear_model()

deepdoctection/pipe/order.py CHANGED Viewed

@@ -347,19 +347,15 @@ class TextLineGenerator:
     a paragraph break threshold. This allows to detect a multi column structure just by observing sub lines.
     """
-    def __init__(
-        self, make_sub_lines: bool, line_category_id: Union[int, str], paragraph_break: Optional[float] = None
-    ):
+    def __init__(self, make_sub_lines: bool, paragraph_break: Optional[float] = None):
         """
         :param make_sub_lines: Whether to build sub lines from lines.
-        :param line_category_id: category_id to give a text line
         :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sub-lines
                                 will be built. We use relative coordinates to calculate the distance between two
                                 consecutive words. A reasonable value is 0.035
         """
         if make_sub_lines and paragraph_break is None:
             raise ValueError("You must specify paragraph_break when setting make_sub_lines to True")
-        self.line_category_id = int(line_category_id)
         self.make_sub_lines = make_sub_lines
         self.paragraph_break = paragraph_break
@@ -367,7 +363,6 @@ class TextLineGenerator:
         return DetectionResult(
             box=box.to_list(mode="xyxy"),
             class_name=LayoutType.LINE,
-            class_id=self.line_category_id,
             absolute_coords=box.absolute_coords,
             relationships=relationships,
         )
@@ -475,18 +470,14 @@ class TextLineServiceMixin(PipelineComponent, ABC):
     def __init__(
         self,
         name: str,
-        line_category_id: int = 1,
         include_residual_text_container: bool = True,
         paragraph_break: Optional[float] = None,
     ):
         """
-        Initialize the TextLineService with a line_category_id and a TextLineGenerator instance.
+        Initialize the TextLineServiceMixin with a TextLineGenerator instance.
         """
-        self.line_category_id = line_category_id
         self.include_residual_text_container = include_residual_text_container
-        self.text_line_generator = TextLineGenerator(
-            self.include_residual_text_container, self.line_category_id, paragraph_break
-        )
+        self.text_line_generator = TextLineGenerator(self.include_residual_text_container, paragraph_break)
         super().__init__(name)
     def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
@@ -523,17 +514,15 @@ class TextLineService(TextLineServiceMixin):
     text lines and the words contained in the text lines. The reading order is not arranged.
     """
-    def __init__(self, line_category_id: int = 1, paragraph_break: Optional[float] = None):
+    def __init__(self, paragraph_break: Optional[float] = None):
         """
         Initialize `TextLineService`
-        :param line_category_id: category_id to give a text line
         :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
                                 will be built
         """
         super().__init__(
             name="text_line",
-            line_category_id=line_category_id,
             include_residual_text_container=True,
             paragraph_break=paragraph_break,
         )
@@ -542,7 +531,7 @@ class TextLineService(TextLineServiceMixin):
         """
         This method returns a new instance of the class with the same configuration.
         """
-        return self.__class__(self.line_category_id, self.text_line_generator.paragraph_break)
+        return self.__class__(self.text_line_generator.paragraph_break)
     def serve(self, dp: Image) -> None:
         text_container_anns = dp.get_annotation(category_names=LayoutType.WORD)
@@ -605,7 +594,6 @@ class TextOrderService(TextLineServiceMixin):
         broken_line_tolerance: float = 0.003,
         height_tolerance: float = 2.0,
         paragraph_break: Optional[float] = 0.035,
-        line_category_id: int = 1,
     ):
         """
         :param text_container: name of an image annotation that has a CHARS sub category. These annotations will be
@@ -647,12 +635,9 @@ class TextOrderService(TextLineServiceMixin):
             self.floating_text_block_categories = self.floating_text_block_categories + (LayoutType.LINE,)
         self.include_residual_text_container = include_residual_text_container
         self.order_generator = OrderGenerator(starting_point_tolerance, broken_line_tolerance, height_tolerance)
-        self.text_line_generator = TextLineGenerator(
-            self.include_residual_text_container, line_category_id, paragraph_break
-        )
+        self.text_line_generator = TextLineGenerator(self.include_residual_text_container, paragraph_break)
         super().__init__(
             name="text_order",
-            line_category_id=line_category_id,
             include_residual_text_container=include_residual_text_container,
             paragraph_break=paragraph_break,
         )
@@ -763,7 +748,6 @@ class TextOrderService(TextLineServiceMixin):
             self.order_generator.broken_line_tolerance,
             self.order_generator.height_tolerance,
             self.text_line_generator.paragraph_break,
-            self.text_line_generator.line_category_id,
         )
     def clear_predictor(self) -> None:

deepdoctection/pipe/segment.py CHANGED Viewed

@@ -436,24 +436,24 @@ def segment_table(
     child_ann_ids = table.get_relationship(Relationships.CHILD)
     cell_index_rows, row_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[0],
-        cell_names,
-        segment_rule,
-        threshold_rows,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[0],
+        child_ann_category_names=cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_rows,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
     cell_index_cols, col_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[1],
-        cell_names,
-        segment_rule,
-        threshold_cols,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[1],
+        child_ann_category_names=cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_cols,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
     cells = dp.get_annotation(annotation_ids=child_ann_ids, category_names=cell_names)
@@ -499,7 +499,6 @@ def create_intersection_cells(
     rows: Sequence[ImageAnnotation],
     cols: Sequence[ImageAnnotation],
     table_annotation_id: str,
-    cell_class_id: int,
     sub_item_names: Sequence[ObjectTypes],
 ) -> tuple[Sequence[DetectionResult], Sequence[SegmentationResult]]:
     """
@@ -509,7 +508,6 @@ def create_intersection_cells(
     :param rows: list of rows
     :param cols: list of columns
     :param table_annotation_id: annotation_id of underlying table ImageAnnotation
-    :param cell_class_id: The class_id to a synthetically generated DetectionResult
     :param sub_item_names: ObjectTypes for row-/column number
     :return: Pair of lists of `DetectionResult` and `SegmentationResult`.
     """
@@ -526,7 +524,6 @@ def create_intersection_cells(
             detect_result_cells.append(
                 DetectionResult(
                     box=boxes_cells[idx].to_list(mode="xyxy"),
-                    class_id=cell_class_id,
                     absolute_coords=boxes_cells[idx].absolute_coords,
                     class_name=LayoutType.CELL,
                 )
@@ -574,13 +571,13 @@ def header_cell_to_item_detect_result(
     child_ann_ids = table.get_relationship(Relationships.CHILD)
     item_index, _, items, _ = match_anns_by_intersection(
         dp,
-        item_header_name,
-        item_name,
-        segment_rule,
-        threshold,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_header_name,
+        child_ann_category_names=item_name,
+        matching_rule=segment_rule,
+        threshold=threshold,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
     item_headers = []
     for idx, item in enumerate(items):
@@ -622,24 +619,24 @@ def segment_pubtables(
     child_ann_ids = table.get_relationship(Relationships.CHILD)
     cell_index_rows, row_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[0],
-        spanning_cell_names,
-        segment_rule,
-        threshold_rows,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[0],
+        child_ann_category_names=spanning_cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_rows,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
     cell_index_cols, col_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[1],
-        spanning_cell_names,
-        segment_rule,
-        threshold_cols,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[1],
+        child_ann_category_names=spanning_cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_cols,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
     spanning_cells = dp.get_annotation(annotation_ids=child_ann_ids, category_names=spanning_cell_names)
@@ -976,7 +973,6 @@ class PubtablesSegmentationService(PipelineComponent):
         tile_table_with_items: bool,
         remove_iou_threshold_rows: float,
         remove_iou_threshold_cols: float,
-        cell_class_id: int,
         table_name: TypeOrStr,
         cell_names: Sequence[TypeOrStr],
         spanning_cell_names: Sequence[TypeOrStr],
@@ -997,7 +993,6 @@ class PubtablesSegmentationService(PipelineComponent):
                                       the adjacent row. Will do a similar shifting with columns.
         :param remove_iou_threshold_rows: iou threshold for removing overlapping rows
         :param remove_iou_threshold_cols: iou threshold for removing overlapping columns
-        :param cell_class_id: 'category_id' for cells to be generated from intersected rows and columns
         :param table_name: layout type table
         :param cell_names: layout type of cells
         :param spanning_cell_names: layout type of spanning cells
@@ -1022,7 +1017,6 @@ class PubtablesSegmentationService(PipelineComponent):
         self.spanning_cell_names = [get_type(cell_name) for cell_name in spanning_cell_names]
         self.remove_iou_threshold_rows = remove_iou_threshold_rows
         self.remove_iou_threshold_cols = remove_iou_threshold_cols
-        self.cell_class_id = cell_class_id
         self.cell_to_image = cell_to_image
         self.crop_cell_image = crop_cell_image
         self.item_names = [get_type(item_name) for item_name in item_names]  # row names must be before column name
@@ -1089,7 +1083,7 @@ class PubtablesSegmentationService(PipelineComponent):
             rows = dp.get_annotation(category_names=self.item_names[0], annotation_ids=item_ann_ids)
             columns = dp.get_annotation(category_names=self.item_names[1], annotation_ids=item_ann_ids)
             detect_result_cells, segment_result_cells = create_intersection_cells(
-                rows, columns, table.annotation_id, self.cell_class_id, self.sub_item_names
+                rows, columns, table.annotation_id, self.sub_item_names
             )
             cell_rn_cn_to_ann_id = {}
             for detect_result, segment_result in zip(detect_result_cells, segment_result_cells):
@@ -1228,7 +1222,6 @@ class PubtablesSegmentationService(PipelineComponent):
             self.tile_table,
             self.remove_iou_threshold_rows,
             self.remove_iou_threshold_cols,
-            self.cell_class_id,
             self.table_name,
             self.cell_names,
             self.spanning_cell_names,

deepdoctection/pipe/sub_layout.py CHANGED Viewed

@@ -92,7 +92,6 @@ class DetectResultGenerator:
                         detect_result_list.append(
                             DetectionResult(
                                 box=[0.0, 0.0, float(self.width), float(self.height)],  # type: ignore
-                                class_id=self.categories_name_as_key[category_name],
                                 class_name=category_name,
                                 score=0.0,
                                 absolute_coords=self.absolute_coords,
@@ -156,14 +155,13 @@ class SubImageLayoutService(PipelineComponent):
             detect_result_generator = DetectResultGenerator(categories_items)
             d_items = TPFrcnnDetector(item_config_path, item_weights_path, {"1": LayoutType.row,
             "2": LayoutType.column})
-            item_component = SubImageLayoutService(d_items, LayoutType.table, {1: 7, 2: 8}, detect_result_generator)
+            item_component = SubImageLayoutService(d_items, LayoutType.table, detect_result_generator)
     """
     def __init__(
         self,
         sub_image_detector: ObjectDetector,
         sub_image_names: Union[str, Sequence[TypeOrStr]],
-        category_id_mapping: Optional[dict[int, int]] = None,
         detect_result_generator: Optional[DetectResultGenerator] = None,
         padder: Optional[PadTransform] = None,
     ):
@@ -186,7 +184,6 @@ class SubImageLayoutService(PipelineComponent):
             if isinstance(sub_image_names, str)
             else tuple((get_type(cat) for cat in sub_image_names))
         )
-        self.category_id_mapping = category_id_mapping
         self.detect_result_generator = detect_result_generator
         self.padder = padder
         self.predictor = sub_image_detector
@@ -223,11 +220,6 @@ class SubImageLayoutService(PipelineComponent):
                 detect_result_list = self.detect_result_generator.create_detection_result(detect_result_list)
             for detect_result in detect_result_list:
-                if self.category_id_mapping:
-                    if detect_result.class_id:
-                        detect_result.class_id = self.category_id_mapping.get(
-                            detect_result.class_id, detect_result.class_id
-                        )
                 self.dp_manager.set_image_annotation(detect_result, sub_image_ann.annotation_id)
     def get_meta_annotation(self) -> MetaAnnotation:
@@ -254,7 +246,6 @@ class SubImageLayoutService(PipelineComponent):
         return self.__class__(
             predictor,
             self.sub_image_name,
-            self.category_id_mapping,
             self.detect_result_generator,
             padder_clone,
         )

deepdoctection/pipe/text.py CHANGED Viewed

@@ -70,7 +70,6 @@ class TextExtractionService(PipelineComponent):
         text_extract_detector: Union[ObjectDetector, PdfMiner, TextRecognizer],
         extract_from_roi: Optional[Union[Sequence[TypeOrStr], TypeOrStr]] = None,
         run_time_ocr_language_selection: bool = False,
-        skip_if_text_extracted: bool = False,
     ):
         """
         :param text_extract_detector: ObjectDetector
@@ -79,8 +78,6 @@ class TextExtractionService(PipelineComponent):
                                                 multiple language selections. Also requires that a language detection
                                                 pipeline component ran before. It will select the expert language OCR
                                                 model based on the determined language.
-        :param skip_if_text_extracted: Set to `True` if text has already been extracted in a previous pipeline component
-                                       and should not be extracted again. Use-case: A PDF with some scanned images.
         """
         if extract_from_roi is None:
@@ -104,11 +101,6 @@ class TextExtractionService(PipelineComponent):
                 raise TypeError("Only TesseractOcrDetector supports multiple languages")
         self.run_time_ocr_language_selection = run_time_ocr_language_selection
-        self.skip_if_text_extracted = skip_if_text_extracted
-        if self.skip_if_text_extracted and isinstance(self.predictor, TextRecognizer):
-            raise ValueError(
-                "skip_if_text_extracted=True and TextRecognizer in TextExtractionService is not compatible"
-            )
     def serve(self, dp: Image) -> None:
         maybe_batched_text_rois = self.get_text_rois(dp)
@@ -154,11 +146,6 @@ class TextExtractionService(PipelineComponent):
         well `get_text_rois` will return an empty list.
         :return: list of ImageAnnotation or Image
         """
-        if self.skip_if_text_extracted:
-            text_categories = self.predictor.get_category_names()
-            text_anns = dp.get_annotation(category_names=text_categories)
-            if text_anns:
-                return []
         if self.extract_from_category:
             if self.predictor.accepts_batch:
@@ -223,7 +210,11 @@ class TextExtractionService(PipelineComponent):
         predictor = self.predictor.clone()
         if not isinstance(predictor, (ObjectDetector, PdfMiner, TextRecognizer)):
             raise ImageError(f"predictor must be of type ObjectDetector or PdfMiner, but is of type {type(predictor)}")
-        return self.__class__(predictor, deepcopy(self.extract_from_category), self.run_time_ocr_language_selection)
+        return self.__class__(
+            text_extract_detector=predictor,
+            extract_from_roi=deepcopy(self.extract_from_category),
+            run_time_ocr_language_selection=self.run_time_ocr_language_selection,
+        )
     def clear_predictor(self) -> None:
         self.predictor.clear_model()

deepdoctection/train/hf_detr_train.py CHANGED Viewed

@@ -272,6 +272,7 @@ def train_hf_detr(
         pretrained_model_name_or_path=path_config_json,
         num_labels=len(id2label),
     )
+    config.use_timm_backbone = True
     if path_weights != "":
         model = TableTransformerForObjectDetection.from_pretrained(

{deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deepdoctection
-Version: 0.39.7
+Version: 0.40.0
 Summary: Repository for Document AI
 Home-page: https://github.com/deepdoctection/deepdoctection
 Author: Dr. Janis Meyer

{deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-deepdoctection/__init__.py,sha256=SgzaP1SOePibE0bw0H_Jecy-ZmPWzhJYsZZ5UT_XLJs,12754
+deepdoctection/__init__.py,sha256=Onsg4vkNNIGYytDmH96KsxYt3xQLxcAbyYHCeOqThR8,12780
 deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
-deepdoctection/analyzer/_config.py,sha256=1rfvVrp7cI2YLzpahD77aa1tZ_KFAIQ21DM1NWhxYiI,5058
+deepdoctection/analyzer/_config.py,sha256=kxQzDQvl2ygH84VTnumbRF7JLGM6VeJoBzv1xssm6H4,5019
 deepdoctection/analyzer/dd.py,sha256=bfR7e1JV7BwUNDRLu0jYZU7qQXnyA_vbRAJl2Ylrq5o,5905
-deepdoctection/analyzer/factory.py,sha256=7L-bJ9957TBn_C6OGWJFmZobrh8MPq4Q-Espx5faEiY,32435
+deepdoctection/analyzer/factory.py,sha256=sXGL_faLkKCUBfq5YIpmzV5cWuvWChYy-zP5OtdaM4Y,33251
 deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
 deepdoctection/configs/conf_dd_one.yaml,sha256=qnrDAST1PHBtdIKE_hdkZexW22FqVvNTI-PEo9wvinM,3025
 deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
@@ -19,8 +19,8 @@ deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SP
 deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
 deepdoctection/datapoint/box.py,sha256=XPhC_xHqLZJjzafg1pIS_CxnVB5-0_yk-twsZZ3ncUU,30093
 deepdoctection/datapoint/convert.py,sha256=Be2FvmRXt-5prZ1vwa5fG6VjgEQ_31hiQ13hAoXoaes,7740
-deepdoctection/datapoint/image.py,sha256=uGmlgF6zGptvNowZTqf-io4hbd8aFFngAvQqgdEQ5Kw,34040
-deepdoctection/datapoint/view.py,sha256=sK6Ta9R6jdOS7iwF05-uPjL2wSz8wHQ5RIGCatw7i2M,50774
+deepdoctection/datapoint/image.py,sha256=_jN46UJUsOi6GC6VEUcp3L_vLL-iYRW05RKcFLWb6Dc,34048
+deepdoctection/datapoint/view.py,sha256=iZiHMc2hkk6vWn87LK0Qf-toZU_kocW3m7Wq8M4IS2E,50782
 deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
 deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
 deepdoctection/datasets/base.py,sha256=AZx-hw8Mchzb7FiOASt7zCbiybFNsM_diBzKXyC-auU,22618
@@ -94,7 +94,7 @@ deepdoctection/mapper/d2struct.py,sha256=Dx-YnycsIQH4a5-9Gn_yMhiQ-gOFgMueNeH3rhX
 deepdoctection/mapper/hfstruct.py,sha256=2PjGKsYturVJBimLT1CahYh09KSRAFEHz_QNtC162kQ,5551
 deepdoctection/mapper/laylmstruct.py,sha256=abMZkYU2W0e_VcCm_c0ZXNFuv-lfMFWcTedcZS5EYvE,42935
 deepdoctection/mapper/maputils.py,sha256=eI6ZcDg9W5uB6xQNBZpMIdEd86HlCxTtkJuyROdTqiw,8146
-deepdoctection/mapper/match.py,sha256=Ed9FsuVPNp_faaW5PKnvUHZoEXcRcrO-muduTMzjp1s,8937
+deepdoctection/mapper/match.py,sha256=RDTYSGtbtT8ph3L83PyHIkezJ2K82MwNerSM72uTMxM,10267
 deepdoctection/mapper/misc.py,sha256=vX-fV420Te00eD-cqTiWBV2twHqdBcBV2_7rAFRgPRg,7164
 deepdoctection/mapper/pascalstruct.py,sha256=TzVU1p0oiw0nOuxTFFbEB9vXJxH1v6VUvTJ7MD0manU,3828
 deepdoctection/mapper/prodigystruct.py,sha256=Re4Sd_zAp6qOvbXZLmMJeG0IGEfMQxebuyDeZgMcTa8,6827
@@ -102,24 +102,24 @@ deepdoctection/mapper/pubstruct.py,sha256=PAJ2N1HSPNS6F2ZrIwlD7PiBhIM-rJscK_Ti8O
 deepdoctection/mapper/tpstruct.py,sha256=YNABRibvcISD5Lavg3jouoE4FMdqXEJoM-hNoB_rnww,4481
 deepdoctection/mapper/xfundstruct.py,sha256=_3r3c0K82fnF2h1HxA85h-9ETYrHwcERa6MNc6Ko6Z8,8807
 deepdoctection/pipe/__init__.py,sha256=ywTVoetftdL6plXg2YlBzMfmqBZupq7yXblSVyvvkcQ,1127
-deepdoctection/pipe/anngen.py,sha256=3319l4aaXzcY4w6ItVBNPX8LGS5fHFDVtyVY9KMefac,16393
+deepdoctection/pipe/anngen.py,sha256=7wvp7eghDwrgcIyu1vjRxmVy4SADPbn-k4ud8y2bgjU,15338
 deepdoctection/pipe/base.py,sha256=wlza9aDOKnHKrXmaz8MLyLz0nMqqcIWQ-6Lu944aicE,15390
-deepdoctection/pipe/common.py,sha256=C1KxEfJFSPeDqlnkiJ1ZYPuA36P8BU_4jVhdsszW_V8,17752
+deepdoctection/pipe/common.py,sha256=S6-NKvR0sqBfqjN-mH76uVgM_aHOZvhPe_ore36UPZA,21028
 deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
 deepdoctection/pipe/doctectionpipe.py,sha256=bGW3ugky-fb-nEe-3bvO6Oc_4_6w82cQboGM_6p2eIo,12530
 deepdoctection/pipe/language.py,sha256=5zI0UQC6Fh12_r2pfVL42HoCGz2hpHrOhpXAn5m-rYw,5451
-deepdoctection/pipe/layout.py,sha256=xIhnJpyUSbvLbhTXyAKXY1hmG9352jihGYFSclTH_1g,5567
+deepdoctection/pipe/layout.py,sha256=ThULc0b1f9KyaXYk9z0qbuJ0nhIodah9PcrEq2xKpAY,5670
 deepdoctection/pipe/lm.py,sha256=x9NoYpivdjQF1r76a7PPrUuBEmuHP7ZukuXFDkXhXBc,17572
-deepdoctection/pipe/order.py,sha256=PnJZiCnxFluJiECXLTZT0c1Rr66vIRBFraa_G41UA2k,40121
+deepdoctection/pipe/order.py,sha256=0KNiMinedjfuDVVHxJSaDL1yl4Sub-miMPcEC4gGwPA,39423
 deepdoctection/pipe/refine.py,sha256=dTfI396xydPdbzpfo4yqFcuxl3UAB1y-WbSQn1o76ec,22367
 deepdoctection/pipe/registry.py,sha256=aFx-Tn0xhVA5l5H18duNW5QoTNKQltybsEUEzsMgUfg,902
-deepdoctection/pipe/segment.py,sha256=mWYRg7UR80PtIj1SIg_hiujDcCtLlvKJUP9vx4ZpW0Y,59318
-deepdoctection/pipe/sub_layout.py,sha256=ldFFuFIW5em2Rl1O1BYwclrXJ86wg-1RmDZmv35Cruw,13850
-deepdoctection/pipe/text.py,sha256=h9q6d3HFOs7LOg-iwdLUPiQxrPqgunBVNmtYMBrfRQE,11180
+deepdoctection/pipe/segment.py,sha256=sny59GuP7dxLGX3YjHF0wllPxSiXL1GNQEhMGKcF8ZU,59594
+deepdoctection/pipe/sub_layout.py,sha256=OLKvCYJynoFpo7bf2b3HzY0k-TJDLc0PHveWKcDbqZI,13324
+deepdoctection/pipe/text.py,sha256=tLlJtneM__WsrAvp4pQFqwNlmq2RLqKqiPXlJ2lkniU,10483
 deepdoctection/pipe/transform.py,sha256=9Om7X7hJeL4jgUwHM1CHa4sb5v7Qo1PtVG0ls_3nI7w,3798
 deepdoctection/train/__init__.py,sha256=YFTRAZF1F7cEAKTdAIi1BLyYb6rSRcwq09Ui5Lu8d6E,1071
 deepdoctection/train/d2_frcnn_train.py,sha256=sFc_G-mEpaM8d1CCE0_6Gl4nBh11X2RYRBA3p_ylFJQ,16000
-deepdoctection/train/hf_detr_train.py,sha256=NEOoRjZ00bPwN1supTJD7VIcHRgvDJFSYcugiHo_Rqs,12007
+deepdoctection/train/hf_detr_train.py,sha256=uBkkRyxrJF5UF__KbYvIlmb-HRWQ9TY6LiJr1Rm56kI,12043
 deepdoctection/train/hf_layoutlm_train.py,sha256=8kiGp_8GEyqCkLgeMgCJOLJWSVoKWkUBHsZtDjZOcRk,22556
 deepdoctection/train/tp_frcnn_train.py,sha256=pEpXokSVGveqo82pRnhnAmHPmjQ_8wQWpqM4ZyNHJgs,13049
 deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ61nI,2371
@@ -141,8 +141,8 @@ deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F
 deepdoctection/utils/types.py,sha256=ti4WdtIJSg3TGK_YPkkoY9PYGMnR2tTX6Xfik8U1pNk,2986
 deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
 deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
-deepdoctection-0.39.7.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
-deepdoctection-0.39.7.dist-info/METADATA,sha256=f1bypRgjWbclKDMdqJMW_CwoIOCnE88r_C6PA9qGvDY,19763
-deepdoctection-0.39.7.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
-deepdoctection-0.39.7.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
-deepdoctection-0.39.7.dist-info/RECORD,,
+deepdoctection-0.40.0.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
+deepdoctection-0.40.0.dist-info/METADATA,sha256=YyPBlJBcUfAQP_cW7Mhq3eNs2-924o4BMS4X6Sn0Xwo,19763
+deepdoctection-0.40.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+deepdoctection-0.40.0.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
+deepdoctection-0.40.0.dist-info/RECORD,,

{deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.0.2)
+Generator: setuptools (78.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

deepdoctection 0.39.7__py3-none-any.whl → 0.40.0__py3-none-any.whl

Potentially problematic release.

deepdoctection 0.39.7__py3-none-any.whl → 0.40.0__py3-none-any.whl