deepdoctection 0.39.7__py3-none-any.whl → 0.41.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of deepdoctection has been flagged as a potentially problematic release.
- deepdoctection/__init__.py +6 -3
- deepdoctection/analyzer/_config.py +0 -1
- deepdoctection/analyzer/factory.py +35 -14
- deepdoctection/datapoint/convert.py +0 -24
- deepdoctection/datapoint/image.py +5 -5
- deepdoctection/datapoint/view.py +6 -7
- deepdoctection/datasets/base.py +3 -1
- deepdoctection/extern/base.py +108 -1
- deepdoctection/extern/deskew.py +1 -1
- deepdoctection/extern/doctrocr.py +2 -1
- deepdoctection/extern/tessocr.py +1 -1
- deepdoctection/extern/tp/tpfrcnn/preproc.py +1 -1
- deepdoctection/mapper/laylmstruct.py +1 -2
- deepdoctection/mapper/match.py +28 -8
- deepdoctection/pipe/anngen.py +1 -25
- deepdoctection/pipe/common.py +92 -38
- deepdoctection/pipe/layout.py +26 -13
- deepdoctection/pipe/order.py +6 -22
- deepdoctection/pipe/segment.py +36 -43
- deepdoctection/pipe/sub_layout.py +9 -14
- deepdoctection/pipe/text.py +5 -14
- deepdoctection/pipe/transform.py +38 -16
- deepdoctection/train/hf_detr_train.py +1 -0
- deepdoctection/utils/settings.py +5 -0
- deepdoctection/utils/transform.py +173 -38
- {deepdoctection-0.39.7.dist-info → deepdoctection-0.41.0.dist-info}/METADATA +1 -1
- {deepdoctection-0.39.7.dist-info → deepdoctection-0.41.0.dist-info}/RECORD +30 -30
- {deepdoctection-0.39.7.dist-info → deepdoctection-0.41.0.dist-info}/WHEEL +1 -1
- {deepdoctection-0.39.7.dist-info → deepdoctection-0.41.0.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.39.7.dist-info → deepdoctection-0.41.0.dist-info}/top_level.txt +0 -0
deepdoctection/pipe/common.py
CHANGED
```diff
@@ -22,6 +22,7 @@ from __future__ import annotations
 
 import os
 from copy import deepcopy
+from dataclasses import dataclass, field
 from typing import Literal, Mapping, Optional, Sequence, Union
 
 import numpy as np
@@ -49,24 +50,31 @@ class ImageCroppingService(PipelineComponent):
     generally not stored.
     """
 
-    def __init__(
+    def __init__(
+        self,
+        category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        service_ids: Optional[Sequence[str]] = None,
+    ) -> None:
         """
         :param category_names: A single name or a list of category names to crop
         """
-
-
-
-
-
-
+        if category_names is None:
+            self.category_names = None
+        else:
+            self.category_names = (
+                (category_names,)
+                if isinstance(category_names, str)
+                else tuple(get_type(category_name) for category_name in category_names)
+            )
+        self.service_ids = service_ids
        super().__init__("image_crop")
 
     def serve(self, dp: Image) -> None:
-        for ann in dp.get_annotation(category_names=self.category_names):
+        for ann in dp.get_annotation(category_names=self.category_names, service_ids=self.service_ids):
            dp.image_ann_to_image(ann.annotation_id, crop_image=True)
 
     def clone(self) -> ImageCroppingService:
-        return self.__class__(self.category_names)
+        return self.__class__(self.category_names, self.service_ids)
 
     def get_meta_annotation(self) -> MetaAnnotation:
         return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=())
@@ -124,8 +132,10 @@ class IntersectionMatcher:
     def match(
         self,
         dp: Image,
-        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
-        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+        child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
     ) -> list[tuple[str, str]]:
         """
         The matching algorithm
@@ -133,6 +143,10 @@
         :param dp: datapoint image
         :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
         :param child_categories: list of categories to be used for a child class.
+        :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                       parent candidates which are not in the list.
+        :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                      children candidates which are not in the list.
 
         :return: A list of tuples with parent and child annotation ids
         """
@@ -144,6 +158,8 @@
             threshold=self.threshold,
             use_weighted_intersections=self.use_weighted_intersections,
             max_parent_only=self.max_parent_only,
+            parent_ann_service_ids=parent_ann_service_ids,
+            child_ann_service_ids=child_ann_service_ids,
         )
 
         matched_child_anns = np.take(child_anns, child_index)  # type: ignore
@@ -174,8 +190,10 @@ class NeighbourMatcher:
     def match(
         self,
         dp: Image,
-        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
-        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+        child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
     ) -> list[tuple[str, str]]:
         """
         The matching algorithm
@@ -183,16 +201,54 @@
         :param dp: datapoint image
         :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
         :param child_categories: list of categories to be used for a child class.
+        :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                       parent candidates which are not in the list.
+        :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                      children candidates which are not in the list.
 
         :return: A list of tuples with parent and child annotation ids
         """
 
         return [
             (pair[0].annotation_id, pair[1].annotation_id)
-            for pair in match_anns_by_distance(
+            for pair in match_anns_by_distance(
+                dp,
+                parent_ann_category_names=parent_categories,
+                child_ann_category_names=child_categories,
+                parent_ann_service_ids=parent_ann_service_ids,
+                child_ann_service_ids=child_ann_service_ids,
+            )
         ]
 
 
+@dataclass
+class FamilyCompound:
+    """
+    A family compound is a set of parent and child categories that are related by a relationship key. The parent
+    categories will receive a relationship to the child categories.
+    """
+
+    relationship_key: Relationships
+    parent_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
+    child_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
+    parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
+    child_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
+
+    def __post_init__(self) -> None:
+        if isinstance(self.parent_categories, str):
+            self.parent_categories = (get_type(self.parent_categories),)
+        elif self.parent_categories is not None:
+            self.parent_categories = tuple(get_type(parent) for parent in self.parent_categories)
+        if isinstance(self.child_categories, str):
+            self.child_categories = (get_type(self.child_categories),)
+        elif self.child_categories is not None:
+            self.child_categories = tuple(get_type(child) for child in self.child_categories)
+        if isinstance(self.parent_ann_service_ids, str):
+            self.parent_ann_service_ids = (self.parent_ann_service_ids,)
+        if isinstance(self.child_ann_service_ids, str):
+            self.child_ann_service_ids = (self.child_ann_service_ids,)
+
+
 @pipeline_component_registry.register("MatchingService")
 class MatchingService(PipelineComponent):
     """
@@ -202,28 +258,15 @@ class MatchingService(PipelineComponent):
 
     def __init__(
         self,
-
-        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        family_compounds: Sequence[FamilyCompound],
         matcher: Union[IntersectionMatcher, NeighbourMatcher],
-        relationship_key: Relationships,
     ) -> None:
         """
-        :param
-        :param
-
+        :param family_compounds: A list of FamilyCompounds
+        :param matcher: A matcher object
         """
-        self.
-            (get_type(parent_categories),)
-            if isinstance(parent_categories, str)
-            else tuple(get_type(category_name) for category_name in parent_categories)
-        )
-        self.child_categories = (
-            (get_type(child_categories),)
-            if isinstance(child_categories, str)
-            else (tuple(get_type(category_name) for category_name in child_categories))
-        )
+        self.family_compounds = family_compounds
         self.matcher = matcher
-        self.relationship_key = relationship_key
         super().__init__("matching")
 
     def serve(self, dp: Image) -> None:
@@ -233,20 +276,31 @@ class MatchingService(PipelineComponent):
 
         :param dp: datapoint image
         """
-
-
-
-
-
+        for family_compound in self.family_compounds:
+            matched_pairs = self.matcher.match(
+                dp,
+                parent_categories=family_compound.parent_categories,
+                child_categories=family_compound.child_categories,
+                parent_ann_service_ids=family_compound.parent_ann_service_ids,
+                child_ann_service_ids=family_compound.child_ann_service_ids,
+            )
+
+            for pair in matched_pairs:
+                self.dp_manager.set_relationship_annotation(family_compound.relationship_key, pair[0], pair[1])
 
     def clone(self) -> PipelineComponent:
-        return self.__class__(self.
+        return self.__class__(self.family_compounds, self.matcher)
 
     def get_meta_annotation(self) -> MetaAnnotation:
+        relationships: dict[ObjectTypes, set[ObjectTypes]] = {}
+        for family_compound in self.family_compounds:
+            if family_compound.parent_categories is not None:
+                for parent_category in family_compound.parent_categories:
+                    relationships[parent_category] = {family_compound.relationship_key}  # type: ignore
         return MetaAnnotation(
             image_annotations=(),
             sub_categories={},
-            relationships=
+            relationships=relationships,
             summaries=(),
         )
 
```
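The hunks above replace `MatchingService`'s separate `parent_categories`/`child_categories`/`relationship_key` arguments with a sequence of `FamilyCompound` objects plus a matcher. A minimal migration sketch, assuming the import paths shown in this diff; the category names, matching rule and threshold below are illustrative values, not taken from this release:

```python
# Sketch only: categories, rule and threshold are example choices.
from deepdoctection.pipe.common import FamilyCompound, IntersectionMatcher, MatchingService
from deepdoctection.utils.settings import LayoutType, Relationships

# 0.39.7 style (removed):
#   MatchingService(parent_categories=..., child_categories=..., matcher=..., relationship_key=...)
# 0.41.0 style: bundle parent/child categories and the relationship key in a FamilyCompound.
word_family = FamilyCompound(
    relationship_key=Relationships.CHILD,
    parent_categories=[LayoutType.TEXT, LayoutType.TITLE, LayoutType.TABLE],
    child_categories=LayoutType.WORD,
)

matcher = IntersectionMatcher(matching_rule="ioa", threshold=0.6)  # example rule/threshold
matching_service = MatchingService(family_compounds=[word_family], matcher=matcher)
```

Because `FamilyCompound.parent_ann_service_ids` and `child_ann_service_ids` default to `None`, the old behaviour (match by category only) is preserved unless service-id filters are set explicitly.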
deepdoctection/pipe/layout.py
CHANGED
```diff
@@ -20,18 +20,41 @@ Module for layout pipeline component
 """
 from __future__ import annotations
 
-from typing import Optional
+from typing import Optional, Sequence, Union
 
 import numpy as np
 
 from ..datapoint.image import Image
 from ..extern.base import ObjectDetector, PdfMiner
+from ..mapper.misc import curry
 from ..utils.error import ImageError
+from ..utils.settings import ObjectTypes
 from ..utils.transform import PadTransform
 from .base import MetaAnnotation, PipelineComponent
 from .registry import pipeline_component_registry
 
 
+@curry
+def skip_if_category_or_service_extracted(
+    dp: Image,
+    category_names: Optional[Union[str, Sequence[ObjectTypes]]] = None,
+    service_ids: Optional[Union[str, Sequence[str]]] = None,
+) -> bool:
+    """
+    Skip the processing of the pipeline component if the category or service is already extracted.
+
+    **Example**
+
+        detector = # some detector
+        item_component = ImageLayoutService(detector)
+        item_component.set_inbound_filter(skip_if_category_or_service_extracted(detector.get_categories(as_dict=False)))
+    """
+
+    if dp.get_annotation(category_names=category_names, service_ids=service_ids):
+        return True
+    return False
+
+
 @pipeline_component_registry.register("ImageLayoutService")
 class ImageLayoutService(PipelineComponent):
     """
@@ -45,7 +68,7 @@ class ImageLayoutService(PipelineComponent):
 
     **Example**
 
-        d_items = TPFrcnnDetector(item_config_path, item_weights_path, {
+        d_items = TPFrcnnDetector(item_config_path, item_weights_path, {1: 'row', 2: 'column'})
         item_component = ImageLayoutService(d_items)
     """
 
@@ -55,7 +78,6 @@ class ImageLayoutService(PipelineComponent):
         to_image: bool = False,
         crop_image: bool = False,
         padder: Optional[PadTransform] = None,
-        skip_if_layout_extracted: bool = False,
     ):
         """
         :param layout_detector: object detector
@@ -65,23 +87,14 @@ class ImageLayoutService(PipelineComponent):
                            to its bounding box and populate the resulting sub image to
                            `ImageAnnotation.image.image`.
         :param padder: If not `None`, will apply the padder to the image before prediction and inverse apply the padder
-        :param skip_if_layout_extracted: When `True` will check, if there are already `ImageAnnotation` of a category
-                                         available that will be predicted by the `layout_detector`. If yes, will skip
-                                         the prediction process.
         """
         self.to_image = to_image
         self.crop_image = crop_image
         self.padder = padder
-        self.skip_if_layout_extracted = skip_if_layout_extracted
         self.predictor = layout_detector
         super().__init__(self._get_name(layout_detector.name), self.predictor.model_id)
 
     def serve(self, dp: Image) -> None:
-        if self.skip_if_layout_extracted:
-            categories = self.predictor.get_category_names()
-            anns = dp.get_annotation(category_names=categories)
-            if anns:
-                return
         if dp.image is None:
             raise ImageError("image cannot be None")
         np_image = dp.image
@@ -117,7 +130,7 @@ class ImageLayoutService(PipelineComponent):
             padder_clone = self.padder.clone()
         if not isinstance(predictor, ObjectDetector):
             raise TypeError(f"predictor must be of type ObjectDetector, but is of type {type(predictor)}")
-        return self.__class__(predictor, self.to_image, self.crop_image, padder_clone
+        return self.__class__(predictor, self.to_image, self.crop_image, padder_clone)
 
     def clear_predictor(self) -> None:
         self.predictor.clear_model()
```
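The `skip_if_layout_extracted` flag is gone; per the added docstring, the replacement is to attach the curried `skip_if_category_or_service_extracted` filter via `set_inbound_filter`. A hedged migration sketch; `layout_detector` is a placeholder for whatever `ObjectDetector` was used before:

```python
# Sketch only: `layout_detector` stands for any ObjectDetector you already construct elsewhere.
from deepdoctection.pipe.layout import ImageLayoutService, skip_if_category_or_service_extracted

layout_detector = ...  # the detector previously passed together with skip_if_layout_extracted=True

layout_service = ImageLayoutService(layout_detector, to_image=True, crop_image=True)

# 0.39.7: ImageLayoutService(layout_detector, ..., skip_if_layout_extracted=True)
# 0.41.0: skip pages on which these categories have already been predicted
layout_service.set_inbound_filter(
    skip_if_category_or_service_extracted(category_names=layout_detector.get_category_names())
)
```

The same filter can also be parameterized with `service_ids`, so a page is skipped only when a specific upstream service has already produced annotations.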
deepdoctection/pipe/order.py
CHANGED
```diff
@@ -347,19 +347,15 @@ class TextLineGenerator:
     a paragraph break threshold. This allows to detect a multi column structure just by observing sub lines.
     """
 
-    def __init__(
-        self, make_sub_lines: bool, line_category_id: Union[int, str], paragraph_break: Optional[float] = None
-    ):
+    def __init__(self, make_sub_lines: bool, paragraph_break: Optional[float] = None):
         """
         :param make_sub_lines: Whether to build sub lines from lines.
-        :param line_category_id: category_id to give a text line
         :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sub-lines
                                 will be built. We use relative coordinates to calculate the distance between two
                                 consecutive words. A reasonable value is 0.035
         """
         if make_sub_lines and paragraph_break is None:
             raise ValueError("You must specify paragraph_break when setting make_sub_lines to True")
-        self.line_category_id = int(line_category_id)
         self.make_sub_lines = make_sub_lines
         self.paragraph_break = paragraph_break
 
@@ -367,7 +363,6 @@
         return DetectionResult(
             box=box.to_list(mode="xyxy"),
             class_name=LayoutType.LINE,
-            class_id=self.line_category_id,
             absolute_coords=box.absolute_coords,
             relationships=relationships,
         )
@@ -475,18 +470,14 @@ class TextLineServiceMixin(PipelineComponent, ABC):
     def __init__(
         self,
         name: str,
-        line_category_id: int = 1,
         include_residual_text_container: bool = True,
         paragraph_break: Optional[float] = None,
     ):
         """
-        Initialize the
+        Initialize the TextLineServiceMixin with a TextLineGenerator instance.
         """
-        self.line_category_id = line_category_id
         self.include_residual_text_container = include_residual_text_container
-        self.text_line_generator = TextLineGenerator(
-            self.include_residual_text_container, self.line_category_id, paragraph_break
-        )
+        self.text_line_generator = TextLineGenerator(self.include_residual_text_container, paragraph_break)
         super().__init__(name)
 
     def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
@@ -523,17 +514,15 @@ class TextLineService(TextLineServiceMixin):
     text lines and the words contained in the text lines. The reading order is not arranged.
     """
 
-    def __init__(self,
+    def __init__(self, paragraph_break: Optional[float] = None):
         """
         Initialize `TextLineService`
 
-        :param line_category_id: category_id to give a text line
         :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
                                 will be built
         """
         super().__init__(
             name="text_line",
-            line_category_id=line_category_id,
             include_residual_text_container=True,
             paragraph_break=paragraph_break,
         )
@@ -542,7 +531,7 @@ class TextLineService(TextLineServiceMixin):
         """
         This method returns a new instance of the class with the same configuration.
         """
-        return self.__class__(self.
+        return self.__class__(self.text_line_generator.paragraph_break)
 
     def serve(self, dp: Image) -> None:
         text_container_anns = dp.get_annotation(category_names=LayoutType.WORD)
@@ -605,7 +594,6 @@ class TextOrderService(TextLineServiceMixin):
         broken_line_tolerance: float = 0.003,
         height_tolerance: float = 2.0,
         paragraph_break: Optional[float] = 0.035,
-        line_category_id: int = 1,
     ):
         """
         :param text_container: name of an image annotation that has a CHARS sub category. These annotations will be
@@ -647,12 +635,9 @@
             self.floating_text_block_categories = self.floating_text_block_categories + (LayoutType.LINE,)
         self.include_residual_text_container = include_residual_text_container
         self.order_generator = OrderGenerator(starting_point_tolerance, broken_line_tolerance, height_tolerance)
-        self.text_line_generator = TextLineGenerator(
-            self.include_residual_text_container, line_category_id, paragraph_break
-        )
+        self.text_line_generator = TextLineGenerator(self.include_residual_text_container, paragraph_break)
         super().__init__(
             name="text_order",
-            line_category_id=line_category_id,
             include_residual_text_container=include_residual_text_container,
             paragraph_break=paragraph_break,
         )
@@ -763,7 +748,6 @@
             self.order_generator.broken_line_tolerance,
             self.order_generator.height_tolerance,
             self.text_line_generator.paragraph_break,
-            self.text_line_generator.line_category_id,
         )
 
     def clear_predictor(self) -> None:
deepdoctection/pipe/segment.py
CHANGED
```diff
@@ -436,24 +436,24 @@ def segment_table(
     child_ann_ids = table.get_relationship(Relationships.CHILD)
     cell_index_rows, row_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[0],
-        cell_names,
-        segment_rule,
-        threshold_rows,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[0],
+        child_ann_category_names=cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_rows,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
 
     cell_index_cols, col_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[1],
-        cell_names,
-        segment_rule,
-        threshold_cols,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[1],
+        child_ann_category_names=cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_cols,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
 
     cells = dp.get_annotation(annotation_ids=child_ann_ids, category_names=cell_names)
@@ -499,7 +499,6 @@ def create_intersection_cells(
     rows: Sequence[ImageAnnotation],
     cols: Sequence[ImageAnnotation],
     table_annotation_id: str,
-    cell_class_id: int,
     sub_item_names: Sequence[ObjectTypes],
 ) -> tuple[Sequence[DetectionResult], Sequence[SegmentationResult]]:
     """
@@ -509,7 +508,6 @@ def create_intersection_cells(
     :param rows: list of rows
     :param cols: list of columns
     :param table_annotation_id: annotation_id of underlying table ImageAnnotation
-    :param cell_class_id: The class_id to a synthetically generated DetectionResult
     :param sub_item_names: ObjectTypes for row-/column number
     :return: Pair of lists of `DetectionResult` and `SegmentationResult`.
     """
@@ -526,7 +524,6 @@
         detect_result_cells.append(
             DetectionResult(
                 box=boxes_cells[idx].to_list(mode="xyxy"),
-                class_id=cell_class_id,
                 absolute_coords=boxes_cells[idx].absolute_coords,
                 class_name=LayoutType.CELL,
             )
@@ -574,13 +571,13 @@ def header_cell_to_item_detect_result(
     child_ann_ids = table.get_relationship(Relationships.CHILD)
     item_index, _, items, _ = match_anns_by_intersection(
         dp,
-        item_header_name,
-        item_name,
-        segment_rule,
-        threshold,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_header_name,
+        child_ann_category_names=item_name,
+        matching_rule=segment_rule,
+        threshold=threshold,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
     item_headers = []
     for idx, item in enumerate(items):
@@ -622,24 +619,24 @@ def segment_pubtables(
     child_ann_ids = table.get_relationship(Relationships.CHILD)
     cell_index_rows, row_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[0],
-        spanning_cell_names,
-        segment_rule,
-        threshold_rows,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[0],
+        child_ann_category_names=spanning_cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_rows,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
 
     cell_index_cols, col_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[1],
-        spanning_cell_names,
-        segment_rule,
-        threshold_cols,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[1],
+        child_ann_category_names=spanning_cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_cols,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
 
     spanning_cells = dp.get_annotation(annotation_ids=child_ann_ids, category_names=spanning_cell_names)
@@ -976,7 +973,6 @@ class PubtablesSegmentationService(PipelineComponent):
         tile_table_with_items: bool,
         remove_iou_threshold_rows: float,
         remove_iou_threshold_cols: float,
-        cell_class_id: int,
         table_name: TypeOrStr,
         cell_names: Sequence[TypeOrStr],
         spanning_cell_names: Sequence[TypeOrStr],
@@ -997,7 +993,6 @@ class PubtablesSegmentationService(PipelineComponent):
                                       the adjacent row. Will do a similar shifting with columns.
         :param remove_iou_threshold_rows: iou threshold for removing overlapping rows
         :param remove_iou_threshold_cols: iou threshold for removing overlapping columns
-        :param cell_class_id: 'category_id' for cells to be generated from intersected rows and columns
         :param table_name: layout type table
         :param cell_names: layout type of cells
         :param spanning_cell_names: layout type of spanning cells
@@ -1022,7 +1017,6 @@ class PubtablesSegmentationService(PipelineComponent):
         self.spanning_cell_names = [get_type(cell_name) for cell_name in spanning_cell_names]
         self.remove_iou_threshold_rows = remove_iou_threshold_rows
         self.remove_iou_threshold_cols = remove_iou_threshold_cols
-        self.cell_class_id = cell_class_id
         self.cell_to_image = cell_to_image
         self.crop_cell_image = crop_cell_image
         self.item_names = [get_type(item_name) for item_name in item_names]  # row names must be before column name
@@ -1089,7 +1083,7 @@ class PubtablesSegmentationService(PipelineComponent):
             rows = dp.get_annotation(category_names=self.item_names[0], annotation_ids=item_ann_ids)
             columns = dp.get_annotation(category_names=self.item_names[1], annotation_ids=item_ann_ids)
             detect_result_cells, segment_result_cells = create_intersection_cells(
-                rows, columns, table.annotation_id, self.
+                rows, columns, table.annotation_id, self.sub_item_names
             )
             cell_rn_cn_to_ann_id = {}
             for detect_result, segment_result in zip(detect_result_cells, segment_result_cells):
@@ -1228,7 +1222,6 @@ class PubtablesSegmentationService(PipelineComponent):
             self.tile_table,
            self.remove_iou_threshold_rows,
            self.remove_iou_threshold_cols,
-            self.cell_class_id,
            self.table_name,
            self.cell_names,
            self.spanning_cell_names,
```
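Across these hunks the synthetic cell `DetectionResult`s built from row/column intersections drop the explicit `class_id`; the category name alone now identifies the class, which is why `cell_class_id` disappears from `create_intersection_cells` and `PubtablesSegmentationService`. A minimal sketch of such a result, assuming the import paths used in this diff and with made-up coordinates:

```python
# Sketch only: coordinates are arbitrary example values.
from deepdoctection.extern.base import DetectionResult
from deepdoctection.utils.settings import LayoutType

# 0.39.7: DetectionResult(box=..., class_id=cell_class_id, class_name=LayoutType.CELL, ...)
# 0.41.0: no class_id; the class name is sufficient
cell_detect_result = DetectionResult(
    box=[10.0, 20.0, 110.0, 60.0],  # xyxy
    class_name=LayoutType.CELL,
    absolute_coords=True,
    score=0.0,
)
```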
deepdoctection/pipe/sub_layout.py
CHANGED

```diff
@@ -92,7 +92,6 @@ class DetectResultGenerator:
             detect_result_list.append(
                 DetectionResult(
                     box=[0.0, 0.0, float(self.width), float(self.height)],  # type: ignore
-                    class_id=self.categories_name_as_key[category_name],
                     class_name=category_name,
                     score=0.0,
                     absolute_coords=self.absolute_coords,
@@ -154,16 +153,16 @@ class SubImageLayoutService(PipelineComponent):
     **Example**
 
         detect_result_generator = DetectResultGenerator(categories_items)
-        d_items = TPFrcnnDetector(item_config_path, item_weights_path, {
-
-        item_component = SubImageLayoutService(d_items, LayoutType.table,
+        d_items = TPFrcnnDetector(item_config_path, item_weights_path, {1: LayoutType.row,
+                                                                        2: LayoutType.column})
+        item_component = SubImageLayoutService(d_items, LayoutType.table, detect_result_generator)
     """
 
     def __init__(
         self,
         sub_image_detector: ObjectDetector,
         sub_image_names: Union[str, Sequence[TypeOrStr]],
-
+        service_ids: Optional[Sequence[str]] = None,
         detect_result_generator: Optional[DetectResultGenerator] = None,
         padder: Optional[PadTransform] = None,
     ):
@@ -172,7 +171,8 @@ class SubImageLayoutService(PipelineComponent):
         :param sub_image_names: Category names of ImageAnnotations to be presented to the detector.
                                 Attention: The selected ImageAnnotations must have: attr:`image` and: attr:`image.image`
                                 not None.
-        :param
+        :param service_ids: List of service ids to be used for filtering the ImageAnnotations. If None, all
+                            ImageAnnotations will be used.
         :param detect_result_generator: 'DetectResultGenerator' instance. 'categories' attribute has to be the same as
                                         the 'categories' attribute of the 'sub_image_detector'. The generator will be
                                         responsible to create 'DetectionResult' for some categories, if they have not
@@ -186,7 +186,7 @@ class SubImageLayoutService(PipelineComponent):
             if isinstance(sub_image_names, str)
             else tuple((get_type(cat) for cat in sub_image_names))
         )
-        self.
+        self.service_ids = service_ids
         self.detect_result_generator = detect_result_generator
         self.padder = padder
         self.predictor = sub_image_detector
@@ -208,7 +208,7 @@ class SubImageLayoutService(PipelineComponent):
         - Optionally invoke the DetectResultGenerator
         - Generate ImageAnnotations and dump to parent image and sub image.
         """
-        sub_image_anns = dp.get_annotation(category_names=self.sub_image_name)
+        sub_image_anns = dp.get_annotation(category_names=self.sub_image_name, service_ids=self.service_ids)
         for sub_image_ann in sub_image_anns:
             np_image = self.prepare_np_image(sub_image_ann)
             detect_result_list = self.predictor.predict(np_image)
@@ -223,11 +223,6 @@ class SubImageLayoutService(PipelineComponent):
                 detect_result_list = self.detect_result_generator.create_detection_result(detect_result_list)
 
             for detect_result in detect_result_list:
-                if self.category_id_mapping:
-                    if detect_result.class_id:
-                        detect_result.class_id = self.category_id_mapping.get(
-                            detect_result.class_id, detect_result.class_id
-                        )
                 self.dp_manager.set_image_annotation(detect_result, sub_image_ann.annotation_id)
 
     def get_meta_annotation(self) -> MetaAnnotation:
@@ -254,7 +249,7 @@ class SubImageLayoutService(PipelineComponent):
         return self.__class__(
             predictor,
             self.sub_image_name,
-            self.
+            self.service_ids,
             self.detect_result_generator,
             padder_clone,
         )
```
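The `SubImageLayoutService` constructor now takes a `service_ids` filter in place of the removed id-mapping parameter, so only sub images produced by the listed services are fed to the detector. A hedged construction sketch; the detector and the decision to leave `service_ids` unset are placeholders, not values from this release:

```python
# Sketch only: `cell_detector` is a placeholder for any ObjectDetector predicting sub-layout categories.
from deepdoctection.pipe.sub_layout import SubImageLayoutService

cell_detector = ...  # e.g. a detector that predicts cells inside a table crop

sub_image_service = SubImageLayoutService(
    sub_image_detector=cell_detector,
    sub_image_names="table",
    service_ids=None,  # or a list of upstream service ids to restrict which table annotations are used
    detect_result_generator=None,
    padder=None,
)
```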