PyPI - deepdoctection - Versions diffs - 0.39.6__py3-none-any.whl → 0.40.0__py3-none-any.whl - Mend

deepdoctection 0.39.6__py3-none-any.whl → 0.40.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of deepdoctection might be problematic. Click here for more details.

Files changed (24)

deepdoctection/__init__.py +2 -1
deepdoctection/analyzer/_config.py +0 -1
deepdoctection/analyzer/factory.py +34 -13
deepdoctection/datapoint/box.py +239 -92
deepdoctection/datapoint/convert.py +4 -0
deepdoctection/datapoint/image.py +5 -5
deepdoctection/datapoint/view.py +5 -5
deepdoctection/datasets/registry.py +1 -1
deepdoctection/mapper/match.py +28 -8
deepdoctection/pipe/anngen.py +1 -25
deepdoctection/pipe/common.py +91 -38
deepdoctection/pipe/layout.py +26 -13
deepdoctection/pipe/order.py +6 -22
deepdoctection/pipe/segment.py +36 -43
deepdoctection/pipe/sub_layout.py +2 -11
deepdoctection/pipe/text.py +5 -14
deepdoctection/train/hf_detr_train.py +41 -8
deepdoctection/train/hf_layoutlm_train.py +2 -2
deepdoctection/utils/types.py +1 -1
{deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info}/METADATA +3 -2
{deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info}/RECORD +24 -24
{deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info}/WHEEL +1 -1
{deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info/licenses}/LICENSE +0 -0
{deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info}/top_level.txt +0 -0

deepdoctection/datasets/registry.py CHANGED Viewed

@@ -24,7 +24,7 @@ import catalogue  # type: ignore
 from tabulate import tabulate
 from termcolor import colored
-from .base import DatasetBase, CustomDataset
+from .base import CustomDataset, DatasetBase
 __all__ = ["dataset_registry", "get_dataset", "print_dataset_infos"]

deepdoctection/mapper/match.py CHANGED Viewed

@@ -34,13 +34,15 @@ from ..utils.settings import TypeOrStr
 def match_anns_by_intersection(
     dp: Image,
-    parent_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
-    child_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
     matching_rule: Literal["iou", "ioa"],
     threshold: float,
     use_weighted_intersections: bool = False,
+    parent_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+    child_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
     parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
     child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
+    parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+    child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
     max_parent_only: bool = False,
 ) -> tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
     """
@@ -87,13 +89,19 @@ def match_anns_by_intersection(
                            dates which are not in the list.
     :param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
                           candidates which are not in the list.
+    :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                   parent candidates which are not in the list.
+    :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                  children candidates which are not in the list.
     :param max_parent_only: Will assign to each child at most one parent with maximum ioa
     :return: child indices, parent indices (see Example), list of parent ids and list of children ids.
     """
     assert matching_rule in ["iou", "ioa"], "matching rule must be either iou or ioa"
-    child_anns = dp.get_annotation(annotation_ids=child_ann_ids, category_names=child_ann_category_names)
+    child_anns = dp.get_annotation(
+        annotation_ids=child_ann_ids, category_names=child_ann_category_names, service_ids=child_ann_service_ids
+    )
     child_ann_boxes = np.array(
         [
             ann.get_bounding_box(dp.image_id).transform(dp.width, dp.height, absolute_coords=True).to_list(mode="xyxy")
@@ -101,7 +109,9 @@ def match_anns_by_intersection(
         ]
     )
-    parent_anns = dp.get_annotation(annotation_ids=parent_ann_ids, category_names=parent_ann_category_names)
+    parent_anns = dp.get_annotation(
+        annotation_ids=parent_ann_ids, category_names=parent_ann_category_names, service_ids=parent_ann_service_ids
+    )
     parent_ann_boxes = np.array(
         [
             ann.get_bounding_box(dp.image_id).transform(dp.width, dp.height, absolute_coords=True).to_list(mode="xyxy")
@@ -147,10 +157,12 @@ def match_anns_by_intersection(
 def match_anns_by_distance(
     dp: Image,
-    parent_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
-    child_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
+    parent_ann_category_names:  Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
+    child_ann_category_names:  Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
     parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
     child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
+    parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+    child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
 ) -> list[tuple[ImageAnnotation, ImageAnnotation]]:
     """
     Generates pairs of parent and child annotations by calculating the euclidean distance between the centers of the
@@ -164,11 +176,19 @@ def match_anns_by_distance(
                            dates which are not in the list.
     :param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
                           candidates which are not in the list.
+    :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                   parent candidates which are not in the list.
+    :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                  children candidates which are not in the list.
     :return:
     """
-    parent_anns = dp.get_annotation(annotation_ids=parent_ann_ids, category_names=parent_ann_category_names)
-    child_anns = dp.get_annotation(annotation_ids=child_ann_ids, category_names=child_ann_category_names)
+    parent_anns = dp.get_annotation(
+        annotation_ids=parent_ann_ids, category_names=parent_ann_category_names, service_ids=parent_ann_service_ids
+    )
+    child_anns = dp.get_annotation(
+        annotation_ids=child_ann_ids, category_names=child_ann_category_names, service_ids=child_ann_service_ids
+    )
     child_centers = [block.get_bounding_box(dp.image_id).center for block in child_anns]
     parent_centers = [block.get_bounding_box(dp.image_id).center for block in parent_anns]
     if child_centers and parent_centers:

deepdoctection/pipe/anngen.py CHANGED Viewed

@@ -75,27 +75,6 @@ class DatapointManager:
         """
         assert self.datapoint_is_passed, "Pass datapoint to  DatapointManager before creating anns"
-    def maybe_map_category_id(self, category_id: Union[str, int]) -> int:
-        """
-        Maps categories if a category id mapping is provided in `__init__`.
-        :param category_id: category id via integer or string.
-        :return: mapped category id
-        """
-        if self.category_id_mapping is None:
-            return int(category_id)
-        return self.category_id_mapping[int(category_id)]
-    def set_category_id_mapping(self, category_id_mapping: Mapping[int, int]) -> None:
-        """
-        In many cases the category ids sent back from a model have to be modified. Pass a mapping from model
-        category ids to target annotation category ids.
-        :param category_id_mapping: A mapping of model category ids (sent from DetectionResult) to category ids (saved
-                                    in annotations)
-        """
-        self.category_id_mapping = category_id_mapping
     def set_image_annotation(
         self,
         detect_result: DetectionResult,
@@ -127,13 +106,10 @@ class DatapointManager:
         :return: the annotation_id of the generated image annotation
         """
         self.assert_datapoint_passed()
-        if detect_result.class_id is None:
-            raise ValueError("class_id of detect_result cannot be None")
         if not isinstance(detect_result.box, (list, np.ndarray)):
             raise TypeError(
                 f"detect_result.box must be of type list or np.ndarray, but is of type {(type(detect_result.box))}"
             )
-        detect_result.class_id = self.maybe_map_category_id(detect_result.class_id)
         with MappingContextManager(
             dp_name=self.datapoint.file_name, filter_level="annotation", detect_result=asdict(detect_result)
         ) as annotation_context:
@@ -155,7 +131,7 @@ class DatapointManager:
             ann = ImageAnnotation(
                 category_name=detect_result.class_name,
                 bounding_box=box,
-                category_id=detect_result.class_id,
+                category_id=detect_result.class_id if detect_result.class_id is not None else DEFAULT_CATEGORY_ID,
                 score=detect_result.score,
                 service_id=self.service_id,
                 model_id=self.model_id,

deepdoctection/pipe/common.py CHANGED Viewed

@@ -22,6 +22,7 @@ from __future__ import annotations
 import os
 from copy import deepcopy
+from dataclasses import dataclass, field
 from typing import Literal, Mapping, Optional, Sequence, Union
 import numpy as np
@@ -49,24 +50,30 @@ class ImageCroppingService(PipelineComponent):
     generally not stored.
     """
-    def __init__(self, category_names: Union[TypeOrStr, Sequence[TypeOrStr]]):
+    def __init__(
+        self, category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+             service_ids: Optional[Sequence[str]] = None
+    ) -> None:
         """
         :param category_names: A single name or a list of category names to crop
         """
-        self.category_names = (
-            (category_names,)
-            if isinstance(category_names, str)
-            else tuple(get_type(category_name) for category_name in category_names)
-        )
+        if category_names is None:
+            self.category_names = None
+        else:
+            self.category_names = (
+                (category_names,)
+                if isinstance(category_names, str)
+                else tuple(get_type(category_name) for category_name in category_names)
+            )
+        self.service_ids = service_ids
         super().__init__("image_crop")
     def serve(self, dp: Image) -> None:
-        for ann in dp.get_annotation(category_names=self.category_names):
+        for ann in dp.get_annotation(category_names=self.category_names, service_ids=self.service_ids):
             dp.image_ann_to_image(ann.annotation_id, crop_image=True)
     def clone(self) -> ImageCroppingService:
-        return self.__class__(self.category_names)
+        return self.__class__(self.category_names, self.service_ids)
     def get_meta_annotation(self) -> MetaAnnotation:
         return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=())
@@ -124,8 +131,10 @@ class IntersectionMatcher:
     def match(
         self,
         dp: Image,
-        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
-        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+        child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
     ) -> list[tuple[str, str]]:
         """
         The matching algorithm
@@ -133,6 +142,10 @@ class IntersectionMatcher:
         :param dp: datapoint image
         :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
         :param child_categories: list of categories to be used for a child class.
+        :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                        parent candidates which are not in the list.
+        :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                        children candidates which are not in the list.
         :return: A list of tuples with parent and child annotation ids
         """
@@ -144,6 +157,8 @@ class IntersectionMatcher:
             threshold=self.threshold,
             use_weighted_intersections=self.use_weighted_intersections,
             max_parent_only=self.max_parent_only,
+            parent_ann_service_ids=parent_ann_service_ids,
+            child_ann_service_ids=child_ann_service_ids,
         )
         matched_child_anns = np.take(child_anns, child_index)  # type: ignore
@@ -174,8 +189,10 @@ class NeighbourMatcher:
     def match(
         self,
         dp: Image,
-        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
-        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+        child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
     ) -> list[tuple[str, str]]:
         """
         The matching algorithm
@@ -183,16 +200,54 @@ class NeighbourMatcher:
         :param dp: datapoint image
         :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
         :param child_categories: list of categories to be used for a child class.
+        :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                        parent candidates which are not in the list.
+        :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                                        children candidates which are not in the list.
         :return: A list of tuples with parent and child annotation ids
         """
         return [
             (pair[0].annotation_id, pair[1].annotation_id)
-            for pair in match_anns_by_distance(dp, parent_categories, child_categories)
+            for pair in match_anns_by_distance(
+                dp,
+                parent_ann_category_names=parent_categories,
+                child_ann_category_names=child_categories,
+                parent_ann_service_ids=parent_ann_service_ids,
+                child_ann_service_ids=child_ann_service_ids,
+            )
         ]
+@dataclass
+class FamilyCompound:
+    """
+    A family compound is a set of parent and child categories that are related by a relationship key. The parent
+    categories will receive a relationship to the child categories.
+    """
+    relationship_key: Relationships
+    parent_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
+    child_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
+    parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
+    child_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
+    def __post_init__(self) -> None:
+        if isinstance(self.parent_categories, str):
+            self.parent_categories = (get_type(self.parent_categories),)
+        elif self.parent_categories is not None:
+            self.parent_categories = tuple(get_type(parent) for parent in self.parent_categories)
+        if isinstance(self.child_categories, str):
+            self.child_categories = (get_type(self.child_categories),)
+        elif self.child_categories is not None:
+            self.child_categories = tuple(get_type(child) for child in self.child_categories)
+        if isinstance(self.parent_ann_service_ids, str):
+            self.parent_ann_service_ids = (self.parent_ann_service_ids,)
+        if isinstance(self.child_ann_service_ids, str):
+            self.child_ann_service_ids = (self.child_ann_service_ids,)
 @pipeline_component_registry.register("MatchingService")
 class MatchingService(PipelineComponent):
     """
@@ -202,28 +257,15 @@ class MatchingService(PipelineComponent):
     def __init__(
         self,
-        parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
-        child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
+        family_compounds: Sequence[FamilyCompound],
         matcher: Union[IntersectionMatcher, NeighbourMatcher],
-        relationship_key: Relationships,
     ) -> None:
         """
-        :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
-        :param child_categories: list of categories to be used for a child class.
+        :param family_compounds: A list of FamilyCompounds
+        :param matcher: A matcher object
         """
-        self.parent_categories = (
-            (get_type(parent_categories),)
-            if isinstance(parent_categories, str)
-            else tuple(get_type(category_name) for category_name in parent_categories)
-        )
-        self.child_categories = (
-            (get_type(child_categories),)
-            if isinstance(child_categories, str)
-            else (tuple(get_type(category_name) for category_name in child_categories))
-        )
+        self.family_compounds = family_compounds
         self.matcher = matcher
-        self.relationship_key = relationship_key
         super().__init__("matching")
     def serve(self, dp: Image) -> None:
@@ -233,20 +275,31 @@ class MatchingService(PipelineComponent):
         :param dp: datapoint image
         """
-        matched_pairs = self.matcher.match(dp, self.parent_categories, self.child_categories)
-        for pair in matched_pairs:
-            self.dp_manager.set_relationship_annotation(self.relationship_key, pair[0], pair[1])
+        for family_compound in self.family_compounds:
+            matched_pairs = self.matcher.match(
+                dp,
+                parent_categories=family_compound.parent_categories,
+                child_categories=family_compound.child_categories,
+                parent_ann_service_ids=family_compound.parent_ann_service_ids,
+                child_ann_service_ids=family_compound.child_ann_service_ids,
+            )
+            for pair in matched_pairs:
+                self.dp_manager.set_relationship_annotation(family_compound.relationship_key, pair[0], pair[1])
     def clone(self) -> PipelineComponent:
-        return self.__class__(self.parent_categories, self.child_categories, self.matcher, self.relationship_key)
+        return self.__class__(self.family_compounds, self.matcher)
     def get_meta_annotation(self) -> MetaAnnotation:
+        relationships: dict[ObjectTypes, set[ObjectTypes]] = {}
+        for family_compound in self.family_compounds:
+            if family_compound.parent_categories is not None:
+                for parent_category in family_compound.parent_categories:
+                    relationships[parent_category] = {family_compound.relationship_key}  # type: ignore
         return MetaAnnotation(
             image_annotations=(),
             sub_categories={},
-            relationships={parent: {Relationships.CHILD} for parent in self.parent_categories},
+            relationships=relationships,
             summaries=(),
         )

deepdoctection/pipe/layout.py CHANGED Viewed

@@ -20,18 +20,41 @@ Module for layout pipeline component
 """
 from __future__ import annotations
-from typing import Optional
+from typing import Optional, Sequence, Union
 import numpy as np
 from ..datapoint.image import Image
 from ..extern.base import ObjectDetector, PdfMiner
+from ..mapper.misc import curry
 from ..utils.error import ImageError
+from ..utils.settings import ObjectTypes
 from ..utils.transform import PadTransform
 from .base import MetaAnnotation, PipelineComponent
 from .registry import pipeline_component_registry
+@curry
+def skip_if_category_or_service_extracted(
+    dp: Image,
+    category_names: Optional[Union[str, Sequence[ObjectTypes]]] = None,
+    service_ids: Optional[Union[str, Sequence[str]]] = None,
+) -> bool:
+    """
+    Skip the processing of the pipeline component if the category or service is already extracted.
+    **Example**
+        detector = # some detector
+        item_component = ImageLayoutService(detector)
+        item_component.set_inbound_filter(skip_if_category_or_service_extracted(detector.get_categories(as_dict=False)))
+    """
+    if dp.get_annotation(category_names=category_names, service_ids=service_ids):
+        return True
+    return False
 @pipeline_component_registry.register("ImageLayoutService")
 class ImageLayoutService(PipelineComponent):
     """
@@ -45,7 +68,7 @@ class ImageLayoutService(PipelineComponent):
     **Example**
-            d_items = TPFrcnnDetector(item_config_path, item_weights_path, {"1": "ROW", "2": "COLUMNS"})
+            d_items = TPFrcnnDetector(item_config_path, item_weights_path, {1: 'row', 2: 'column'})
             item_component = ImageLayoutService(d_items)
     """
@@ -55,7 +78,6 @@ class ImageLayoutService(PipelineComponent):
         to_image: bool = False,
         crop_image: bool = False,
         padder: Optional[PadTransform] = None,
-        skip_if_layout_extracted: bool = False,
     ):
         """
         :param layout_detector: object detector
@@ -65,23 +87,14 @@ class ImageLayoutService(PipelineComponent):
                            to its bounding box and populate the resulting sub image to
                            `ImageAnnotation.image.image`.
         :param padder: If not `None`, will apply the padder to the image before prediction and inverse apply the padder
-        :param skip_if_layout_extracted: When `True` will check, if there are already `ImageAnnotation` of a category
-                                         available that will be predicted by the `layout_detector`. If yes, will skip
-                                         the prediction process.
         """
         self.to_image = to_image
         self.crop_image = crop_image
         self.padder = padder
-        self.skip_if_layout_extracted = skip_if_layout_extracted
         self.predictor = layout_detector
         super().__init__(self._get_name(layout_detector.name), self.predictor.model_id)
     def serve(self, dp: Image) -> None:
-        if self.skip_if_layout_extracted:
-            categories = self.predictor.get_category_names()
-            anns = dp.get_annotation(category_names=categories)
-            if anns:
-                return
         if dp.image is None:
             raise ImageError("image cannot be None")
         np_image = dp.image
@@ -117,7 +130,7 @@ class ImageLayoutService(PipelineComponent):
             padder_clone = self.padder.clone()
         if not isinstance(predictor, ObjectDetector):
             raise TypeError(f"predictor must be of type ObjectDetector, but is of type {type(predictor)}")
-        return self.__class__(predictor, self.to_image, self.crop_image, padder_clone, self.skip_if_layout_extracted)
+        return self.__class__(predictor, self.to_image, self.crop_image, padder_clone)
     def clear_predictor(self) -> None:
         self.predictor.clear_model()

deepdoctection/pipe/order.py CHANGED Viewed

@@ -347,19 +347,15 @@ class TextLineGenerator:
     a paragraph break threshold. This allows to detect a multi column structure just by observing sub lines.
     """
-    def __init__(
-        self, make_sub_lines: bool, line_category_id: Union[int, str], paragraph_break: Optional[float] = None
-    ):
+    def __init__(self, make_sub_lines: bool, paragraph_break: Optional[float] = None):
         """
         :param make_sub_lines: Whether to build sub lines from lines.
-        :param line_category_id: category_id to give a text line
         :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sub-lines
                                 will be built. We use relative coordinates to calculate the distance between two
                                 consecutive words. A reasonable value is 0.035
         """
         if make_sub_lines and paragraph_break is None:
             raise ValueError("You must specify paragraph_break when setting make_sub_lines to True")
-        self.line_category_id = int(line_category_id)
         self.make_sub_lines = make_sub_lines
         self.paragraph_break = paragraph_break
@@ -367,7 +363,6 @@ class TextLineGenerator:
         return DetectionResult(
             box=box.to_list(mode="xyxy"),
             class_name=LayoutType.LINE,
-            class_id=self.line_category_id,
             absolute_coords=box.absolute_coords,
             relationships=relationships,
         )
@@ -475,18 +470,14 @@ class TextLineServiceMixin(PipelineComponent, ABC):
     def __init__(
         self,
         name: str,
-        line_category_id: int = 1,
         include_residual_text_container: bool = True,
         paragraph_break: Optional[float] = None,
     ):
         """
-        Initialize the TextLineService with a line_category_id and a TextLineGenerator instance.
+        Initialize the TextLineServiceMixin with a TextLineGenerator instance.
         """
-        self.line_category_id = line_category_id
         self.include_residual_text_container = include_residual_text_container
-        self.text_line_generator = TextLineGenerator(
-            self.include_residual_text_container, self.line_category_id, paragraph_break
-        )
+        self.text_line_generator = TextLineGenerator(self.include_residual_text_container, paragraph_break)
         super().__init__(name)
     def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
@@ -523,17 +514,15 @@ class TextLineService(TextLineServiceMixin):
     text lines and the words contained in the text lines. The reading order is not arranged.
     """
-    def __init__(self, line_category_id: int = 1, paragraph_break: Optional[float] = None):
+    def __init__(self, paragraph_break: Optional[float] = None):
         """
         Initialize `TextLineService`
-        :param line_category_id: category_id to give a text line
         :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
                                 will be built
         """
         super().__init__(
             name="text_line",
-            line_category_id=line_category_id,
             include_residual_text_container=True,
             paragraph_break=paragraph_break,
         )
@@ -542,7 +531,7 @@ class TextLineService(TextLineServiceMixin):
         """
         This method returns a new instance of the class with the same configuration.
         """
-        return self.__class__(self.line_category_id, self.text_line_generator.paragraph_break)
+        return self.__class__(self.text_line_generator.paragraph_break)
     def serve(self, dp: Image) -> None:
         text_container_anns = dp.get_annotation(category_names=LayoutType.WORD)
@@ -605,7 +594,6 @@ class TextOrderService(TextLineServiceMixin):
         broken_line_tolerance: float = 0.003,
         height_tolerance: float = 2.0,
         paragraph_break: Optional[float] = 0.035,
-        line_category_id: int = 1,
     ):
         """
         :param text_container: name of an image annotation that has a CHARS sub category. These annotations will be
@@ -647,12 +635,9 @@ class TextOrderService(TextLineServiceMixin):
             self.floating_text_block_categories = self.floating_text_block_categories + (LayoutType.LINE,)
         self.include_residual_text_container = include_residual_text_container
         self.order_generator = OrderGenerator(starting_point_tolerance, broken_line_tolerance, height_tolerance)
-        self.text_line_generator = TextLineGenerator(
-            self.include_residual_text_container, line_category_id, paragraph_break
-        )
+        self.text_line_generator = TextLineGenerator(self.include_residual_text_container, paragraph_break)
         super().__init__(
             name="text_order",
-            line_category_id=line_category_id,
             include_residual_text_container=include_residual_text_container,
             paragraph_break=paragraph_break,
         )
@@ -763,7 +748,6 @@ class TextOrderService(TextLineServiceMixin):
             self.order_generator.broken_line_tolerance,
             self.order_generator.height_tolerance,
             self.text_line_generator.paragraph_break,
-            self.text_line_generator.line_category_id,
         )
     def clear_predictor(self) -> None:

deepdoctection 0.39.6__py3-none-any.whl → 0.40.0__py3-none-any.whl

Potentially problematic release.

deepdoctection 0.39.6__py3-none-any.whl → 0.40.0__py3-none-any.whl