deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of deepdoctection might be problematic.

Files changed (124)
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
  120. deepdoctection/analyzer/_config.py +0 -146
  121. deepdoctection-0.42.0.dist-info/METADATA +0 -431
  122. deepdoctection-0.42.0.dist-info/RECORD +0 -148
  123. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
@@ -39,8 +39,16 @@ from .anngen import DatapointManager
 
 @dataclass(frozen=True)
 class MetaAnnotation:
-    """A immutable dataclass that stores information about what `Image` are being
-    modified through a pipeline compoenent."""
+    """
+    An immutable dataclass that stores information about which parts of an `Image` are being
+    modified through a pipeline component.
+
+    Attributes:
+        image_annotations: Tuple of `ObjectTypes` representing image annotations.
+        sub_categories: Dictionary mapping `ObjectTypes` to sets of `ObjectTypes` for sub-categories.
+        relationships: Dictionary mapping `ObjectTypes` to sets of `ObjectTypes` for relationships.
+        summaries: Tuple of `ObjectTypes` representing summaries.
+    """
 
     image_annotations: tuple[ObjectTypes, ...] = field(default=())
     sub_categories: dict[ObjectTypes, set[ObjectTypes]] = field(default_factory=dict)
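
For illustration, a `MetaAnnotation` for a hypothetical table-detection component might be constructed as sketched below. This is not part of the diff: `LayoutType` and `Relationships` are assumed to be the `ObjectTypes` enums from `deepdoctection.utils.settings`, and the member names should be checked against the installed release.

```python
# Illustrative sketch only: a component that adds table annotations and
# child relationships could report its footprint like this.
from deepdoctection.pipe.base import MetaAnnotation
from deepdoctection.utils.settings import LayoutType, Relationships

meta = MetaAnnotation(
    image_annotations=(LayoutType.TABLE,),                    # categories the component adds
    sub_categories={},                                        # no sub-categories generated
    relationships={LayoutType.TABLE: {Relationships.CHILD}},  # relationships it creates
    summaries=(),
)
```
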
@@ -50,28 +58,38 @@ class MetaAnnotation:
 
 class PipelineComponent(ABC):
     """
-    Base class for pipeline components. Pipeline components are the parts that make up a pipeline. They contain the
-    abstract `serve`, in which the component steps are defined. Within pipelines, pipeline components take an
-    image, enrich these with annotations or transform existing annotation and transfer the image again. The pipeline
-    component should be implemented in such a way that the pythonic approach of passing arguments via assignment is used
-    well. To support the pipeline component, an intrinsic datapoint manager is provided, which can perform operations on
-    the image datapoint that are common for pipeline components. This includes the creation of an image, sub-category
-    and similar annotations.
-
-    Pipeline components do not necessarily have to contain predictors but can also contain rule-based transformation
-    steps. (For pipeline components with predictors see `PredictorPipelineComponent`.)
-
-    The sequential execution of pipeline components is carried out with dataflows. In the case of components with
-    predictors, this allows the predictor graph to be set up first and then to be streamed to the processed data points.
-
-    **Caution:** Currently, predictors can only process single images. Processing higher number of batches is not
-    planned.
+    Base class for pipeline components.
+
+    Pipeline components are the parts that make up a pipeline. They contain the
+    abstract `serve`, in which the component steps are defined. Within pipelines,
+    pipeline components take an image, enrich it with annotations or transform
+    existing annotations and pass the image on again. The pipeline component should
+    be implemented in such a way that the pythonic approach of passing arguments via
+    assignment is used well. To support the pipeline component, an intrinsic
+    datapoint manager is provided, which can perform operations on the image
+    datapoint that are common for pipeline components. This includes the creation of
+    an image, sub-category and similar annotations.
+
+    Pipeline components do not necessarily have to contain predictors but can also
+    contain rule-based transformation steps. (For pipeline components with
+    predictors see `PredictorPipelineComponent`.)
+
+    The sequential execution of pipeline components is carried out with dataflows.
+    In the case of components with predictors, this allows the predictor graph to be
+    set up first and then to be streamed to the processed data points.
+
+    Note:
+        Currently, predictors can only process single images. Processing images in batches is not planned.
     """
 
     def __init__(self, name: str, model_id: Optional[str] = None) -> None:
         """
-        :param name: The name of the pipeline component. The name will be used to identify a pipeline component in a
-            pipeline. Use something that describe the task of the pipeline.
+        Initializes a `PipelineComponent`.
+
+        Args:
+            name: The name of the pipeline component. The name will be used to identify a pipeline component in a
+                pipeline. Use something that describes the task of the pipeline.
+            model_id: Optional model identifier.
         """
         self.name = name
         self.service_id = self.get_service_id()
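
A minimal, hypothetical rule-based component that satisfies the contract documented above could look as follows. The class name and behavior are illustrative and not part of the package; only the signatures shown in this diff are relied on.

```python
from deepdoctection.datapoint.image import Image
from deepdoctection.pipe.base import MetaAnnotation, PipelineComponent


class NoOpComponent(PipelineComponent):
    """Illustrative component that passes every datapoint through unchanged."""

    def __init__(self) -> None:
        super().__init__(name="noop_component")

    def serve(self, dp: Image) -> None:
        # `dp` is already registered with `self.dp_manager`, so mutating `dp`
        # directly or going through the manager is equivalent.
        pass

    def clone(self) -> "NoOpComponent":
        return self.__class__()

    def get_meta_annotation(self) -> MetaAnnotation:
        # This component generates no annotations, so all fields stay empty.
        return MetaAnnotation()
```
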
@@ -81,39 +99,43 @@ class PipelineComponent(ABC):
 
     def set_inbound_filter(self, filter_func: Callable[[DP], bool]) -> None:
         """
-        Set a filter function to decide, if an image of the inbound dataflow should be passed to self.serve.
-        The filter function should return a boolean value. If the function returns True, the image will not be processed
-        by this pipeline component.
+        Set a filter function to decide if an image of the inbound dataflow should be passed to `self.serve`.
 
-        **Example:**
+        The filter function should return a boolean value. If the function returns True, the image will not be
+        processed by this pipeline component.
 
+        Example:
             ```python
             def do_not_process_tables(dp: Image) -> bool:
-            if "table" not in dp.get_categories_from_current_state():
+                if "table" not in dp.get_categories_from_current_state():
                     return True
-            return False
+                return False
 
             layout_component = ImageLayoutService(...)
             layout_component.set_inbound_filter(do_not_process_tables)
             ```
 
-
-        :param filter_func: A function that takes an image datapoint and returns a boolean value
+        Args:
+            filter_func: A function that takes an image datapoint and returns a boolean value.
         """
         self.filter_func = filter_func  # type: ignore
 
     @abstractmethod
     def serve(self, dp: Image) -> None:
         """
-        Processing an image through the whole pipeline component. Abstract method that contains all processing steps of
-        the component. Please note that dp is already available to the dp_manager and operations for this can be carried
-        out via it.
+        Processing an image through the whole pipeline component.
+
+        Abstract method that contains all processing steps of the component. Please note that `dp` is already available
+        to the `dp_manager` and operations for this can be carried out via it.
+
+        `dp` was transferred to the `dp_manager` via an assignment. This means that operations on `dp` directly or
+        operations via `dp_manager` are equivalent.
 
-        dp was transferred to the dp_manager via an assignment. This means that operations on dp directly or operations
-        via dp_manager are equivalent.
+        As a simplified interface `serve` does not have to return a `dp`. The data point is passed on within pipelines
+        internally (via `pass_datapoint`).
 
-        As a simplified interface `serve` does not have to return a dp. The data point is passed on within
-        pipelines internally (via `pass_datapoint`).
+        Args:
+            dp: The image datapoint to process.
         """
         raise NotImplementedError()
 
@@ -124,12 +146,15 @@ class PipelineComponent(ABC):
 
     def pass_datapoint(self, dp: Image) -> Image:
         """
-        Acceptance, handover to dp_manager, transformation and forwarding of dp. To measure the time, use
+        Acceptance, handover to `dp_manager`, transformation and forwarding of `dp`.
 
-        self.timer_on = True
+        To measure the time, use `self.timer_on = True`.
 
-        :param dp: datapoint
-        :return: datapoint
+        Args:
+            dp: Datapoint.
+
+        Returns:
+            Datapoint.
         """
         if self.timer_on:
             with timed_operation(self.__class__.__name__):
@@ -140,42 +165,60 @@ class PipelineComponent(ABC):
 
     def predict_dataflow(self, df: DataFlow) -> DataFlow:
         """
-        Mapping a datapoint via `pass_datapoint` within a dataflow pipeline
+        Mapping a datapoint via `pass_datapoint` within a dataflow pipeline.
+
+        Args:
+            df: An input dataflow.
 
-        :param df: An input dataflow
-        :return: A output dataflow
+        Returns:
+            An output dataflow.
         """
         return MapData(df, self.pass_datapoint)
 
     @abstractmethod
     def clone(self) -> PipelineComponent:
         """
-        Clone an instance
+        Clone an instance.
+
+        Returns:
+            A cloned instance of `PipelineComponent`.
         """
         raise NotImplementedError()
 
     @abstractmethod
     def get_meta_annotation(self) -> MetaAnnotation:
         """
-        Get a dict of list of annotation type. The dict must contain
+        Get a dict of list of annotation type.
 
-        `image_annotation` with values: a list of category names,
-        `sub_categories` with values: a dict with category names as keys and a list of the generated sub categories
-        `relationships` with values: a dict with category names as keys and a list of the generated relationships
-        `summaries` with values: A list of summary sub categories
-        :return: Dict with meta infos as just described
+        The dict must contain:
+        - `image_annotation` with values: a list of category names,
+        - `sub_categories` with values: a dict with category names as keys and a list of the generated sub categories,
+        - `relationships` with values: a dict with category names as keys and a list of the generated relationships,
+        - `summaries` with values: A list of summary sub categories.
+
+        Returns:
+            Dict with meta infos as just described.
         """
         raise NotImplementedError()
 
     def get_service_id(self) -> str:
         """
-        Get the generating model
+        Get the generated service id.
+
+        Returns:
+            The service id as a string.
         """
         return get_uuid_from_str(self.name)[:8]
 
     def clear_predictor(self) -> None:
         """
-        Clear the predictor of the pipeline component if it has one. Needed for model updates during training.
+        Clear the predictor of the pipeline component if it has one.
+
+        Needed for model updates during training.
+
+        Note:
+            Maybe you forgot to implement this method in your pipeline component. This might be the case when you run
+            evaluation during training and need to update the trained model in your pipeline component.
         """
         raise NotImplementedError(
             "Maybe you forgot to implement this method in your pipeline component. This might "
@@ -185,7 +228,10 @@ class PipelineComponent(ABC):
 
     def has_predictor(self) -> bool:
         """
-        Check if the pipeline component has a predictor
+        Check if the pipeline component has a predictor.
+
+        Returns:
+            `True` if the pipeline component has a predictor, otherwise `False`.
         """
         if hasattr(self, "predictor"):
             if self.predictor is not None:
@@ -194,8 +240,16 @@ class PipelineComponent(ABC):
 
     def _undo(self, dp: Image) -> Image:
         """
-        Undo the processing of the pipeline component. It will remove `ImageAnnotation`, `CategoryAnnotation` and
-        `ContainerAnnotation` with the service_id of the pipeline component.
+        Undo the processing of the pipeline component.
+
+        It will remove `ImageAnnotation`, `CategoryAnnotation` and `ContainerAnnotation` with the `service_id` of the
+        pipeline component.
+
+        Args:
+            dp: The image datapoint.
+
+        Returns:
+            The modified image datapoint.
         """
         if self.timer_on:
             with timed_operation(self.__class__.__name__):
@@ -208,64 +262,69 @@ class PipelineComponent(ABC):
 
     def undo(self, df: DataFlow) -> DataFlow:
         """
-        Mapping a datapoint via `_undo` within a dataflow pipeline
+        Mapping a datapoint via `_undo` within a dataflow pipeline.
+
+        Args:
+            df: An input dataflow of Images.
 
-        :param df: An input dataflow of Images
-        :return: A output dataflow of Images
+        Returns:
+            An output dataflow of Images.
         """
         return MapData(df, self._undo)
 
 
 class Pipeline(ABC):
     """
-    Abstract base class for creating pipelines. Pipelines represent the framework with which documents can be processed
-    by reading individual pages, processing the pages through the pipeline infrastructure and returning the extracted
-    information.
+    Abstract base class for creating pipelines.
+
+    Pipelines represent the framework with which documents can be processed by reading individual pages, processing the
+    pages through the pipeline infrastructure and returning the extracted information.
 
     The infrastructure, as the backbone of the pipeline, consists of a list of pipeline components in which images can
-    be passed through via dataflows. The order of the pipeline components in the list determines the processing order.
-    The components for the pipeline backbone are composed in `_build_pipe`.
+    be passed through via dataflows. The order of the pipeline components in the list determines the processing order.
+    The components for the pipeline backbone are composed in `_build_pipe`.
 
-    The pipeline is set up via: `analyze` for a directory with single pages or a document with multiple pages. A
-    data flow is returned that is triggered via a for loop and starts the actual processing.
+    The pipeline is set up via: `analyze` for a directory with single pages or a document with multiple pages. A data
+    flow is returned that is triggered via a for loop and starts the actual processing.
 
     This creates a pipeline using the following command arrangement:
 
-    **Example:**
+    Example:
+        ```python
+        layout = LayoutPipeComponent(layout_detector ...)
+        text = TextExtractPipeComponent(text_detector ...)
+        simple_pipe = MyPipeline(pipeline_component = [layout, text])
+        doc_dataflow = simple_pipe.analyze(input = path / to / dir)
 
-    ```python
-    layout = LayoutPipeComponent(layout_detector ...)
-    text = TextExtractPipeComponent(text_detector ...)
-    simple_pipe = MyPipeline(pipeline_component = [layout, text])
-    doc_dataflow = simple_pipe.analyze(input = path / to / dir)
+        for page in doc_dataflow:
+            print(page)
+        ```
 
-    for page in doc_dataflow:
-        print(page)
-    ```
-
-    In doing so, page contains all document structures determined via the pipeline (either directly from the Image core
-    model or already processed further).
+    In doing so, `page` contains all document structures determined via the pipeline (either directly from the `Image`
+    core model or already processed further).
 
     In addition to `analyze`, the internal `_entry` is used to bundle preprocessing steps.
 
     It is possible to set a session id for the pipeline. This is useful for logging purposes. The session id can be
-    either passed to the pipeline via the `analyze` method or generated automatically.
-
-    To generate a session_id automatically:
+    either passed to the pipeline via the `analyze` method or generated automatically.
 
-    **Example:**
+    To generate a `session_id` automatically:
 
-    ```python
-    pipe = MyPipeline(pipeline_component = [layout, text])
-    pipe.set_session_id = True
+    Example:
+        ```python
+        pipe = MyPipeline(pipeline_component = [layout, text])
+        pipe.set_session_id = True
 
-    df = pipe.analyze(input = "path/to/dir") # session_id is generated automatically
-    ```
+        df = pipe.analyze(input = "path/to/dir") # session_id is generated automatically
+        ```
     """
 
     def __init__(self, pipeline_component_list: list[PipelineComponent]) -> None:
         """
-        :param pipeline_component_list: A list of pipeline components.
+        Initializes a `Pipeline`.
+
+        Args:
+            pipeline_component_list: A list of pipeline components.
        """
        self.pipe_component_list = pipeline_component_list
        self.set_session_id = False
@@ -274,9 +333,13 @@ class Pipeline(ABC):
     def _entry(self, **kwargs: Any) -> DataFlow:
         """
         Use this method to bundle all preprocessing, such as loading one or more documents, so that a dataflow is
-        provided as a return value that can be passed on to the pipeline backbone.
+        provided as a return value that can be passed on to the pipeline backbone.
+
+        Args:
+            kwargs: Arguments for dynamic customizing of the processing or for the transfer of processing types.
 
-        :param kwargs: Arguments, for dynamic customizing of the processing or for the transfer of processing types
+        Returns:
+            A dataflow for further processing.
         """
         raise NotImplementedError()
 
@@ -284,38 +347,67 @@ class Pipeline(ABC):
 
     @curry
     def _undo(dp: Image, service_ids: Optional[list[str]] = None) -> Image:
         """
-        Remove annotations from a datapoint
+        Remove annotations from a datapoint.
+
+        Args:
+            dp: The image datapoint.
+            service_ids: Optional list of service ids to remove.
+
+        Returns:
+            The modified image datapoint.
         """
         dp.remove(service_ids=service_ids)
         return dp
 
     def undo(self, df: DataFlow, service_ids: Optional[set[str]] = None) -> DataFlow:
         """
-        Mapping a datapoint via `_undo` within a dataflow pipeline
+        Mapping a datapoint via `_undo` within a dataflow pipeline.
+
+        Args:
+            df: An input dataflow of Images.
+            service_ids: A set of service ids to remove.
 
-        :param df: An input dataflow of Images
-        :param service_ids: A set of service ids to remove
-        :return: A output dataflow of Images
+        Returns:
+            An output dataflow of Images.
         """
         return MapData(df, self._undo(service_ids=service_ids))
 
     @abstractmethod
     def analyze(self, **kwargs: Any) -> DataFlow:
         """
-        Try to keep this method as the only one necessary for the user. All processing steps, such as preprocessing,
-        setting up the backbone and post-processing are to be bundled. A dataflow generator df is returned, which is
-        generated via
+        Try to keep this method as the only one necessary for the user.
+
+        All processing steps, such as preprocessing, setting up the backbone and post-processing are to be bundled. A
+        dataflow generator `df` is returned, which is generated via
+
+        Example:
+            ```python
+            df = pipe.analyze(path="path/to/dir")
+            df.reset_state()
             doc = iter(df)
             page = next(doc)
+            ```
 
         can be triggered.
+
+        Args:
+            kwargs: Arguments for analysis.
+
+        Returns:
+            A dataflow generator.
         """
         raise NotImplementedError()
 
     def _build_pipe(self, df: DataFlow, session_id: Optional[str] = None) -> DataFlow:
         """
-        Composition of the backbone
+        Composition of the backbone.
+
+        Args:
+            df: The input dataflow.
+            session_id: Optional session id.
+
+        Returns:
+            The processed dataflow.
         """
         if session_id is None and self.set_session_id:
             session_id = self.get_session_id()
@@ -327,11 +419,12 @@ class Pipeline(ABC):
 
     def get_meta_annotation(self) -> MetaAnnotation:
         """
-        Collects meta annotations from all pipeline components and summarizes the returned results
+        Collects meta annotations from all pipeline components and summarizes the returned results.
 
-        :return: Meta annotations with information about image annotations (list), sub categories (dict with category
-            names and generated sub categories), relationships (dict with category names and generated
-            relationships) as well as summaries (list with sub categories)
+        Returns:
+            Meta annotations with information about image annotations (list), sub categories (dict with category
+            names and generated sub categories), relationships (dict with category names and generated relationships)
+            as well as summaries (list with sub categories).
         """
         image_annotations: list[ObjectTypes] = []
         sub_categories = defaultdict(set)
@@ -355,11 +448,18 @@ class Pipeline(ABC):
     def get_pipeline_info(
         self, service_id: Optional[str] = None, name: Optional[str] = None
     ) -> Union[str, Mapping[str, str]]:
-        """Get pipeline information: Returns a dictionary with a description of each pipeline component
-        :param service_id: service_id of the pipeline component to search for
-        :param name: name of the pipeline component to search for
-        :return: Either a full dictionary with position and name of all pipeline components or the name, if the position
-            has been passed or the position if the name has been passed.
+        """
+        Get pipeline information.
+
+        Returns a dictionary with a description of each pipeline component.
+
+        Args:
+            service_id: Service id of the pipeline component to search for.
+            name: Name of the pipeline component to search for.
+
+        Returns:
+            Either a full dictionary with position and name of all pipeline components or the name, if
+            the position has been passed or the position if the name has been passed.
         """
         comp_info = {comp.service_id: comp.name for comp in self.pipe_component_list}
         comp_info_name_as_key = {value: key for key, value in comp_info.items()}
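
Taken together, `get_pipeline_info` and the `undo` method documented above allow the results of a single component to be stripped from already processed datapoints. A hypothetical sketch, with `pipe` and the component name as placeholders:

```python
# Look up the service_id of a component by its name, then remove everything
# that component contributed. "layout_service" is a placeholder name; call
# pipe.get_pipeline_info() with no arguments to list real names.
service_id = pipe.get_pipeline_info(name="layout_service")  # name -> service_id
df = pipe.undo(df, service_ids={service_id})                # lazily removes annotations
df.reset_state()

for dp in df:  # iterating applies `_undo` to each datapoint
    ...
```
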
@@ -369,9 +469,28 @@ class Pipeline(ABC):
             return comp_info_name_as_key[name]
         return comp_info
 
+    def get_pipeline_component(self, service_id: Optional[str] = None, name: Optional[str] = None) -> PipelineComponent:
+        """
+        Get a pipeline component by `service_id` or `name`.
+
+        Args:
+            service_id: Service id of the pipeline component to search for.
+            name: Name of the pipeline component to search for.
+
+        Returns:
+            The pipeline component if found, otherwise raises ValueError.
+        """
+        for comp in self.pipe_component_list:
+            if comp.service_id == service_id or comp.name == name:
+                return comp
+        raise ValueError(f"Pipeline component not found with service_id={service_id} or name={name}")
+
     @staticmethod
     def get_session_id() -> str:
         """
-        Get the generating a session id
+        Generate a session id.
+
+        Returns:
+            The session id as a string.
         """
         return str(uuid1())[:8]
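
The newly added `get_pipeline_component` pairs naturally with `set_inbound_filter`. A hypothetical usage sketch, where the component name and filter logic are placeholders:

```python
from deepdoctection.datapoint.image import Image

# Fetch a component from a built pipeline and attach an inbound filter to it.
component = pipe.get_pipeline_component(name="layout_service")  # or service_id=...

def skip_forms(dp: Image) -> bool:
    # Returning True makes the component skip this datapoint
    # (file_name is assumed to be set on the Image).
    return dp.file_name.endswith("_form.png")

component.set_inbound_filter(skip_forms)
```
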