deepdoctection 0.30-py3-none-any.whl → 0.32-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (120)
  1. deepdoctection/__init__.py +38 -29
  2. deepdoctection/analyzer/dd.py +36 -29
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/dataflow/base.py +0 -19
  5. deepdoctection/dataflow/custom.py +4 -3
  6. deepdoctection/dataflow/custom_serialize.py +14 -5
  7. deepdoctection/dataflow/parallel_map.py +12 -11
  8. deepdoctection/dataflow/serialize.py +5 -4
  9. deepdoctection/datapoint/annotation.py +35 -13
  10. deepdoctection/datapoint/box.py +3 -5
  11. deepdoctection/datapoint/convert.py +3 -1
  12. deepdoctection/datapoint/image.py +79 -36
  13. deepdoctection/datapoint/view.py +152 -49
  14. deepdoctection/datasets/__init__.py +1 -4
  15. deepdoctection/datasets/adapter.py +6 -3
  16. deepdoctection/datasets/base.py +86 -11
  17. deepdoctection/datasets/dataflow_builder.py +1 -1
  18. deepdoctection/datasets/info.py +4 -4
  19. deepdoctection/datasets/instances/doclaynet.py +3 -2
  20. deepdoctection/datasets/instances/fintabnet.py +2 -1
  21. deepdoctection/datasets/instances/funsd.py +2 -1
  22. deepdoctection/datasets/instances/iiitar13k.py +5 -2
  23. deepdoctection/datasets/instances/layouttest.py +4 -8
  24. deepdoctection/datasets/instances/publaynet.py +2 -2
  25. deepdoctection/datasets/instances/pubtables1m.py +6 -3
  26. deepdoctection/datasets/instances/pubtabnet.py +2 -1
  27. deepdoctection/datasets/instances/rvlcdip.py +2 -1
  28. deepdoctection/datasets/instances/xfund.py +2 -1
  29. deepdoctection/eval/__init__.py +1 -4
  30. deepdoctection/eval/accmetric.py +1 -1
  31. deepdoctection/eval/base.py +5 -4
  32. deepdoctection/eval/cocometric.py +2 -1
  33. deepdoctection/eval/eval.py +19 -15
  34. deepdoctection/eval/tedsmetric.py +14 -11
  35. deepdoctection/eval/tp_eval_callback.py +14 -7
  36. deepdoctection/extern/__init__.py +2 -7
  37. deepdoctection/extern/base.py +39 -13
  38. deepdoctection/extern/d2detect.py +182 -90
  39. deepdoctection/extern/deskew.py +36 -9
  40. deepdoctection/extern/doctrocr.py +265 -83
  41. deepdoctection/extern/fastlang.py +49 -9
  42. deepdoctection/extern/hfdetr.py +106 -55
  43. deepdoctection/extern/hflayoutlm.py +441 -122
  44. deepdoctection/extern/hflm.py +225 -0
  45. deepdoctection/extern/model.py +56 -47
  46. deepdoctection/extern/pdftext.py +10 -5
  47. deepdoctection/extern/pt/__init__.py +1 -3
  48. deepdoctection/extern/pt/nms.py +6 -2
  49. deepdoctection/extern/pt/ptutils.py +27 -18
  50. deepdoctection/extern/tessocr.py +134 -22
  51. deepdoctection/extern/texocr.py +6 -2
  52. deepdoctection/extern/tp/tfutils.py +43 -9
  53. deepdoctection/extern/tp/tpcompat.py +14 -11
  54. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  55. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  56. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  57. deepdoctection/extern/tp/tpfrcnn/config/config.py +9 -6
  58. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +17 -7
  60. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  61. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +9 -4
  62. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  63. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +16 -11
  64. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +17 -10
  65. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +14 -8
  66. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  67. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  68. deepdoctection/extern/tp/tpfrcnn/preproc.py +8 -9
  69. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  70. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  71. deepdoctection/extern/tpdetect.py +54 -30
  72. deepdoctection/mapper/__init__.py +3 -8
  73. deepdoctection/mapper/d2struct.py +9 -7
  74. deepdoctection/mapper/hfstruct.py +7 -2
  75. deepdoctection/mapper/laylmstruct.py +164 -21
  76. deepdoctection/mapper/maputils.py +16 -3
  77. deepdoctection/mapper/misc.py +6 -3
  78. deepdoctection/mapper/prodigystruct.py +1 -1
  79. deepdoctection/mapper/pubstruct.py +10 -10
  80. deepdoctection/mapper/tpstruct.py +3 -3
  81. deepdoctection/pipe/__init__.py +1 -1
  82. deepdoctection/pipe/anngen.py +35 -8
  83. deepdoctection/pipe/base.py +53 -19
  84. deepdoctection/pipe/common.py +23 -13
  85. deepdoctection/pipe/concurrency.py +2 -1
  86. deepdoctection/pipe/doctectionpipe.py +2 -2
  87. deepdoctection/pipe/language.py +3 -2
  88. deepdoctection/pipe/layout.py +6 -3
  89. deepdoctection/pipe/lm.py +34 -66
  90. deepdoctection/pipe/order.py +142 -35
  91. deepdoctection/pipe/refine.py +26 -24
  92. deepdoctection/pipe/segment.py +21 -16
  93. deepdoctection/pipe/{cell.py → sub_layout.py} +30 -9
  94. deepdoctection/pipe/text.py +14 -8
  95. deepdoctection/pipe/transform.py +16 -9
  96. deepdoctection/train/__init__.py +6 -12
  97. deepdoctection/train/d2_frcnn_train.py +36 -28
  98. deepdoctection/train/hf_detr_train.py +26 -17
  99. deepdoctection/train/hf_layoutlm_train.py +133 -111
  100. deepdoctection/train/tp_frcnn_train.py +21 -19
  101. deepdoctection/utils/__init__.py +3 -0
  102. deepdoctection/utils/concurrency.py +1 -1
  103. deepdoctection/utils/context.py +2 -2
  104. deepdoctection/utils/env_info.py +41 -84
  105. deepdoctection/utils/error.py +84 -0
  106. deepdoctection/utils/file_utils.py +4 -15
  107. deepdoctection/utils/fs.py +7 -7
  108. deepdoctection/utils/logger.py +1 -0
  109. deepdoctection/utils/mocks.py +93 -0
  110. deepdoctection/utils/pdf_utils.py +5 -4
  111. deepdoctection/utils/settings.py +6 -1
  112. deepdoctection/utils/transform.py +1 -1
  113. deepdoctection/utils/utils.py +0 -6
  114. deepdoctection/utils/viz.py +48 -5
  115. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/METADATA +57 -73
  116. deepdoctection-0.32.dist-info/RECORD +146 -0
  117. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/WHEEL +1 -1
  118. deepdoctection-0.30.dist-info/RECORD +0 -143
  119. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/LICENSE +0 -0
  120. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/top_level.txt +0 -0
--- a/deepdoctection/pipe/order.py
+++ b/deepdoctection/pipe/order.py
@@ -18,7 +18,10 @@
 """
 Module for ordering text and layout segments pipeline components
 """
+from __future__ import annotations
+
 import os
+from abc import ABC
 from copy import copy
 from itertools import chain
 from logging import DEBUG
@@ -349,10 +352,11 @@ class TextLineGenerator:
         self, make_sub_lines: bool, line_category_id: Union[int, str], paragraph_break: Optional[float] = None
     ):
         """
-        :param make_sub_lines: Whether to build sub lines from lines
+        :param make_sub_lines: Whether to build sub lines from lines.
         :param line_category_id: category_id to give a text line
-        :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
-                                will be built
+        :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sub-lines
+                                will be built. We use relative coordinates to calculate the distance between two
+                                consecutive words. A reasonable value is 0.035
         """
         if make_sub_lines and paragraph_break is None:
             raise ValueError("You must specify paragraph_break when setting make_sub_lines to True")
@@ -375,6 +379,7 @@ class TextLineGenerator:
         image_width: float,
         image_height: float,
         image_id: Optional[str] = None,
+        highest_level: bool = True,
     ) -> Sequence[DetectionResult]:
         """
         Creating detecting result of lines (or sub lines) from given word type `ImageAnnotation`.
@@ -392,6 +397,8 @@
         # list of (word index, text line, word annotation_id)
         word_order_list = OrderGenerator.group_words_into_lines(word_anns, image_id)
         number_rows = max(word[1] for word in word_order_list)
+        if number_rows == 1 and not highest_level:
+            return []
         detection_result_list = []
         for number_row in range(1, number_rows + 1):
             # list of (word index, text line, word annotation_id) for text line equal to number_row
@@ -423,29 +430,141 @@ class TextLineGenerator:
                 if current_box.absolute_coords:
                     current_box = current_box.transform(image_width, image_height)

-                # If distance between boxes is lower than paragraph break, same sub line
+                # If distance between boxes is lower than paragraph break, same sub-line
                 if current_box.ulx - prev_box.lrx < self.paragraph_break:  # type: ignore
                     sub_line.append(ann)
                     sub_line_ann_ids.append(ann.annotation_id)
                 else:
-                    boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
-                    merge_box = merge_boxes(*boxes)
-                    detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
-                    detection_result_list.append(detection_result)
-                    sub_line = [ann]
-                    sub_line_ann_ids = [ann.annotation_id]
+                    # We need to iterate maybe more than one time, because sub-lines may have more than one line
+                    # if having been split. Take fore example a multi-column layout where a sub-line has
+                    # two lines because of a column break and fonts twice as large as the other column.
+                    detection_results = self.create_detection_result(
+                        sub_line, image_width, image_height, image_id, False
+                    )
+                    if detection_results:
+                        detection_result_list.extend(detection_results)
+                    else:
+                        boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
+                        merge_box = merge_boxes(*boxes)
+                        detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
+                        detection_result_list.append(detection_result)
+                    sub_line = [ann]
+                    sub_line_ann_ids = [ann.annotation_id]

                 if idx == len(anns_per_row) - 1:
-                    boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
-                    merge_box = merge_boxes(*boxes)
-                    detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
-                    detection_result_list.append(detection_result)
+                    detection_results = self.create_detection_result(
+                        sub_line, image_width, image_height, image_id, False
+                    )
+                    if detection_results:
+                        detection_result_list.extend(detection_results)
+                    else:
+                        boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
+                        merge_box = merge_boxes(*boxes)
+                        detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
+                        detection_result_list.append(detection_result)

         return detection_result_list


+class TextLineServiceMixin(PipelineComponent, ABC):
+    """
+    This class is used to create text lines similar to TextOrderService.
+    It uses the logic of the TextOrderService but modifies it to suit its needs.
+    It specifically uses the _create_lines_for_words method and modifies the serve method.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        line_category_id: int = 1,
+        include_residual_text_container: bool = True,
+        paragraph_break: Optional[float] = None,
+    ):
+        """
+        Initialize the TextLineService with a line_category_id and a TextLineGenerator instance.
+        """
+        self.line_category_id = line_category_id
+        self.include_residual_text_container = include_residual_text_container
+        self.text_line_generator = TextLineGenerator(
+            self.include_residual_text_container, self.line_category_id, paragraph_break
+        )
+        super().__init__(name)
+
+    def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
+        """
+        This method creates lines for words using the TextLineGenerator instance.
+        """
+        detection_result_list = self.text_line_generator.create_detection_result(
+            word_anns,
+            self.dp_manager.datapoint.width,
+            self.dp_manager.datapoint.height,
+            self.dp_manager.datapoint.image_id,
+        )
+        line_anns = []
+        for detect_result in detection_result_list:
+            ann_id = self.dp_manager.set_image_annotation(detect_result)
+            if ann_id:
+                line_ann = self.dp_manager.get_annotation(ann_id)
+                child_ann_id_list = detect_result.relationships["child"]  # type: ignore
+                for child_ann_id in child_ann_id_list:
+                    line_ann.dump_relationship(Relationships.child, child_ann_id)
+                line_anns.append(line_ann)
+        return line_anns
+
+
+class TextLineService(TextLineServiceMixin):
+    """
+    Some OCR systems do not identify lines of text but only provide text boxes for words. This is not sufficient
+    for certain applications. This service determines rule-based text lines based on word boxes. One difficulty is
+    that text lines are not continuous but are interrupted, for example in multi-column layouts.
+    These interruptions are taken into account insofar as the gap between two words on almost the same page height
+    must not be too large.
+
+    The service constructs new ImageAnnotation of the category `LayoutType.line` and forms relations between the
+    text lines and the words contained in the text lines. The reading order is not arranged.
+    """
+
+    def __init__(self, line_category_id: int = 1, paragraph_break: Optional[float] = None):
+        """
+        Initialize `TextLineService`
+
+        :param line_category_id: category_id to give a text line
+        :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
+                                will be built
+        """
+        super().__init__(
+            name="text_line",
+            line_category_id=line_category_id,
+            include_residual_text_container=True,
+            paragraph_break=paragraph_break,
+        )
+
+    def clone(self) -> PipelineComponent:
+        """
+        This method returns a new instance of the class with the same configuration.
+        """
+        return self.__class__(self.line_category_id, self.text_line_generator.paragraph_break)
+
+    def serve(self, dp: Image) -> None:
+        text_container_anns = dp.get_annotation(category_names=LayoutType.word)
+        self._create_lines_for_words(text_container_anns)
+
+    def get_meta_annotation(self) -> JsonDict:
+        """
+        This method returns metadata about the annotations created by this pipeline component.
+        """
+        return dict(
+            [
+                ("image_annotations", [LayoutType.line]),
+                ("sub_categories", {LayoutType.line: {Relationships.child}}),
+                ("relationships", {}),
+                ("summaries", []),
+            ]
+        )
+
+
 @pipeline_component_registry.register("TextOrderService")
-class TextOrderService(PipelineComponent):
+class TextOrderService(TextLineServiceMixin):
     """
     Reading order of words within floating text blocks as well as reading order of blocks within simple text blocks.
     To understand the difference between floating text blocks and simple text blocks consider a page containing an
@@ -470,7 +589,8 @@ class TextOrderService(PipelineComponent):
     A category annotation per word is generated, which fixes the order per word in the block, as well as a category
     annotation per block, which saves the reading order of the block per page.

-    The blocks are defined in `_floating_text_block_names` and text blocks in `_floating_text_block_names`.
+    The blocks are defined in `text_block_categories` and text blocks that should be considered when generating
+    narrative text must be added in `floating_text_block_categories`.

         order = TextOrderService(text_container="word",
                                  text_block_categories=["title", "text", "list", "cell",
@@ -533,7 +653,12 @@ class TextOrderService(PipelineComponent):
         self.text_line_generator = TextLineGenerator(
             self.include_residual_text_container, line_category_id, paragraph_break
         )
-        super().__init__("text_order")
+        super().__init__(
+            name="text_order",
+            line_category_id=line_category_id,
+            include_residual_text_container=include_residual_text_container,
+            paragraph_break=paragraph_break,
+        )
         self._init_sanity_checks()

     def serve(self, dp: Image) -> None:
@@ -567,24 +692,6 @@ class TextOrderService(PipelineComponent):
                 Relationships.reading_order, idx, Relationships.reading_order, annotation_id
             )

-    def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
-        detection_result_list = self.text_line_generator.create_detection_result(
-            word_anns,
-            self.dp_manager.datapoint.width,
-            self.dp_manager.datapoint.height,
-            self.dp_manager.datapoint.image_id,
-        )
-        line_anns = []
-        for detect_result in detection_result_list:
-            ann_id = self.dp_manager.set_image_annotation(detect_result)
-            if ann_id:
-                line_ann = self.dp_manager.get_annotation(ann_id)
-                child_ann_id_list = detect_result.relationships["child"]  # type: ignore
-                for child_ann_id in child_ann_id_list:
-                    line_ann.dump_relationship(Relationships.child, child_ann_id)
-                line_anns.append(line_ann)
-        return line_anns
-
     def order_text_in_text_block(self, text_block_ann: ImageAnnotation) -> None:
         """
         Order text within a text block. It will take all child-like text containers (determined by a
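Taken together, the hunks above split line building (`TextLineService`) from reading-order assignment (`TextOrderService`), with the shared plumbing in `TextLineServiceMixin`. A rough usage sketch follows; the constructor arguments are taken from the signatures and the docstring example shown above, while the surrounding pipeline wiring is assumed and not part of this diff:

from deepdoctection.pipe.order import TextLineService, TextOrderService

# Builds LayoutType.line annotations from word boxes; no reading order is assigned.
# 0.035 is the paragraph_break value suggested in the TextLineGenerator docstring.
text_lines = TextLineService(line_category_id=1, paragraph_break=0.035)

# Assigns reading_order sub-categories per word and per block; now shares the mixin.
order = TextOrderService(
    text_container="word",
    text_block_categories=["title", "text", "list", "cell"],
    floating_text_block_categories=["title", "text", "list"],
)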
--- a/deepdoctection/pipe/refine.py
+++ b/deepdoctection/pipe/refine.py
@@ -23,7 +23,7 @@ from collections import defaultdict
 from copy import copy
 from dataclasses import asdict
 from itertools import chain, product
-from typing import DefaultDict, List, Optional, Set, Tuple, Union
+from typing import DefaultDict, List, Optional, Sequence, Set, Tuple, Union

 import networkx as nx  # type: ignore

@@ -33,7 +33,8 @@ from ..datapoint.image import Image
 from ..extern.base import DetectionResult
 from ..mapper.maputils import MappingContextManager
 from ..utils.detection_types import JsonDict
-from ..utils.settings import CellType, LayoutType, Relationships, TableType, get_type
+from ..utils.error import AnnotationError, ImageError
+from ..utils.settings import CellType, LayoutType, ObjectTypes, Relationships, TableType, get_type
 from .base import PipelineComponent
 from .registry import pipeline_component_registry

@@ -302,7 +303,7 @@ def generate_html_string(table: ImageAnnotation) -> List[str]:
     :return: HTML representation of the table
     """
     if table.image is None:
-        raise ValueError("table.image cannot be None")
+        raise ImageError("table.image cannot be None")
     table_image = table.image
     cells = table_image.get_annotation(
         category_names=[
@@ -397,22 +398,16 @@ class TableSegmentationRefinementService(PipelineComponent):

     """

-    def __init__(self) -> None:
-        self._table_name = [LayoutType.table, LayoutType.table_rotated]
-        self._cell_names = [
-            LayoutType.cell,
-            CellType.column_header,
-            CellType.projected_row_header,
-            CellType.spanning,
-            CellType.row_header,
-        ]
+    def __init__(self, table_name: Sequence[ObjectTypes], cell_names: Sequence[ObjectTypes]) -> None:
+        self.table_name = table_name
+        self.cell_names = cell_names
         super().__init__("table_segment_refine")

     def serve(self, dp: Image) -> None:
-        tables = dp.get_annotation(category_names=self._table_name)
+        tables = dp.get_annotation(category_names=self.table_name)
         for table in tables:
             if table.image is None:
-                raise ValueError("table.image cannot be None")
+                raise ImageError("table.image cannot be None")
             tiles_to_cells_list = tiles_to_cells(dp, table)
             connected_components, tile_to_cell_dict = connected_component_tiles(tiles_to_cells_list)
             rectangle_tiling = generate_rectangle_tiling(connected_components)
@@ -457,21 +452,28 @@
             for cell in cells:
                 cell.deactivate()

-            cells = table.image.get_annotation(category_names=self._cell_names)
+            cells = table.image.get_annotation(category_names=self.cell_names)
             number_of_rows = max(int(cell.get_sub_category(CellType.row_number).category_id) for cell in cells)
             number_of_cols = max(int(cell.get_sub_category(CellType.column_number).category_id) for cell in cells)
             max_row_span = max(int(cell.get_sub_category(CellType.row_span).category_id) for cell in cells)
             max_col_span = max(int(cell.get_sub_category(CellType.column_span).category_id) for cell in cells)
             # TODO: the summaries should be sub categories of the underlying ann
             if table.image.summary is not None:
-                if TableType.number_of_rows in table.image.summary.sub_categories:
-                    table.get_summary(TableType.number_of_rows)
-                if TableType.number_of_columns in table.image.summary.sub_categories:
-                    table.get_summary(TableType.number_of_columns)
-                if TableType.max_row_span in table.image.summary.sub_categories:
-                    table.get_summary(TableType.max_row_span)
-                if TableType.max_col_span in table.image.summary.sub_categories:
-                    table.get_summary(TableType.max_col_span)
+                if (
+                    TableType.number_of_rows in table.image.summary.sub_categories
+                    and TableType.number_of_columns in table.image.summary.sub_categories
+                    and TableType.max_row_span in table.image.summary.sub_categories
+                    and TableType.max_col_span in table.image.summary.sub_categories
+                ):
+                    table.image.summary.remove_sub_category(TableType.number_of_rows)
+                    table.image.summary.remove_sub_category(TableType.number_of_columns)
+                    table.image.summary.remove_sub_category(TableType.max_row_span)
+                    table.image.summary.remove_sub_category(TableType.max_col_span)
+                else:
+                    raise AnnotationError(
+                        "Table summary does not contain sub categories TableType.number_of_rows, "
+                        "TableType.number_of_columns, TableType.max_row_span, TableType.max_col_span"
+                    )

             self.dp_manager.set_summary_annotation(
                 TableType.number_of_rows, TableType.number_of_rows, number_of_rows, annotation_id=table.annotation_id
@@ -492,7 +494,7 @@
         self.dp_manager.set_container_annotation(TableType.html, -1, TableType.html, table.annotation_id, html)

     def clone(self) -> PipelineComponent:
-        return self.__class__()
+        return self.__class__(self.table_name, self.cell_names)

     def get_meta_annotation(self) -> JsonDict:
         return dict(
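Because the refinement service no longer hard-codes its category lists, callers must now pass them in. A construction equivalent to the removed defaults (the old `_table_name`/`_cell_names` values above) might look like this sketch:

from deepdoctection.pipe.refine import TableSegmentationRefinementService
from deepdoctection.utils.settings import CellType, LayoutType

refine = TableSegmentationRefinementService(
    table_name=[LayoutType.table, LayoutType.table_rotated],
    cell_names=[
        LayoutType.cell,
        CellType.column_header,
        CellType.projected_row_header,
        CellType.spanning,
        CellType.row_header,
    ],
)
# clone() now preserves this configuration instead of returning a default-constructed instance.
refine_copy = refine.clone()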
--- a/deepdoctection/pipe/segment.py
+++ b/deepdoctection/pipe/segment.py
@@ -33,6 +33,7 @@ from ..extern.base import DetectionResult
 from ..mapper.maputils import MappingContextManager
 from ..mapper.match import match_anns_by_intersection
 from ..utils.detection_types import JsonDict
+from ..utils.error import ImageError
 from ..utils.settings import CellType, LayoutType, ObjectTypes, Relationships, TableType
 from .base import PipelineComponent
 from .refine import generate_html_string
@@ -136,12 +137,12 @@ def stretch_item_per_table(

     rows = dp.get_annotation(category_names=row_name, annotation_ids=item_ann_ids)
     if table.image is None:
-        raise ValueError("table.image cannot be None")
+        raise ImageError("table.image cannot be None")
     table_embedding_box = table.get_bounding_box(dp.image_id)

     for row in rows:
         if row.image is None:
-            raise ValueError("row.image cannot be None")
+            raise ImageError("row.image cannot be None")
         row_embedding_box = row.get_bounding_box(dp.image_id)
         row_embedding_box.ulx = table_embedding_box.ulx + 1.0
         row_embedding_box.lrx = table_embedding_box.lrx - 1.0
@@ -166,7 +167,7 @@

     for col in cols:
         if col.image is None:
-            raise ValueError("row.image cannot be None")
+            raise ImageError("row.image cannot be None")
         col_embedding_box = col.get_bounding_box(dp.image_id)
         col_embedding_box.uly = table_embedding_box.uly + 1.0
         col_embedding_box.lry = table_embedding_box.lry - 1.0
@@ -194,7 +195,7 @@ def _tile_by_stretching_rows_left_and_rightwise(
     dp: Image, items: List[ImageAnnotation], table: ImageAnnotation, item_name: str
 ) -> None:
     if table.image is None:
-        raise ValueError("table.image cannot be None")
+        raise ImageError("table.image cannot be None")
     table_embedding_box = table.get_bounding_box(dp.image_id)

     tmp_item_xy = table_embedding_box.uly + 1.0 if item_name == LayoutType.row else table_embedding_box.ulx + 1.0
@@ -206,7 +207,7 @@
         image_annotation={"category_name": item.category_name, "annotation_id": item.annotation_id},
     ):
         if item.image is None:
-            raise ValueError("item.image cannot be None")
+            raise ImageError("item.image cannot be None")
         item_embedding_box = item.get_bounding_box(dp.image_id)
         if idx != len(items) - 1:
             next_item_embedding_box = items[idx + 1].get_bounding_box(dp.image_id)
@@ -258,7 +259,7 @@ def _tile_by_stretching_rows_leftwise_column_downwise(
     dp: Image, items: List[ImageAnnotation], table: ImageAnnotation, item_name: str
 ) -> None:
     if table.image is None:
-        raise ValueError("table.image cannot be None")
+        raise ImageError("table.image cannot be None")
     table_embedding_box = table.get_bounding_box(dp.image_id)

     tmp_item_xy = table_embedding_box.uly + 1.0 if item_name == LayoutType.row else table_embedding_box.ulx + 1.0
@@ -270,7 +271,7 @@
         image_annotation={"category_name": item.category_name, "annotation_id": item.annotation_id},
     ):
         if item.image is None:
-            raise ValueError("item.image cannot be None")
+            raise ImageError("item.image cannot be None")
         item_embedding_box = item.get_bounding_box(dp.image_id)
         new_embedding_box = BoundingBox(
             ulx=item_embedding_box.ulx if item_name == LayoutType.row else tmp_item_xy,
@@ -339,9 +340,9 @@ def tile_tables_with_items_per_table(
     items = dp.get_annotation(category_names=item_name, annotation_ids=item_ann_ids)

     items.sort(
-        key=lambda x: x.get_bounding_box(dp.image_id).cx
-        if item_name == LayoutType.column
-        else x.get_bounding_box(dp.image_id).cy
+        key=lambda x: (
+            x.get_bounding_box(dp.image_id).cx if item_name == LayoutType.column else x.get_bounding_box(dp.image_id).cy
+        )
     )

     if stretch_rule == "left":
@@ -737,9 +738,11 @@ class TableSegmentationService(PipelineComponent):

         # we will assume that either all or no image attribute has been generated
         items.sort(
-            key=lambda x: x.get_bounding_box(dp.image_id).cx  # pylint: disable=W0640
-            if item_name == LayoutType.column  # pylint: disable=W0640
-            else x.get_bounding_box(dp.image_id).cy  # pylint: disable=W0640
+            key=lambda x: (
+                x.get_bounding_box(dp.image_id).cx  # pylint: disable=W0640
+                if item_name == LayoutType.column  # pylint: disable=W0640
+                else x.get_bounding_box(dp.image_id).cy  # pylint: disable=W0640
+            )
         )

         for item_number, item in enumerate(items, 1):
@@ -939,9 +942,11 @@ class PubtablesSegmentationService(PipelineComponent):

         # we will assume that either all or no image attribute has been generated
         items.sort(
-            key=lambda x: x.get_bounding_box(dp.image_id).cx
-            if item_name == LayoutType.column  # pylint: disable=W0640
-            else x.get_bounding_box(dp.image_id).cy
+            key=lambda x: (
+                x.get_bounding_box(dp.image_id).cx
+                if item_name == LayoutType.column  # pylint: disable=W0640
+                else x.get_bounding_box(dp.image_id).cy
+            )
         )

         for item_number, item in enumerate(items, 1):
--- a/deepdoctection/pipe/cell.py
+++ b/deepdoctection/pipe/sub_layout.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# File: cell.py
+# File: sub_layout.py

 # Copyright 2021 Dr. Janis Meyer. All rights reserved.
 #
@@ -24,9 +24,11 @@ from typing import Dict, List, Mapping, Optional, Sequence, Union

 import numpy as np

+from ..datapoint.annotation import ImageAnnotation
+from ..datapoint.box import crop_box_from_image
 from ..datapoint.image import Image
 from ..extern.base import DetectionResult, ObjectDetector, PdfMiner
-from ..utils.detection_types import JsonDict
+from ..utils.detection_types import ImageType, JsonDict
 from ..utils.settings import ObjectTypes, Relationships
 from ..utils.transform import PadTransform
 from .base import PredictorPipelineComponent
@@ -181,18 +183,14 @@ class SubImageLayoutService(PredictorPipelineComponent):
         """
         sub_image_anns = dp.get_annotation_iter(category_names=self.sub_image_name)
         for sub_image_ann in sub_image_anns:
-            if sub_image_ann.image is None:
-                raise ValueError("sub_image_ann.image is None, but must be an image")
-            np_image = sub_image_ann.image.image
-            if self.padder:
-                np_image = self.padder.apply_image(np_image)
-            detect_result_list = self.predictor.predict(np_image)
+            np_image = self.prepare_np_image(sub_image_ann)
+            detect_result_list = self.predictor.predict(np_image)  # type: ignore
             if self.padder and detect_result_list:
                 boxes = np.array([detect_result.box for detect_result in detect_result_list])
                 boxes_orig = self.padder.inverse_apply_coords(boxes)
                 for idx, detect_result in enumerate(detect_result_list):
                     detect_result.box = boxes_orig[idx, :].tolist()
-            if self.detect_result_generator:
+            if self.detect_result_generator and sub_image_ann.image:
                 self.detect_result_generator.width = sub_image_ann.image.width
                 self.detect_result_generator.height = sub_image_ann.image.height
                 detect_result_list = self.detect_result_generator.create_detection_result(detect_result_list)
@@ -235,3 +233,26 @@
             deepcopy(self.detect_result_generator),
             padder_clone,
         )
+
+    def prepare_np_image(self, sub_image_ann: ImageAnnotation) -> ImageType:
+        """Maybe crop and pad a np_array before passing it to the predictor.
+
+        Note that we currently assume to a two level hierachy of images, e.g. we can crop a sub-image from the base
+        image, e.g. the original input but we cannot crop a sub-image from an image which is itself a sub-image.
+
+        :param sub_image_ann: ImageAnnotation to be processed
+        :return: processed np_image
+        """
+        if sub_image_ann.image is None:
+            raise ValueError("sub_image_ann.image is None, but must be an datapoint.Image")
+        np_image = sub_image_ann.image.image
+        if np_image is None and self.dp_manager.datapoint.image is not None:
+            np_image = crop_box_from_image(
+                self.dp_manager.datapoint.image,
+                sub_image_ann.get_bounding_box(self.dp_manager.datapoint.image_id),
+                self.dp_manager.datapoint.width,
+                self.dp_manager.datapoint.height,
+            )
+        if self.padder:
+            np_image = self.padder.apply_image(np_image)
+        return np_image
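The crop-then-pad flow in `prepare_np_image`, together with the box un-padding in `serve`, is a simple geometric round trip. The numpy sketch below shows only the geometry with a constant border; it is not the `PadTransform` API, whose interface may differ:

import numpy as np

pad_top, pad_left = 10, 20
image = np.zeros((100, 200, 3), dtype=np.uint8)

# "apply_image": add a constant border around the array before prediction.
padded = np.pad(image, ((pad_top, pad_top), (pad_left, pad_left), (0, 0)))

# A detector returns boxes in padded coordinates, here as (ulx, uly, lrx, lry).
boxes_padded = np.array([[25.0, 15.0, 80.0, 40.0]])

# "inverse_apply_coords": shift the boxes back into the original, un-padded frame.
boxes_orig = boxes_padded - np.array([pad_left, pad_top, pad_left, pad_top])
print(boxes_orig)  # [[ 5.  5. 60. 30.]]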
--- a/deepdoctection/pipe/text.py
+++ b/deepdoctection/pipe/text.py
@@ -26,6 +26,7 @@ from ..datapoint.image import Image
 from ..extern.base import ObjectDetector, PdfMiner, TextRecognizer
 from ..extern.tessocr import TesseractOcrDetector
 from ..utils.detection_types import ImageType, JsonDict
+from ..utils.error import ImageError
 from ..utils.settings import PageType, TypeOrStr, WordType, get_type
 from .base import PredictorPipelineComponent
 from .registry import pipeline_component_registry
@@ -89,7 +90,10 @@ class TextExtractionService(PredictorPipelineComponent):
         super().__init__(self._get_name(text_extract_detector.name), text_extract_detector)
         if self.extract_from_category:
             if not isinstance(self.predictor, (ObjectDetector, TextRecognizer)):
-                raise TypeError("Predicting from a cropped image requires to pass an ObjectDetector or TextRecognizer.")
+                raise TypeError(
+                    f"Predicting from a cropped image requires to pass an ObjectDetector or "
+                    f"TextRecognizer. Got {type(self.predictor)}"
+                )
         if run_time_ocr_language_selection:
             assert isinstance(
                 self.predictor, TesseractOcrDetector
@@ -171,13 +175,13 @@

         if isinstance(text_roi, ImageAnnotation):
             if text_roi.image is None:
-                raise ValueError("text_roi.image cannot be None")
+                raise ImageError("text_roi.image cannot be None")
             if text_roi.image.image is None:
-                raise ValueError("text_roi.image.image cannot be None")
+                raise ImageError("text_roi.image.image cannot be None")
             return text_roi.image.image
         if isinstance(self.predictor, ObjectDetector):
             if not isinstance(text_roi, Image):
-                raise ValueError("text_roi must be an image")
+                raise ImageError("text_roi must be an image")
             return text_roi.image
         if isinstance(text_roi, list):
             assert all(roi.image is not None for roi in text_roi)
@@ -201,9 +205,11 @@
             [
                 (
                     "image_annotations",
-                    self.predictor.possible_categories()
-                    if isinstance(self.predictor, (ObjectDetector, PdfMiner))
-                    else [],
+                    (
+                        self.predictor.possible_categories()
+                        if isinstance(self.predictor, (ObjectDetector, PdfMiner))
+                        else []
+                    ),
                 ),
                 ("sub_categories", sub_cat_dict),
                 ("relationships", {}),
@@ -218,5 +224,5 @@
     def clone(self) -> "PredictorPipelineComponent":
         predictor = self.predictor.clone()
         if not isinstance(predictor, (ObjectDetector, PdfMiner, TextRecognizer)):
-            raise ValueError(f"predictor must be of type ObjectDetector or PdfMiner, but is of type {type(predictor)}")
+            raise ImageError(f"predictor must be of type ObjectDetector or PdfMiner, but is of type {type(predictor)}")
         return self.__class__(predictor, deepcopy(self.extract_from_category), self.run_time_ocr_language_selection)
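For context, the constructor signature can be read off the `clone()` call above: `(text_extract_detector, extract_from_category, run_time_ocr_language_selection)`. A hedged usage sketch, where the Tesseract config path is hypothetical and must be adjusted to your installation:

from deepdoctection.extern.tessocr import TesseractOcrDetector
from deepdoctection.pipe.text import TextExtractionService

ocr = TesseractOcrDetector("conf_tesseract.yaml")  # hypothetical path

# OCR the full page. Passing a category name for extract_from_category would OCR cropped
# sub-images instead, which is what the stricter TypeError above guards against.
text = TextExtractionService(ocr, extract_from_category=None, run_time_ocr_language_selection=False)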
--- a/deepdoctection/pipe/transform.py
+++ b/deepdoctection/pipe/transform.py
@@ -23,7 +23,6 @@ on images (e.g. deskew, de-noising or more general GAN like operations.
 from ..datapoint.image import Image
 from ..extern.base import ImageTransformer
 from ..utils.detection_types import JsonDict
-from ..utils.logger import LoggingRecord, logger
 from .base import ImageTransformPipelineComponent
 from .registry import pipeline_component_registry

@@ -49,16 +48,24 @@ class SimpleTransformService(ImageTransformPipelineComponent):

     def serve(self, dp: Image) -> None:
         if dp.annotations:
-            logger.warning(
-                LoggingRecord(
-                    f"{self.name} has already received image with image annotations. These annotations "
-                    f"will not be transformed and might cause unexpected output in your pipeline."
-                )
+            raise RuntimeError(
+                "SimpleTransformService receives datapoints with ÌmageAnnotations. This violates the "
+                "pipeline building API but this can currently be catched only at runtime. "
+                "Please make sure that this component is the first one in the pipeline."
             )
+
         if dp.image is not None:
-            np_image_transform = self.transform_predictor.transform(dp.image)
+            detection_result = self.transform_predictor.predict(dp.image)
+            transformed_image = self.transform_predictor.transform(dp.image, detection_result)
             self.dp_manager.datapoint.clear_image(True)
-            self.dp_manager.datapoint.image = np_image_transform
+            self.dp_manager.datapoint.image = transformed_image
+            self.dp_manager.set_summary_annotation(
+                summary_key=self.transform_predictor.possible_category(),
+                summary_name=self.transform_predictor.possible_category(),
+                summary_number=None,
+                summary_value=getattr(detection_result, self.transform_predictor.possible_category().value, None),
+                summary_score=detection_result.score,
+            )

     def clone(self) -> "SimpleTransformService":
         return self.__class__(self.transform_predictor)
@@ -69,7 +76,7 @@ class SimpleTransformService(ImageTransformPipelineComponent):
                 ("image_annotations", []),
                 ("sub_categories", {}),
                 ("relationships", {}),
-                ("summaries", []),
+                ("summaries", [self.transform_predictor.possible_category()]),
             ]
         )

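The reworked `serve` now both transforms the image and records the predictor's `DetectionResult` (for deskewing, a rotation angle) as a page summary, which `get_meta_annotation` advertises via `possible_category()`. An assumed usage sketch follows; the deskew predictor name is inferred from the `extern/deskew.py` changes in this release and may differ:

from deepdoctection.extern.deskew import Jdeskewer
from deepdoctection.pipe.transform import SimpleTransformService

# Must be the first component of a pipeline: serve() raises RuntimeError if the
# datapoint already carries ImageAnnotations. The predicted angle is stored as a
# summary annotation under the predictor's possible_category().
deskew = SimpleTransformService(Jdeskewer())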
--- a/deepdoctection/train/__init__.py
+++ b/deepdoctection/train/__init__.py
@@ -19,20 +19,14 @@
 Init module for train package
 """

-from ..utils.file_utils import (
-    detectron2_available,
-    pytorch_available,
-    tensorpack_available,
-    tf_available,
-    transformers_available,
-)
+from ..utils.file_utils import detectron2_available, tensorpack_available, transformers_available

-if tf_available() and tensorpack_available():
-    from .tp_frcnn_train import train_faster_rcnn
-
-if pytorch_available() and detectron2_available():
+if detectron2_available():
     from .d2_frcnn_train import train_d2_faster_rcnn

-if pytorch_available() and transformers_available():
+if transformers_available():
     from .hf_detr_train import train_hf_detr
     from .hf_layoutlm_train import train_hf_layoutlm
+
+if tensorpack_available():
+    from .tp_frcnn_train import train_faster_rcnn
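The import guards now key off the framework-extension checks alone; the separate `pytorch_available()`/`tf_available()` checks are no longer consulted here (compare the `utils/file_utils.py` and `utils/env_info.py` entries in the file list). Downstream code can mirror the same pattern, as in this minimal sketch:

from deepdoctection.utils.file_utils import detectron2_available, transformers_available

if detectron2_available():
    from deepdoctection.train import train_d2_faster_rcnn  # needs Detectron2 installed

if transformers_available():
    from deepdoctection.train import train_hf_layoutlm  # needs HF Transformers installed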