deepdoctection 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.


@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
 
 # pylint: enable=wrong-import-position
 
-__version__ = "0.40.0"
+__version__ = "0.42.0"
 
 _IMPORT_STRUCTURE = {
     "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -90,13 +90,12 @@ _IMPORT_STRUCTURE = {
         "convert_np_array_to_b64_b",
         "convert_bytes_to_np_array",
         "convert_pdf_bytes_to_np_array_v2",
-        "box_to_point4",
-        "point4_to_box",
         "as_dict",
         "ImageAnnotationBaseView",
         "Image",
         "Word",
         "Layout",
+        "List",
        "Cell",
         "Table",
         "Page",
@@ -164,6 +163,7 @@ _IMPORT_STRUCTURE = {
         "LMSequenceClassifier",
         "LanguageDetector",
         "ImageTransformer",
+        "DeterministicImageTransformer",
         "InferenceResize",
         "D2FrcnnDetector",
         "D2FrcnnTracingDetector",
@@ -401,11 +401,14 @@ _IMPORT_STRUCTURE = {
         "get_type",
         "get_tqdm",
         "get_tqdm_default_kwargs",
+        "box_to_point4",
+        "point4_to_box",
         "ResizeTransform",
         "InferenceResize",
         "normalize_image",
         "pad_image",
         "PadTransform",
+        "RotationTransform",
         "delete_keys_from_dict",
         "split_string",
         "string_to_dict",
@@ -438,7 +441,7 @@ if TYPE_CHECKING:
     from .eval import *
     from .extern import *  # type: ignore
     from .mapper import *  # type: ignore
-    from .pipe import *
+    from .pipe import *  # type: ignore
     from .train import *
     from .utils import *
 
@@ -40,7 +40,7 @@ cfg.TF.CELL.FILTER = None
 cfg.TF.ITEM.WEIGHTS = "item/model-1620000_inf_only.data-00000-of-00001"
 cfg.TF.ITEM.FILTER = None
 
-cfg.PT.ENFORCE_WEIGHTS = False
+cfg.PT.ENFORCE_WEIGHTS.LAYOUT = True
 cfg.PT.LAYOUT.WEIGHTS = "layout/d2_model_0829999_layout_inf_only.pt"
 cfg.PT.LAYOUT.WEIGHTS_TS = "layout/d2_model_0829999_layout_inf_only.ts"
 cfg.PT.LAYOUT.FILTER = None
@@ -49,6 +49,7 @@ cfg.PT.LAYOUT.PAD.RIGHT = 60
 cfg.PT.LAYOUT.PAD.BOTTOM = 60
 cfg.PT.LAYOUT.PAD.LEFT = 60
 
+cfg.PT.ENFORCE_WEIGHTS.ITEM = True
 cfg.PT.ITEM.WEIGHTS = "item/d2_model_1639999_item_inf_only.pt"
 cfg.PT.ITEM.WEIGHTS_TS = "item/d2_model_1639999_item_inf_only.ts"
 cfg.PT.ITEM.FILTER = None
@@ -57,6 +58,7 @@ cfg.PT.ITEM.PAD.RIGHT = 60
 cfg.PT.ITEM.PAD.BOTTOM = 60
 cfg.PT.ITEM.PAD.LEFT = 60
 
+cfg.PT.ENFORCE_WEIGHTS.CELL = True
 cfg.PT.CELL.WEIGHTS = "cell/d2_model_1849999_cell_inf_only.pt"
 cfg.PT.CELL.WEIGHTS_TS = "cell/d2_model_1849999_cell_inf_only.ts"
 cfg.PT.CELL.FILTER = None
@@ -137,6 +139,7 @@ cfg.TEXT_ORDERING.HEIGHT_TOLERANCE = 2.0
 cfg.TEXT_ORDERING.PARAGRAPH_BREAK = 0.035
 
 cfg.USE_LAYOUT_LINK = False
+cfg.USE_LINE_MATCHER = False
 cfg.LAYOUT_LINK.PARENTAL_CATEGORIES = []
 cfg.LAYOUT_LINK.CHILD_CATEGORIES = []
 
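Note that PT.ENFORCE_WEIGHTS changes from a single boolean into one flag per model (LAYOUT, ITEM, CELL), and USE_LINE_MATCHER is a new switch. A minimal sketch of targeting the new keys through get_dd_analyzer; the list-of-"KEY=VALUE" syntax for config_overwrite is an assumption based on deepdoctection's standard configuration mechanism:

    import deepdoctection as dd

    # Force the TorchScript fallback for the layout model and enable the new
    # line matcher. The keys mirror the 0.42.0 defaults shown above.
    analyzer = dd.get_dd_analyzer(
        config_overwrite=[
            "PT.ENFORCE_WEIGHTS.LAYOUT=False",
            "USE_LINE_MATCHER=True",
        ]
    )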
@@ -32,7 +32,7 @@ from ..extern.pt.ptutils import get_torch_device
 from ..extern.tp.tfutils import disable_tp_layer_logging, get_tf_device
 from ..pipe.doctectionpipe import DoctectionPipe
 from ..utils.env_info import ENV_VARS_TRUE
-from ..utils.file_utils import tensorpack_available
+from ..utils.file_utils import tensorpack_available, detectron2_available
 from ..utils.fs import get_configs_dir_path, get_package_path, maybe_copy_config_to_cache
 from ..utils.logger import LoggingRecord, logger
 from ..utils.metacfg import set_config_by_yaml
@@ -140,6 +140,12 @@ def get_dd_analyzer(
     cfg.LANGUAGE = None
     cfg.LIB = lib
     cfg.DEVICE = device
+    if not detectron2_available() or cfg.PT.LAYOUT.WEIGHTS is None:
+        cfg.PT.ENFORCE_WEIGHTS.LAYOUT = False
+    if not detectron2_available() or cfg.PT.ITEM.WEIGHTS is None:
+        cfg.PT.ENFORCE_WEIGHTS.ITEM = False
+    if not detectron2_available() or cfg.PT.CELL.WEIGHTS is None:
+        cfg.PT.ENFORCE_WEIGHTS.CELL = False
     cfg.freeze()
 
     if config_overwrite:
@@ -50,7 +50,6 @@ from ..pipe.sub_layout import DetectResultGenerator, SubImageLayoutService
 from ..pipe.text import TextExtractionService
 from ..pipe.transform import SimpleTransformService
 from ..utils.error import DependencyError
-from ..utils.file_utils import detectron2_available
 from ..utils.fs import get_configs_dir_path
 from ..utils.metacfg import AttrDict
 from ..utils.settings import CellType, LayoutType, Relationships
@@ -96,12 +95,13 @@ class ServiceFactory:
         """
         if config.LIB is None:
             raise DependencyError("At least one of the env variables DD_USE_TF or DD_USE_TORCH must be set.")
+
         weights = (
             getattr(config.TF, mode).WEIGHTS
             if config.LIB == "TF"
             else (
                 getattr(config.PT, mode).WEIGHTS
-                if detectron2_available() or config.PT.ENFORCE_WEIGHTS
+                if getattr(config.PT.ENFORCE_WEIGHTS, mode)
                 else getattr(config.PT, mode).WEIGHTS_TS
             )
         )
@@ -197,7 +197,7 @@ class ServiceFactory:
             getattr(config.PT, mode).PAD.BOTTOM,
             getattr(config.PT, mode).PAD.LEFT,
         )
-        return PadTransform(top=top, right=right, bottom=bottom, left=left)  #
+        return PadTransform(pad_top=top, pad_right=right, pad_bottom=bottom, pad_left=left)  #
 
     @staticmethod
     def build_padder(config: AttrDict, mode: str) -> PadTransform:
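The PadTransform keyword arguments gain a pad_ prefix in this release. A minimal sketch of constructing the padder directly, assuming the utils.transform import path used elsewhere in this diff:

    from deepdoctection.utils.transform import PadTransform

    # 0.40.0 spelling was PadTransform(top=60, right=60, bottom=60, left=60)
    padder = PadTransform(pad_top=60, pad_right=60, pad_bottom=60, pad_left=60)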
@@ -240,8 +240,6 @@ class ServiceFactory:
 
         :param config: configuration object
         """
-        if not detectron2_available() and config.LIB == "PT":
-            raise ModuleNotFoundError("LAYOUT_NMS_PAIRS is only available for detectron2")
         if not isinstance(config.LAYOUT_NMS_PAIRS.COMBINATIONS, list) and not isinstance(
             config.LAYOUT_NMS_PAIRS.COMBINATIONS[0], list
         ):
@@ -577,7 +575,14 @@ class ServiceFactory:
                 parent_categories=config.WORD_MATCHING.PARENTAL_CATEGORIES,
                 child_categories=config.TEXT_CONTAINER,
                 relationship_key=Relationships.CHILD,
-            )
+            ),
+            FamilyCompound(
+                parent_categories=[LayoutType.LIST],
+                child_categories=[LayoutType.LIST_ITEM],
+                relationship_key=Relationships.CHILD,
+                create_synthetic_parent=True,
+                synthetic_parent=LayoutType.LIST,
+            ),
         ]
         return MatchingService(
             family_compounds=family_compounds,
@@ -622,6 +627,34 @@ class ServiceFactory:
         """
         return ServiceFactory._build_layout_link_matching_service(config)
 
+    @staticmethod
+    def _build_line_matching_service(config: AttrDict) -> MatchingService:
+        matcher = IntersectionMatcher(
+            matching_rule=config.WORD_MATCHING.RULE,
+            threshold=config.WORD_MATCHING.THRESHOLD,
+            max_parent_only=config.WORD_MATCHING.MAX_PARENT_ONLY,
+        )
+        family_compounds = [
+            FamilyCompound(
+                parent_categories=[LayoutType.LIST],
+                child_categories=[LayoutType.LINE],
+                relationship_key=Relationships.CHILD,
+            ),
+        ]
+        return MatchingService(
+            family_compounds=family_compounds,
+            matcher=matcher,
+        )
+
+    @staticmethod
+    def build_line_matching_service(config: AttrDict) -> MatchingService:
+        """Building a line matching service
+
+        :param config: configuration object
+        :return: MatchingService
+        """
+        return ServiceFactory._build_line_matching_service(config)
+
     @staticmethod
     def _build_text_order_service(config: AttrDict) -> TextOrderService:
         """Building a text order service
@@ -748,6 +781,10 @@ class ServiceFactory:
             layout_link_matching_service = ServiceFactory.build_layout_link_matching_service(config)
             pipe_component_list.append(layout_link_matching_service)
 
+        if config.USE_LINE_MATCHER:
+            line_list_matching_service = ServiceFactory.build_line_matching_service(config)
+            pipe_component_list.append(line_list_matching_service)
+
         page_parsing_service = ServiceFactory.build_page_parsing_service(config)
 
         return DoctectionPipe(pipeline_component_list=pipe_component_list, page_parsing_service=page_parsing_service)
@@ -27,7 +27,6 @@ from typing import Any, Optional, Union, no_type_check
 
 import numpy as np
 from numpy import uint8
-from numpy.typing import NDArray
 from pypdf import PdfReader
 
 from ..utils.develop import deprecated
@@ -42,8 +41,6 @@ __all__ = [
     "convert_np_array_to_b64_b",
     "convert_bytes_to_np_array",
     "convert_pdf_bytes_to_np_array_v2",
-    "box_to_point4",
-    "point4_to_box",
     "as_dict",
 ]
 
@@ -187,24 +184,3 @@ def convert_pdf_bytes_to_np_array_v2(
         width = shape[2] - shape[0]
         return pdf_to_np_array(pdf_bytes, size=(int(width), int(height)))  # type: ignore
     return pdf_to_np_array(pdf_bytes, dpi=dpi)
-
-
-def box_to_point4(boxes: NDArray[np.float32]) -> NDArray[np.float32]:
-    """
-    :param boxes: nx4
-    :return: (nx4)x2
-    """
-    box = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]]
-    box = box.reshape((-1, 2))
-    return box
-
-
-def point4_to_box(points: NDArray[np.float32]) -> NDArray[np.float32]:
-    """
-    :param points: (nx4)x2
-    :return: nx4 boxes (x1y1x2y2)
-    """
-    points = points.reshape((-1, 4, 2))
-    min_xy = points.min(axis=1)  # nx2
-    max_xy = points.max(axis=1)  # nx2
-    return np.concatenate((min_xy, max_xy), axis=1)
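The two helpers are moved rather than deleted: they reappear in utils.transform (see the _IMPORT_STRUCTURE and import hunks) and stay re-exported at package level. A minimal round-trip sketch, assuming 0.42.0 is installed:

    import numpy as np
    import deepdoctection as dd

    boxes = np.array([[10.0, 20.0, 30.0, 40.0]], dtype=np.float32)  # nx4, x1y1x2y2
    points = dd.box_to_point4(boxes)     # (nx4)x2 corner points
    restored = dd.point4_to_box(points)  # back to nx4
    assert np.allclose(boxes, restored)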
@@ -25,7 +25,6 @@ from copy import copy
 from typing import Any, Mapping, Optional, Sequence, Type, TypedDict, Union, no_type_check
 
 import numpy as np
-from typing_extensions import LiteralString
 
 from ..utils.error import AnnotationError, ImageError
 from ..utils.logger import LoggingRecord, log_once, logger
@@ -41,12 +40,11 @@ from ..utils.settings import (
     WordType,
     get_type,
 )
-from ..utils.transform import ResizeTransform
+from ..utils.transform import ResizeTransform, box_to_point4, point4_to_box
 from ..utils.types import HTML, AnnotationDict, Chunks, ImageDict, PathLikeOrStr, PixelValues, Text_, csv
 from ..utils.viz import draw_boxes, interactive_imshow, viz_handler
 from .annotation import CategoryAnnotation, ContainerAnnotation, ImageAnnotation, ann_from_dict
 from .box import BoundingBox, crop_box_from_image
-from .convert import box_to_point4, point4_to_box
 from .image import Image
 
 
@@ -286,6 +284,52 @@ class Cell(Layout):
         return set(CellType).union(super().get_attribute_names())
 
 
+class List(Layout):
+    """
+    List specific subclass of `ImageAnnotationBaseView` modelled by `LayoutType`.
+    """
+
+    @property
+    def words(self) -> list[ImageAnnotationBaseView]:
+        """
+        Get a list of `ImageAnnotationBaseView` objects with `LayoutType` defined by `text_container`.
+        It will only select those among all annotations that have an entry in `Relationships.child`.
+        """
+        all_words: list[ImageAnnotationBaseView] = []
+        for list_item in self.list_items:
+            all_words.extend(list_item.words)  # type: ignore
+        return all_words
+
+    def get_ordered_words(self) -> list[ImageAnnotationBaseView]:
+        """Returns a list of words ordered by reading order. Words with no reading order will not be returned"""
+        try:
+            list_items = self.list_items
+            all_words = []
+            list_items.sort(key=lambda x: x.bbox[1])
+            for list_item in list_items:
+                all_words.extend(list_item.get_ordered_words())  # type: ignore
+            return all_words
+        except (TypeError, AnnotationError):
+            return super().get_ordered_words()
+
+    @property
+    def list_items(self) -> list[ImageAnnotationBaseView]:
+        """
+        A list of list items.
+        """
+        all_relation_ids = self.get_relationship(Relationships.CHILD)
+        list_items = self.base_page.get_annotation(
+            annotation_ids=all_relation_ids,
+            category_names=(
+                LayoutType.LIST_ITEM,
+                LayoutType.LINE,
+            ),
+        )
+        list_items.sort(key=lambda x: x.bbox[1])
+        return list_items
+
+
 class Table(Layout):
     """
     Table specific sub class of `ImageAnnotationBaseView` modelled by `TableType`.
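With the new view class, list layouts on a parsed page expose their items and reading-order words directly. A hedged sketch of downstream access, assuming the analyzer detected LayoutType.LIST segments ("sample.pdf" is a placeholder path):

    import deepdoctection as dd

    analyzer = dd.get_dd_analyzer()
    df = analyzer.analyze(path="sample.pdf")
    df.reset_state()

    for page in df:
        for layout in page.layouts:
            if layout.category_name == dd.LayoutType.LIST:
                items = layout.list_items            # children sorted by vertical position
                words = layout.get_ordered_words()   # reading-order words across all items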
@@ -373,7 +417,7 @@ class Table(Layout):
             category_names=[LayoutType.CELL, CellType.SPANNING], annotation_ids=all_relation_ids
         )
         row_cells = list(
-            filter(lambda c: row_number in (c.row_number, c.row_number + c.row_span), all_cells)  # type: ignore
+            filter(lambda c: c.row_number <= row_number <= c.row_number + c.row_span - 1, all_cells)  # type: ignore
         )
         row_cells.sort(key=lambda c: c.column_number)  # type: ignore
         column_header_cells = self.column_header_cells
@@ -561,6 +605,7 @@ IMAGE_ANNOTATION_TO_LAYOUTS: dict[ObjectTypes, Type[Union[Layout, Table, Word]]]
     LayoutType.TABLE_ROTATED: Table,
     LayoutType.WORD: Word,
     LayoutType.CELL: Cell,
+    LayoutType.LIST: List,
     CellType.SPANNING: Cell,
     CellType.ROW_HEADER: Cell,
     CellType.COLUMN_HEADER: Cell,
@@ -574,6 +619,7 @@ class ImageDefaults(TypedDict):
     text_container: LayoutType
     floating_text_block_categories: tuple[Union[LayoutType, CellType], ...]
     text_block_categories: tuple[Union[LayoutType, CellType], ...]
+    residual_layouts: tuple[LayoutType, ...]
 
 
 IMAGE_DEFAULTS: ImageDefaults = {
@@ -592,6 +638,7 @@ IMAGE_DEFAULTS: ImageDefaults = {
         LayoutType.FIGURE,
         CellType.SPANNING,
     ),
+    "residual_layouts": (LayoutType.LINE,),
 }
 
 
@@ -771,19 +818,8 @@ class Page(Image):
         """
         return self.get_annotation(category_names=self._get_residual_layout())
 
-    def _get_residual_layout(self) -> list[LiteralString]:
-        layouts = copy(list(self.floating_text_block_categories))
-        layouts.extend(
-            [
-                LayoutType.TABLE,
-                LayoutType.FIGURE,
-                self.text_container,
-                LayoutType.CELL,
-                LayoutType.ROW,
-                LayoutType.COLUMN,
-            ]
-        )
-        return [layout for layout in LayoutType if layout not in layouts]
+    def _get_residual_layout(self) -> tuple[LayoutType, ...]:
+        return IMAGE_DEFAULTS["residual_layouts"]
 
     @classmethod
     def from_image(
@@ -369,7 +369,9 @@ class MergeDataset(DatasetBase):
         self.buffer_datasets(**dataflow_build_kwargs)
         split_defaultdict = defaultdict(list)
         for image in self.datapoint_list:  # type: ignore
-            split_defaultdict[ann_id_to_split[image.image_id]].append(image)
+            maybe_image_id = ann_id_to_split.get(image.image_id)
+            if maybe_image_id is not None:
+                split_defaultdict[maybe_image_id].append(image)
         train_dataset = split_defaultdict["train"]
         val_dataset = split_defaultdict["val"]
         test_dataset = split_defaultdict["test"]
@@ -26,6 +26,7 @@ from dataclasses import dataclass, field
 from types import MappingProxyType
 from typing import TYPE_CHECKING, Any, Literal, Mapping, Optional, Sequence, Union, overload
 
+import numpy as np
 from lazy_imports import try_import
 
 from ..utils.identifier import get_uuid_from_str
@@ -38,6 +39,7 @@ from ..utils.settings import (
     token_class_tag_to_token_class_with_tag,
     token_class_with_tag_to_token_class_and_tag,
 )
+from ..utils.transform import BaseTransform, box_to_point4, point4_to_box
 from ..utils.types import JsonDict, PixelValues, Requirement
 
 if TYPE_CHECKING:
@@ -621,7 +623,7 @@ class ImageTransformer(PredictorBase, ABC):
     """
 
     @abstractmethod
-    def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
+    def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
         """
         Abstract method transform
         """
@@ -641,3 +643,108 @@ class ImageTransformer(PredictorBase, ABC):
     def get_category_names(self) -> tuple[ObjectTypes, ...]:
         """returns category names"""
         raise NotImplementedError()
+
+    def transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
+        """
+        Transform coordinates aligned with the transform_image method.
+
+        :param detect_results: List of DetectionResults
+        :return: List of DetectionResults. If you pass uuid it is possible to track the transformed bounding boxes.
+        """
+        raise NotImplementedError()
+
+    def inverse_transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
+        """
+        Inverse transform coordinates aligned with the transform_image method. Composing transform_coords with
+        inverse_transform_coords should return the original coordinates.
+
+        :param detect_results: List of DetectionResults
+        :return: List of DetectionResults. If you pass uuid it is possible to track the transformed bounding boxes.
+        """
+        raise NotImplementedError()
+
+
+class DeterministicImageTransformer(ImageTransformer):
+    """
+    A wrapper for BaseTransform classes that implements the ImageTransformer interface.
+
+    This class provides a bridge between the BaseTransform system (which handles image and coordinate
+    transformations like rotation, padding, etc.) and the predictors framework by implementing the
+    ImageTransformer interface. It allows BaseTransform objects to be used within pipelines that
+    expect ImageTransformer components.
+
+    The transformer performs deterministic transformations on images and their associated coordinates,
+    enabling operations like padding, rotation, and other geometric transformations while maintaining
+    the relationship between image content and annotation coordinates.
+
+    :param base_transform: A BaseTransform instance that defines the actual transformation operations
+                           to be applied to images and coordinates.
+    """

+    def __init__(self, base_transform: BaseTransform):
+        """
+        Initialize the DeterministicImageTransformer with a BaseTransform instance.
+
+        :param base_transform: A BaseTransform instance that defines the actual transformation operations
+        """
+        self.base_transform = base_transform
+        self.name = base_transform.__class__.__name__
+        self.model_id = self.get_model_id()
+
+    def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
+        return self.base_transform.apply_image(np_img)
+
+    def transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
+        boxes = np.array([detect_result.box for detect_result in detect_results])
+        # boxes = box_to_point4(boxes)
+        boxes = self.base_transform.apply_coords(boxes)
+        # boxes = point4_to_box(boxes)
+        detection_results = []
+        for idx, detect_result in enumerate(detect_results):
+            detection_results.append(
+                DetectionResult(
+                    box=boxes[idx, :].tolist(),
+                    class_name=detect_result.class_name,
+                    class_id=detect_result.class_id,
+                    score=detect_result.score,
+                    absolute_coords=detect_result.absolute_coords,
+                    uuid=detect_result.uuid,
+                )
+            )
+        return detection_results
+
+    def inverse_transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
+        boxes = np.array([detect_result.box for detect_result in detect_results])
+        boxes = box_to_point4(boxes)
+        boxes = self.base_transform.inverse_apply_coords(boxes)
+        boxes = point4_to_box(boxes)
+        detection_results = []
+        for idx, detect_result in enumerate(detect_results):
+            detection_results.append(
+                DetectionResult(
+                    box=boxes[idx, :].tolist(),
+                    class_id=detect_result.class_id,
+                    score=detect_result.score,
+                    absolute_coords=detect_result.absolute_coords,
+                    uuid=detect_result.uuid,
+                )
+            )
+        return detection_results
+
+    def clone(self) -> DeterministicImageTransformer:
+        return self.__class__(self.base_transform)
+
+    def predict(self, np_img: PixelValues) -> DetectionResult:
+        detect_result = DetectionResult()
+        for init_arg in self.base_transform.get_init_args():
+            setattr(detect_result, init_arg, getattr(self.base_transform, init_arg))
+        return detect_result
+
+    def get_category_names(self) -> tuple[ObjectTypes, ...]:
+        return self.base_transform.get_category_names()
+
+    @classmethod
+    def get_requirements(cls) -> list[Requirement]:
+        return []
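The wrapper lets any BaseTransform run as a pipeline transformer. A hedged sketch combining it with the renamed PadTransform keywords; both names are top-level exports per the _IMPORT_STRUCTURE hunks above:

    import deepdoctection as dd

    padder = dd.PadTransform(pad_top=10, pad_right=10, pad_bottom=10, pad_left=10)
    transformer = dd.DeterministicImageTransformer(padder)

    # np_img would be a numpy uint8 image; predict() returns a DetectionResult
    # carrying the transform's init args, which transform_image accepts as its
    # specification:
    # padded = transformer.transform_image(np_img, transformer.predict(np_img))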
@@ -43,7 +43,7 @@ class Jdeskewer(ImageTransformer):
         self.model_id = self.get_model_id()
         self.min_angle_rotation = min_angle_rotation
 
-    def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
+    def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
         """
         Rotation of the image according to the angle determined by the jdeskew estimator.
 
@@ -424,7 +424,8 @@ class DoctrTextRecognizer(TextRecognizer):
         custom_configs.pop("task", None)
         recognition_configs["mean"] = custom_configs.pop("mean")
         recognition_configs["std"] = custom_configs.pop("std")
-        batch_size = custom_configs.pop("batch_size")
+        if "batch_size" in custom_configs:
+            batch_size = custom_configs.pop("batch_size")
         recognition_configs["batch_size"] = batch_size
 
         if isinstance(architecture, str):
@@ -514,8 +515,9 @@ class DocTrRotationTransformer(ImageTransformer):
         self.number_contours = number_contours
         self.ratio_threshold_for_lines = ratio_threshold_for_lines
         self.name = "doctr_rotation_transformer"
+        self.model_id = self.get_model_id()
 
-    def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
+    def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
         """
         Applies the predicted rotation to the image, effectively rotating the image backwards.
         This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
@@ -423,7 +423,7 @@ class TesseractRotationTransformer(ImageTransformer):
         self.categories = ModelCategories(init_categories={1: PageType.ANGLE})
         self.model_id = self.get_model_id()
 
-    def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
+    def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
         """
         Applies the predicted rotation to the image, effectively rotating the image backwards.
         This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
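The transform to transform_image rename applies across all ImageTransformer subclasses, so custom transformers must rename their overrides as well. An illustrative sketch (MyTransformer is hypothetical; remaining PredictorBase hooks omitted):

    import deepdoctection as dd

    class MyTransformer(dd.ImageTransformer):
        # 0.40.0: def transform(self, np_img, specification): ...
        def transform_image(self, np_img, specification):
            # no-op override under the renamed hook
            return np_img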
@@ -15,9 +15,9 @@ from typing import Any, List, Optional, Tuple
 import numpy as np
 from lazy_imports import try_import
 
-from ....datapoint.convert import box_to_point4, point4_to_box
 from ....utils.error import MalformedData
 from ....utils.logger import log_once
+from ....utils.transform import box_to_point4, point4_to_box
 from ....utils.types import JsonDict, PixelValues
 from .common import filter_boxes_inside_shape, np_iou
 from .modeling.model_fpn import get_all_anchors_fpn
@@ -102,7 +102,7 @@ def image_to_d2_frcnn_training(
     return output
 
 
-def pt_nms_image_annotations(
+def pt_nms_image_annotations_depr(
     anns: Sequence[ImageAnnotation], threshold: float, image_id: Optional[str] = None, prio: str = ""
 ) -> Sequence[str]:
     """
@@ -147,6 +147,69 @@ def pt_nms_image_annotations(
     return []
 
 
+def pt_nms_image_annotations(
+    anns: Sequence[ImageAnnotation], threshold: float, image_id: Optional[str] = None, prio: str = ""
+) -> Sequence[str]:
+    """
+    Processing given image annotations through NMS. This is useful, if you want to suppress some specific image
+    annotation, e.g. given by name or returned through different predictors. This is the pt version, for tf check
+    `mapper.tpstruct`.
+
+    :param anns: A sequence of ImageAnnotations. All annotations will be treated as if they belong to one category
+    :param threshold: NMS threshold
+    :param image_id: id in order to get the embedding bounding box
+    :param prio: If an annotation has prio, it will overwrite its given score to 1 so that it will never be suppressed
+    :return: A list of annotation_ids that belong to the given input sequence and that survive the NMS process
+    """
+    if len(anns) == 1:
+        return [anns[0].annotation_id]
+
+    if not anns:
+        return []
+
+    # First, identify priority annotations that should always be kept
+    priority_ann_ids = []
+
+    if prio:
+        for ann in anns:
+            if ann.category_name == prio:
+                priority_ann_ids.append(ann.annotation_id)
+
+    # If all annotations are priority or none are left for NMS, return all priority IDs
+    if len(priority_ann_ids) == len(anns):
+        return priority_ann_ids
+
+    def priority_to_confidence(ann: ImageAnnotation, priority: str) -> float:
+        if ann.category_name == priority:
+            return 1.0
+        if ann.score:
+            return ann.score
+        raise ValueError("score cannot be None")
+
+    # Perform NMS only on non-priority annotations
+    ann_ids = np.array([ann.annotation_id for ann in anns], dtype="object")
+
+    # Get boxes for non-priority annotations
+    boxes = torch.tensor(
+        [ann.get_bounding_box(image_id).to_list(mode="xyxy") for ann in anns if ann.bounding_box is not None]
+    )
+
+    scores = torch.tensor([priority_to_confidence(ann, prio) for ann in anns])
+    class_mask = torch.ones(len(boxes), dtype=torch.uint8)
+
+    keep = batched_nms(boxes, scores, class_mask, threshold)
+    kept_ids = ann_ids[keep]
+
+    # Convert to list if necessary
+    if isinstance(kept_ids, str):
+        kept_ids = [kept_ids]
+    elif not isinstance(kept_ids, list):
+        kept_ids = kept_ids.tolist()
+
+    # Combine priority annotations with surviving non-priority annotations
+    return list(set(priority_ann_ids + kept_ids))
+
+
 def _get_category_attributes(
     ann: ImageAnnotation, cat_to_sub_cat: Optional[Mapping[ObjectTypes, ObjectTypes]] = None
 ) -> tuple[ObjectTypes, int, Optional[float]]:
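A hedged usage sketch of the reworked NMS helper; anns stands for a sequence of ImageAnnotation objects gathered beforehand, and the module path is an assumption based on the d2 mapper context of this hunk:

    from deepdoctection.mapper.d2struct import pt_nms_image_annotations  # assumed path

    # Annotations whose category_name equals prio are kept unconditionally;
    # the rest go through torch's batched NMS with the given IoU threshold.
    surviving_ids = pt_nms_image_annotations(anns, threshold=0.05, prio="table")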
@@ -31,11 +31,10 @@ import numpy.typing as npt
 from lazy_imports import try_import
 
 from ..datapoint.annotation import ContainerAnnotation
-from ..datapoint.convert import box_to_point4, point4_to_box
 from ..datapoint.image import Image
 from ..datapoint.view import Page
 from ..utils.settings import DatasetType, LayoutType, PageType, Relationships, WordType
-from ..utils.transform import ResizeTransform, normalize_image
+from ..utils.transform import ResizeTransform, box_to_point4, normalize_image, point4_to_box
 from ..utils.types import JsonDict
 from .maputils import curry