deepdoctection 0.40.0__tar.gz → 0.41.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (155)
  1. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/PKG-INFO +1 -1
  2. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/__init__.py +5 -3
  3. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/analyzer/factory.py +1 -1
  4. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datapoint/convert.py +0 -24
  5. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datapoint/view.py +1 -2
  6. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/base.py +3 -1
  7. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/base.py +108 -1
  8. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/deskew.py +1 -1
  9. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/doctrocr.py +2 -1
  10. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tessocr.py +1 -1
  11. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/preproc.py +1 -1
  12. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/laylmstruct.py +1 -2
  13. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/match.py +2 -2
  14. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/common.py +3 -2
  15. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/sub_layout.py +8 -4
  16. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/transform.py +38 -16
  17. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/settings.py +5 -0
  18. deepdoctection-0.41.0/deepdoctection/utils/transform.py +359 -0
  19. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection.egg-info/PKG-INFO +1 -1
  20. deepdoctection-0.40.0/deepdoctection/utils/transform.py +0 -224
  21. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/LICENSE +0 -0
  22. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/README.md +0 -0
  23. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/analyzer/__init__.py +0 -0
  24. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/analyzer/_config.py +0 -0
  25. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/analyzer/dd.py +0 -0
  26. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/configs/__init__.py +0 -0
  27. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/configs/conf_dd_one.yaml +0 -0
  28. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  29. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/dataflow/__init__.py +0 -0
  30. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/dataflow/base.py +0 -0
  31. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/dataflow/common.py +0 -0
  32. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/dataflow/custom.py +0 -0
  33. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/dataflow/custom_serialize.py +0 -0
  34. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/dataflow/parallel_map.py +0 -0
  35. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/dataflow/serialize.py +0 -0
  36. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/dataflow/stats.py +0 -0
  37. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datapoint/__init__.py +0 -0
  38. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datapoint/annotation.py +0 -0
  39. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datapoint/box.py +0 -0
  40. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datapoint/image.py +0 -0
  41. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/__init__.py +0 -0
  42. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/adapter.py +0 -0
  43. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/dataflow_builder.py +0 -0
  44. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/info.py +0 -0
  45. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/__init__.py +0 -0
  46. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  47. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  48. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/funsd.py +0 -0
  49. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  50. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/layouttest.py +0 -0
  51. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/publaynet.py +0 -0
  52. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  53. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  54. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  55. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/xfund.py +0 -0
  56. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  57. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  58. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/registry.py +0 -0
  59. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/datasets/save.py +0 -0
  60. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/eval/__init__.py +0 -0
  61. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/eval/accmetric.py +0 -0
  62. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/eval/base.py +0 -0
  63. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/eval/cocometric.py +0 -0
  64. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/eval/eval.py +0 -0
  65. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/eval/registry.py +0 -0
  66. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/eval/tedsmetric.py +0 -0
  67. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/eval/tp_eval_callback.py +0 -0
  68. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/__init__.py +0 -0
  69. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/d2detect.py +0 -0
  70. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/fastlang.py +0 -0
  71. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/hfdetr.py +0 -0
  72. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/hflayoutlm.py +0 -0
  73. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/hflm.py +0 -0
  74. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/model.py +0 -0
  75. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/pdftext.py +0 -0
  76. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/pt/__init__.py +0 -0
  77. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/pt/nms.py +0 -0
  78. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/pt/ptutils.py +0 -0
  79. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/texocr.py +0 -0
  80. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/__init__.py +0 -0
  81. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tfutils.py +0 -0
  82. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpcompat.py +0 -0
  83. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  84. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  85. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  86. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  87. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  88. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  89. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  90. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  91. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  92. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  93. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  94. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  95. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  96. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  97. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  98. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  99. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  100. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/extern/tpdetect.py +0 -0
  101. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/__init__.py +0 -0
  102. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/cats.py +0 -0
  103. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/cocostruct.py +0 -0
  104. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/d2struct.py +0 -0
  105. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/hfstruct.py +0 -0
  106. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/maputils.py +0 -0
  107. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/misc.py +0 -0
  108. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/pascalstruct.py +0 -0
  109. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/prodigystruct.py +0 -0
  110. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/pubstruct.py +0 -0
  111. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/tpstruct.py +0 -0
  112. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/mapper/xfundstruct.py +0 -0
  113. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/__init__.py +0 -0
  114. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/anngen.py +0 -0
  115. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/base.py +0 -0
  116. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/concurrency.py +0 -0
  117. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/doctectionpipe.py +0 -0
  118. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/language.py +0 -0
  119. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/layout.py +0 -0
  120. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/lm.py +0 -0
  121. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/order.py +0 -0
  122. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/refine.py +0 -0
  123. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/registry.py +0 -0
  124. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/segment.py +0 -0
  125. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/pipe/text.py +0 -0
  126. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/py.typed +0 -0
  127. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/train/__init__.py +0 -0
  128. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/train/d2_frcnn_train.py +0 -0
  129. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/train/hf_detr_train.py +0 -0
  130. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/train/hf_layoutlm_train.py +0 -0
  131. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/train/tp_frcnn_train.py +0 -0
  132. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/__init__.py +0 -0
  133. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/concurrency.py +0 -0
  134. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/context.py +0 -0
  135. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/develop.py +0 -0
  136. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/env_info.py +0 -0
  137. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/error.py +0 -0
  138. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/file_utils.py +0 -0
  139. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/fs.py +0 -0
  140. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/identifier.py +0 -0
  141. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/logger.py +0 -0
  142. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/metacfg.py +0 -0
  143. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/mocks.py +0 -0
  144. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/pdf_utils.py +0 -0
  145. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/tqdm.py +0 -0
  146. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/types.py +0 -0
  147. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/utils.py +0 -0
  148. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection/utils/viz.py +0 -0
  149. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection.egg-info/SOURCES.txt +0 -0
  150. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection.egg-info/dependency_links.txt +0 -0
  151. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection.egg-info/requires.txt +0 -0
  152. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/deepdoctection.egg-info/top_level.txt +0 -0
  153. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/setup.cfg +0 -0
  154. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/setup.py +0 -0
  155. {deepdoctection-0.40.0 → deepdoctection-0.41.0}/tests/test_utils.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: deepdoctection
- Version: 0.40.0
+ Version: 0.41.0
  Summary: Repository for Document AI
  Home-page: https://github.com/deepdoctection/deepdoctection
  Author: Dr. Janis Meyer

deepdoctection/__init__.py
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
 
  # pylint: enable=wrong-import-position
 
- __version__ = "0.40.0"
+ __version__ = "0.41.0"
 
  _IMPORT_STRUCTURE = {
      "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -90,8 +90,6 @@ _IMPORT_STRUCTURE = {
      "convert_np_array_to_b64_b",
      "convert_bytes_to_np_array",
      "convert_pdf_bytes_to_np_array_v2",
-     "box_to_point4",
-     "point4_to_box",
      "as_dict",
      "ImageAnnotationBaseView",
      "Image",
@@ -164,6 +162,7 @@ _IMPORT_STRUCTURE = {
      "LMSequenceClassifier",
      "LanguageDetector",
      "ImageTransformer",
+     "DeterministicImageTransformer",
      "InferenceResize",
      "D2FrcnnDetector",
      "D2FrcnnTracingDetector",
@@ -401,11 +400,14 @@ _IMPORT_STRUCTURE = {
      "get_type",
      "get_tqdm",
      "get_tqdm_default_kwargs",
+     "box_to_point4",
+     "point4_to_box",
      "ResizeTransform",
      "InferenceResize",
      "normalize_image",
      "pad_image",
      "PadTransform",
+     "RotationTransform",
      "delete_keys_from_dict",
      "split_string",
      "string_to_dict",

deepdoctection/analyzer/factory.py
@@ -197,7 +197,7 @@ class ServiceFactory:
          getattr(config.PT, mode).PAD.BOTTOM,
          getattr(config.PT, mode).PAD.LEFT,
      )
-     return PadTransform(top=top, right=right, bottom=bottom, left=left) #
+     return PadTransform(pad_top=top, pad_right=right, pad_bottom=bottom, pad_left=left) #
 
  @staticmethod
  def build_padder(config: AttrDict, mode: str) -> PadTransform:
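
For orientation, a minimal sketch of calling the padder with the renamed keyword arguments; the top-level import path, the dummy values, and the apply_image call are assumptions based on the utils/transform.py changes in this release, not taken from this hunk:

    import numpy as np
    from deepdoctection.utils.transform import PadTransform  # assumed import path

    np_img = np.zeros((400, 300, 3), dtype=np.uint8)  # dummy page image
    # 0.40.0 used top/right/bottom/left; 0.41.0 prefixes the keywords with pad_
    padder = PadTransform(pad_top=60, pad_right=60, pad_bottom=60, pad_left=60)
    padded = padder.apply_image(np_img)  # assumed BaseTransform-style method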
deepdoctection/datapoint/convert.py
@@ -27,7 +27,6 @@ from typing import Any, Optional, Union, no_type_check
 
  import numpy as np
  from numpy import uint8
- from numpy.typing import NDArray
  from pypdf import PdfReader
 
  from ..utils.develop import deprecated
@@ -42,8 +41,6 @@ __all__ = [
      "convert_np_array_to_b64_b",
      "convert_bytes_to_np_array",
      "convert_pdf_bytes_to_np_array_v2",
-     "box_to_point4",
-     "point4_to_box",
      "as_dict",
  ]
 
@@ -187,24 +184,3 @@ def convert_pdf_bytes_to_np_array_v2(
          width = shape[2] - shape[0]
          return pdf_to_np_array(pdf_bytes, size=(int(width), int(height)))  # type: ignore
      return pdf_to_np_array(pdf_bytes, dpi=dpi)
-
-
- def box_to_point4(boxes: NDArray[np.float32]) -> NDArray[np.float32]:
-     """
-     :param boxes: nx4
-     :return: (nx4)x2
-     """
-     box = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]]
-     box = box.reshape((-1, 2))
-     return box
-
-
- def point4_to_box(points: NDArray[np.float32]) -> NDArray[np.float32]:
-     """
-     :param points: (nx4)x2
-     :return: nx4 boxes (x1y1x2y2)
-     """
-     points = points.reshape((-1, 4, 2))
-     min_xy = points.min(axis=1)  # nx2
-     max_xy = points.max(axis=1)  # nx2
-     return np.concatenate((min_xy, max_xy), axis=1)
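
The two removed helpers are not gone; they move to deepdoctection/utils/transform.py (see the __init__.py hunk above, where they reappear among the utils exports). A short worked example of the round trip, assuming the nx4 xyxy convention stated in the docstrings:

    import numpy as np
    from deepdoctection.utils.transform import box_to_point4, point4_to_box  # new location in 0.41.0

    boxes = np.array([[10.0, 20.0, 110.0, 220.0]], dtype=np.float32)  # one box: x1, y1, x2, y2
    points = box_to_point4(boxes)      # shape (4, 2): the four corner points of the box
    restored = point4_to_box(points)   # shape (1, 4): min/max over the corners recovers the box
    assert np.allclose(boxes, restored)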
deepdoctection/datapoint/view.py
@@ -41,12 +41,11 @@ from ..utils.settings import (
      WordType,
      get_type,
  )
- from ..utils.transform import ResizeTransform
+ from ..utils.transform import ResizeTransform, box_to_point4, point4_to_box
  from ..utils.types import HTML, AnnotationDict, Chunks, ImageDict, PathLikeOrStr, PixelValues, Text_, csv
  from ..utils.viz import draw_boxes, interactive_imshow, viz_handler
  from .annotation import CategoryAnnotation, ContainerAnnotation, ImageAnnotation, ann_from_dict
  from .box import BoundingBox, crop_box_from_image
- from .convert import box_to_point4, point4_to_box
  from .image import Image
 
 

deepdoctection/datasets/base.py
@@ -369,7 +369,9 @@ class MergeDataset(DatasetBase):
          self.buffer_datasets(**dataflow_build_kwargs)
          split_defaultdict = defaultdict(list)
          for image in self.datapoint_list:  # type: ignore
-             split_defaultdict[ann_id_to_split[image.image_id]].append(image)
+             maybe_image_id = ann_id_to_split.get(image.image_id)
+             if maybe_image_id is not None:
+                 split_defaultdict[maybe_image_id].append(image)
          train_dataset = split_defaultdict["train"]
          val_dataset = split_defaultdict["val"]
          test_dataset = split_defaultdict["test"]

deepdoctection/extern/base.py
@@ -26,6 +26,7 @@ from dataclasses import dataclass, field
  from types import MappingProxyType
  from typing import TYPE_CHECKING, Any, Literal, Mapping, Optional, Sequence, Union, overload
 
+ import numpy as np
  from lazy_imports import try_import
  from ..utils.identifier import get_uuid_from_str
@@ -38,6 +39,7 @@ from ..utils.settings import (
      token_class_tag_to_token_class_with_tag,
      token_class_with_tag_to_token_class_and_tag,
  )
+ from ..utils.transform import BaseTransform, box_to_point4, point4_to_box
  from ..utils.types import JsonDict, PixelValues, Requirement
 
  if TYPE_CHECKING:
@@ -621,7 +623,7 @@ class ImageTransformer(PredictorBase, ABC):
      """
 
      @abstractmethod
-     def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
+     def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
          """
          Abstract method transform
          """
@@ -641,3 +643,108 @@ class ImageTransformer(PredictorBase, ABC):
      def get_category_names(self) -> tuple[ObjectTypes, ...]:
          """returns category names"""
          raise NotImplementedError()
+
+     def transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
+         """
+         Transform coordinates aligned with the transform_image method.
+
+         :param detect_results: List of DetectionResults
+         :return: List of DetectionResults. If you pass uuid it is possible to track the transformed bounding boxes.
+         """
+
+         raise NotImplementedError()
+
+     def inverse_transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
+         """
+         Inverse transform coordinates aligned with the transform_image method. Composing transform_coords with
+         inverse_transform_coords should return the original coordinates.
+
+         :param detect_results: List of DetectionResults
+         :return: List of DetectionResults. If you pass uuid it is possible to track the transformed bounding boxes.
+         """
+
+         raise NotImplementedError()
+
+
+ class DeterministicImageTransformer(ImageTransformer):
+     """
+     A wrapper for BaseTransform classes that implements the ImageTransformer interface.
+
+     This class provides a bridge between the BaseTransform system (which handles image and coordinate
+     transformations like rotation, padding, etc.) and the predictors framework by implementing the
+     ImageTransformer interface. It allows BaseTransform objects to be used within pipelines that
+     expect ImageTransformer components.
+
+     The transformer performs deterministic transformations on images and their associated coordinates,
+     enabling operations like padding, rotation, and other geometric transformations while maintaining
+     the relationship between image content and annotation coordinates.
+
+     :param base_transform: A BaseTransform instance that defines the actual transformation operations
+                            to be applied to images and coordinates.
+     """
+
+     def __init__(self, base_transform: BaseTransform):
+         """
+         Initialize the DeterministicImageTransformer with a BaseTransform instance.
+
+         :param base_transform: A BaseTransform instance that defines the actual transformation operations
+         """
+         self.base_transform = base_transform
+         self.name = base_transform.__class__.__name__
+         self.model_id = self.get_model_id()
+
+     def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
+         return self.base_transform.apply_image(np_img)
+
+     def transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
+         boxes = np.array([detect_result.box for detect_result in detect_results])
+         # boxes = box_to_point4(boxes)
+         boxes = self.base_transform.apply_coords(boxes)
+         # boxes = point4_to_box(boxes)
+         detection_results = []
+         for idx, detect_result in enumerate(detect_results):
+             detection_results.append(
+                 DetectionResult(
+                     box=boxes[idx, :].tolist(),
+                     class_name=detect_result.class_name,
+                     class_id=detect_result.class_id,
+                     score=detect_result.score,
+                     absolute_coords=detect_result.absolute_coords,
+                     uuid=detect_result.uuid,
+                 )
+             )
+         return detection_results
+
+     def inverse_transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
+         boxes = np.array([detect_result.box for detect_result in detect_results])
+         boxes = box_to_point4(boxes)
+         boxes = self.base_transform.inverse_apply_coords(boxes)
+         boxes = point4_to_box(boxes)
+         detection_results = []
+         for idx, detect_result in enumerate(detect_results):
+             detection_results.append(
+                 DetectionResult(
+                     box=boxes[idx, :].tolist(),
+                     class_id=detect_result.class_id,
+                     score=detect_result.score,
+                     absolute_coords=detect_result.absolute_coords,
+                     uuid=detect_result.uuid,
+                 )
+             )
+         return detection_results
+
+     def clone(self) -> DeterministicImageTransformer:
+         return self.__class__(self.base_transform)
+
+     def predict(self, np_img: PixelValues) -> DetectionResult:
+         detect_result = DetectionResult()
+         for init_arg in self.base_transform.get_init_args():
+             setattr(detect_result, init_arg, getattr(self.base_transform, init_arg))
+         return detect_result
+
+     def get_category_names(self) -> tuple[ObjectTypes, ...]:
+         return self.base_transform.get_category_names()
+
+     @classmethod
+     def get_requirements(cls) -> list[Requirement]:
+         return []
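
A hedged usage sketch of the new wrapper, assuming the PadTransform keyword arguments from the factory.py hunk and that DetectionResult is importable from deepdoctection.extern.base; the concrete values are placeholders:

    import numpy as np
    from deepdoctection.extern.base import DetectionResult, DeterministicImageTransformer
    from deepdoctection.utils.transform import PadTransform

    np_img = np.zeros((400, 300, 3), dtype=np.uint8)  # dummy page image
    transformer = DeterministicImageTransformer(PadTransform(pad_top=60, pad_right=60, pad_bottom=60, pad_left=60))

    spec = transformer.predict(np_img)                 # DetectionResult carrying pad_top, pad_right, ...
    padded = transformer.transform_image(np_img, spec)
    boxes = [DetectionResult(box=[10.0, 10.0, 50.0, 40.0], class_id=1, absolute_coords=True, uuid="demo")]
    shifted = transformer.transform_coords(boxes)              # boxes follow the padding offset
    restored = transformer.inverse_transform_coords(shifted)   # composition should return the original boxes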
deepdoctection/extern/deskew.py
@@ -43,7 +43,7 @@ class Jdeskewer(ImageTransformer):
          self.model_id = self.get_model_id()
          self.min_angle_rotation = min_angle_rotation
 
-     def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
+     def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
          """
          Rotation of the image according to the angle determined by the jdeskew estimator.
 

deepdoctection/extern/doctrocr.py
@@ -514,8 +514,9 @@ class DocTrRotationTransformer(ImageTransformer):
          self.number_contours = number_contours
          self.ratio_threshold_for_lines = ratio_threshold_for_lines
          self.name = "doctr_rotation_transformer"
+         self.model_id = self.get_model_id()
 
-     def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
+     def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
          """
          Applies the predicted rotation to the image, effectively rotating the image backwards.
          This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.

deepdoctection/extern/tessocr.py
@@ -423,7 +423,7 @@ class TesseractRotationTransformer(ImageTransformer):
          self.categories = ModelCategories(init_categories={1: PageType.ANGLE})
          self.model_id = self.get_model_id()
 
-     def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
+     def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
          """
          Applies the predicted rotation to the image, effectively rotating the image backwards.
          This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.

deepdoctection/extern/tp/tpfrcnn/preproc.py
@@ -15,9 +15,9 @@ from typing import Any, List, Optional, Tuple
  import numpy as np
  from lazy_imports import try_import
 
- from ....datapoint.convert import box_to_point4, point4_to_box
  from ....utils.error import MalformedData
  from ....utils.logger import log_once
+ from ....utils.transform import box_to_point4, point4_to_box
  from ....utils.types import JsonDict, PixelValues
  from .common import filter_boxes_inside_shape, np_iou
  from .modeling.model_fpn import get_all_anchors_fpn

deepdoctection/mapper/laylmstruct.py
@@ -31,11 +31,10 @@ import numpy.typing as npt
  from lazy_imports import try_import
 
  from ..datapoint.annotation import ContainerAnnotation
- from ..datapoint.convert import box_to_point4, point4_to_box
  from ..datapoint.image import Image
  from ..datapoint.view import Page
  from ..utils.settings import DatasetType, LayoutType, PageType, Relationships, WordType
- from ..utils.transform import ResizeTransform, normalize_image
+ from ..utils.transform import ResizeTransform, box_to_point4, normalize_image, point4_to_box
  from ..utils.types import JsonDict
  from .maputils import curry
 

deepdoctection/mapper/match.py
@@ -157,8 +157,8 @@ def match_anns_by_intersection(
 
  def match_anns_by_distance(
      dp: Image,
-     parent_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
-     child_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
+     parent_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+     child_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
      parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
      child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
      parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,

deepdoctection/pipe/common.py
@@ -51,8 +51,9 @@ class ImageCroppingService(PipelineComponent):
      """
 
      def __init__(
-         self, category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
-         service_ids: Optional[Sequence[str]] = None
+         self,
+         category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+         service_ids: Optional[Sequence[str]] = None,
      ) -> None:
          """
          :param category_names: A single name or a list of category names to crop

deepdoctection/pipe/sub_layout.py
@@ -153,8 +153,8 @@ class SubImageLayoutService(PipelineComponent):
      **Example**
 
          detect_result_generator = DetectResultGenerator(categories_items)
-         d_items = TPFrcnnDetector(item_config_path, item_weights_path, {"1": LayoutType.row,
-                                                                         "2": LayoutType.column})
+         d_items = TPFrcnnDetector(item_config_path, item_weights_path, {1: LayoutType.row,
+                                                                         2: LayoutType.column})
          item_component = SubImageLayoutService(d_items, LayoutType.table, detect_result_generator)
      """
 
@@ -162,6 +162,7 @@ class SubImageLayoutService(PipelineComponent):
          self,
          sub_image_detector: ObjectDetector,
          sub_image_names: Union[str, Sequence[TypeOrStr]],
+         service_ids: Optional[Sequence[str]] = None,
          detect_result_generator: Optional[DetectResultGenerator] = None,
          padder: Optional[PadTransform] = None,
      ):
@@ -170,7 +171,8 @@
      :param sub_image_names: Category names of ImageAnnotations to be presented to the detector.
                              Attention: The selected ImageAnnotations must have :attr:`image` and :attr:`image.image`
                              not None.
-     :param category_id_mapping: Mapping of category IDs. Usually, the category ids start with 1.
+     :param service_ids: List of service ids to be used for filtering the ImageAnnotations. If None, all
+                         ImageAnnotations will be used.
      :param detect_result_generator: 'DetectResultGenerator' instance. 'categories' attribute has to be the same as
                                      the 'categories' attribute of the 'sub_image_detector'. The generator will be
                                      responsible to create 'DetectionResult' for some categories, if they have not
@@ -184,6 +186,7 @@
              if isinstance(sub_image_names, str)
              else tuple((get_type(cat) for cat in sub_image_names))
          )
+         self.service_ids = service_ids
          self.detect_result_generator = detect_result_generator
          self.padder = padder
          self.predictor = sub_image_detector
@@ -205,7 +208,7 @@
          - Optionally invoke the DetectResultGenerator
          - Generate ImageAnnotations and dump to parent image and sub image.
          """
-         sub_image_anns = dp.get_annotation(category_names=self.sub_image_name)
+         sub_image_anns = dp.get_annotation(category_names=self.sub_image_name, service_ids=self.service_ids)
          for sub_image_ann in sub_image_anns:
              np_image = self.prepare_np_image(sub_image_ann)
              detect_result_list = self.predictor.predict(np_image)
@@ -246,6 +249,7 @@
          return self.__class__(
              predictor,
              self.sub_image_name,
+             self.service_ids,
              self.detect_result_generator,
              padder_clone,
          )
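
Taken together, the sub_layout.py hunks add an optional service_ids filter. A hedged construction sketch; the detector and generator variables mirror the docstring example above, and the id value is a placeholder rather than a real service id:

    item_component = SubImageLayoutService(
        sub_image_detector=d_items,
        sub_image_names=LayoutType.table,
        service_ids=["<layout_service_id>"],   # hypothetical: only process tables produced by this service
        detect_result_generator=detect_result_generator,
    )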
deepdoctection/pipe/transform.py
@@ -1,5 +1,5 @@
  # -*- coding: utf-8 -*-
- # File: transform.py
+ # File: test_transform.py
 
  # Copyright 2022 Dr. Janis Meyer. All rights reserved.
  #
@@ -22,6 +22,7 @@ on images (e.g. deskew, de-noising or more general GAN like operations.
 
  from __future__ import annotations
 
+ from .. import DetectionResult
  from ..datapoint.image import Image
  from ..extern.base import ImageTransformer
  from .base import MetaAnnotation, PipelineComponent
@@ -49,25 +50,46 @@ class SimpleTransformService(PipelineComponent):
          super().__init__(self._get_name(transform_predictor.name), self.transform_predictor.model_id)
 
      def serve(self, dp: Image) -> None:
-         if dp.annotations:
-             raise RuntimeError(
-                 "SimpleTransformService receives datapoints with ÌmageAnnotations. This violates the "
-                 "pipeline building API but this can currently be catched only at runtime. "
-                 "Please make sure that this component is the first one in the pipeline."
-             )
-
          if dp.image is not None:
              detection_result = self.transform_predictor.predict(dp.image)
-             transformed_image = self.transform_predictor.transform(dp.image, detection_result)
+             transformed_image = self.transform_predictor.transform_image(dp.image, detection_result)
              self.dp_manager.datapoint.clear_image(True)
              self.dp_manager.datapoint.image = transformed_image
-             self.dp_manager.set_summary_annotation(
-                 summary_key=self.transform_predictor.get_category_names()[0],
-                 summary_name=self.transform_predictor.get_category_names()[0],
-                 summary_number=None,
-                 summary_value=getattr(detection_result, self.transform_predictor.get_category_names()[0].value, None),
-                 summary_score=detection_result.score,
-             )
+             for category in self.transform_predictor.get_category_names():
+                 self.dp_manager.set_summary_annotation(
+                     summary_key=category,
+                     summary_name=category,
+                     summary_number=None,
+                     summary_value=getattr(detection_result, category.value, None),
+                     summary_score=detection_result.score,
+                 )
+             detect_results = []
+             for ann in dp.get_annotation():
+                 box = ann.get_bounding_box()
+                 if not box.absolute_coords:
+                     box = box.transform(dp.width, dp.height)
+                 detect_results.append(
+                     DetectionResult(
+                         box=box.to_list(mode="xyxy"),
+                         class_name=ann.category_name,  # type: ignore
+                         score=ann.score,
+                         class_id=ann.category_id,
+                         uuid=ann.annotation_id,
+                     )
+                 )
+             output_detect_results = self.transform_predictor.transform_coords(detect_results)
+             for detect_result in output_detect_results:
+                 ann = dp.get_annotation(annotation_ids=detect_result.uuid)[0]
+                 transformed_ann_id = self.dp_manager.set_image_annotation(detect_result)
+                 if transformed_ann_id is None:
+                     print("here")
+                 transformed_ann = self.dp_manager.datapoint.get_annotation(annotation_ids=transformed_ann_id)[0]
+
+                 for key, sub_ann in ann.sub_categories.items():
+                     transformed_ann.dump_sub_category(key, sub_ann)
+                 if ann.image is not None:
+                     dp.image_ann_to_image(transformed_ann.annotation_id, ann.image.image is not None)
+                 ann.deactivate()
 
      def clone(self) -> SimpleTransformService:
          return self.__class__(self.transform_predictor)
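
In effect, serve() no longer rejects datapoints that already carry ImageAnnotations; it re-projects their boxes through transform_coords and deactivates the originals. A hedged wiring sketch using the wrapper from the extern/base.py hunk above; the PadTransform values are placeholders:

    from deepdoctection.extern.base import DeterministicImageTransformer
    from deepdoctection.pipe.transform import SimpleTransformService
    from deepdoctection.utils.transform import PadTransform

    padder = DeterministicImageTransformer(PadTransform(pad_top=60, pad_right=60, pad_bottom=60, pad_left=60))
    transform_service = SimpleTransformService(padder)
    # inside a pipeline, serve() now pads dp.image, writes one summary annotation per category
    # name (pad_top, pad_bottom, ...), and replaces each existing annotation with a shifted copy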
deepdoctection/utils/settings.py
@@ -67,6 +67,11 @@ class PageType(ObjectTypes):
      DOCUMENT_TYPE = "document_type"
      LANGUAGE = "language"
      ANGLE = "angle"
+     SIZE = "size"
+     PAD_TOP = "pad_top"
+     PAD_BOTTOM = "pad_bottom"
+     PAD_LEFT = "pad_left"
+     PAD_RIGHT = "pad_right"
 
 
  @object_types_registry.register("SummaryType")
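
The new members supply the summary keys that the reworked SimpleTransformService.serve() looks up via getattr(detection_result, category.value, None). A small, hedged illustration; get_type behaviour is assumed from its use elsewhere in the codebase:

    from deepdoctection.utils.settings import PageType, get_type

    assert PageType.PAD_TOP.value == "pad_top"
    assert get_type("pad_left") == PageType.PAD_LEFT  # string-to-ObjectTypes lookup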