PyPI - deepdoctection - Versions diffs - 0.45.0__tar.gz → 0.46__tar.gz - Mend

deepdoctection 0.45.0__tar.gz → 0.46__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of deepdoctection might be problematic. Click here for more details.

Files changed (155) hide show

{deepdoctection-0.45.0 → deepdoctection-0.46}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deepdoctection
-Version: 0.45.0
+Version: 0.46
 Summary: Repository for Document AI
 Home-page: https://github.com/deepdoctection/deepdoctection
 Author: Dr. Janis Meyer
@@ -19,18 +19,15 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: catalogue==2.0.10
 Requires-Dist: huggingface_hub>=0.26.0
-Requires-Dist: importlib-metadata>=5.0.0
 Requires-Dist: jsonlines==3.1.0
 Requires-Dist: lazy-imports==0.3.1
 Requires-Dist: mock==4.0.3
-Requires-Dist: networkx>=2.7.1
 Requires-Dist: numpy>2.0
 Requires-Dist: packaging>=20.0
 Requires-Dist: Pillow>=10.0.0
 Requires-Dist: pypdf>=6.0.0
 Requires-Dist: pypdfium2>=4.30.0
 Requires-Dist: pyyaml>=6.0.1
-Requires-Dist: pyzmq>=16
 Requires-Dist: scipy>=1.13.1
 Requires-Dist: termcolor>=1.1
 Requires-Dist: tabulate>=0.7.7
@@ -38,18 +35,15 @@ Requires-Dist: tqdm>=4.64.0
 Provides-Extra: tf
 Requires-Dist: catalogue==2.0.10; extra == "tf"
 Requires-Dist: huggingface_hub>=0.26.0; extra == "tf"
-Requires-Dist: importlib-metadata>=5.0.0; extra == "tf"
 Requires-Dist: jsonlines==3.1.0; extra == "tf"
 Requires-Dist: lazy-imports==0.3.1; extra == "tf"
 Requires-Dist: mock==4.0.3; extra == "tf"
-Requires-Dist: networkx>=2.7.1; extra == "tf"
 Requires-Dist: numpy>2.0; extra == "tf"
 Requires-Dist: packaging>=20.0; extra == "tf"
 Requires-Dist: Pillow>=10.0.0; extra == "tf"
 Requires-Dist: pypdf>=6.0.0; extra == "tf"
 Requires-Dist: pypdfium2>=4.30.0; extra == "tf"
 Requires-Dist: pyyaml>=6.0.1; extra == "tf"
-Requires-Dist: pyzmq>=16; extra == "tf"
 Requires-Dist: scipy>=1.13.1; extra == "tf"
 Requires-Dist: termcolor>=1.1; extra == "tf"
 Requires-Dist: tabulate>=0.7.7; extra == "tf"
@@ -62,25 +56,24 @@ Requires-Dist: python-doctr==0.10.0; extra == "tf"
 Requires-Dist: pycocotools>=2.0.2; extra == "tf"
 Requires-Dist: boto3==1.34.102; extra == "tf"
 Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
+Requires-Dist: pyzmq>=16; extra == "tf"
 Requires-Dist: jdeskew>=0.2.2; extra == "tf"
 Requires-Dist: apted==1.0.3; extra == "tf"
 Requires-Dist: distance==0.1.3; extra == "tf"
 Requires-Dist: lxml>=4.9.1; extra == "tf"
+Requires-Dist: networkx>=2.7.1; extra == "tf"
 Provides-Extra: pt
 Requires-Dist: catalogue==2.0.10; extra == "pt"
 Requires-Dist: huggingface_hub>=0.26.0; extra == "pt"
-Requires-Dist: importlib-metadata>=5.0.0; extra == "pt"
 Requires-Dist: jsonlines==3.1.0; extra == "pt"
 Requires-Dist: lazy-imports==0.3.1; extra == "pt"
 Requires-Dist: mock==4.0.3; extra == "pt"
-Requires-Dist: networkx>=2.7.1; extra == "pt"
 Requires-Dist: numpy>2.0; extra == "pt"
 Requires-Dist: packaging>=20.0; extra == "pt"
 Requires-Dist: Pillow>=10.0.0; extra == "pt"
 Requires-Dist: pypdf>=6.0.0; extra == "pt"
 Requires-Dist: pypdfium2>=4.30.0; extra == "pt"
 Requires-Dist: pyyaml>=6.0.1; extra == "pt"
-Requires-Dist: pyzmq>=16; extra == "pt"
 Requires-Dist: scipy>=1.13.1; extra == "pt"
 Requires-Dist: termcolor>=1.1; extra == "pt"
 Requires-Dist: tabulate>=0.7.7; extra == "pt"
@@ -92,10 +85,12 @@ Requires-Dist: python-doctr==0.10.0; extra == "pt"
 Requires-Dist: pycocotools>=2.0.2; extra == "pt"
 Requires-Dist: boto3==1.34.102; extra == "pt"
 Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
+Requires-Dist: pyzmq>=16; extra == "pt"
 Requires-Dist: jdeskew>=0.2.2; extra == "pt"
 Requires-Dist: apted==1.0.3; extra == "pt"
 Requires-Dist: distance==0.1.3; extra == "pt"
 Requires-Dist: lxml>=4.9.1; extra == "pt"
+Requires-Dist: networkx>=2.7.1; extra == "pt"
 Provides-Extra: docs
 Requires-Dist: tensorpack==0.11; extra == "docs"
 Requires-Dist: boto3==1.34.102; extra == "docs"

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/__init__.py RENAMED Viewed

@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
 # pylint: enable=wrong-import-position
-__version__ = "0.45.0"
+__version__ = "0.46"
 _IMPORT_STRUCTURE = {
     "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory", "update_cfg_from_defaults"],
@@ -271,6 +271,7 @@ _IMPORT_STRUCTURE = {
         "MultiThreadPipelineComponent",
         "DoctectionPipe",
         "LanguageDetectionService",
+        "skip_if_category_or_service_extracted",
         "ImageLayoutService",
         "LMTokenClassifierService",
         "LMSequenceClassifierService",
@@ -310,12 +311,14 @@ _IMPORT_STRUCTURE = {
         "get_tensorpack_requirement",
         "pytorch_available",
         "get_pytorch_requirement",
+        "pyzmq_available",
         "lxml_available",
         "get_lxml_requirement",
         "apted_available",
         "get_apted_requirement",
         "distance_available",
         "get_distance_requirement",
+        "networkx_available",
         "numpy_v1_available",
         "get_numpy_v1_requirement",
         "transformers_available",

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/analyzer/config.py RENAMED Viewed

@@ -526,6 +526,9 @@ cfg.USE_LM_SEQUENCE_CLASS = False
 # Enables a token classification pipeline component, e.g. a LayoutLM or Bert-like model
 cfg.USE_LM_TOKEN_CLASS = False
+# Specifies the selection of the rotation model. There are two models available: A rotation estimator
+# based on Tesseract ('tesseract'), and a rotation estimator based on DocTr ('doctr').
+cfg.ROTATOR.MODEL = "tesseract"
 # Relevant when LIB = TF. Specifies the layout detection model.
 # This model should detect multiple or single objects across an entire page.

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/analyzer/factory.py RENAMED Viewed

@@ -22,13 +22,13 @@
 from __future__ import annotations
 from os import environ
-from typing import TYPE_CHECKING, Union
+from typing import TYPE_CHECKING, Literal, Union
 from lazy_imports import try_import
 from ..extern.base import ImageTransformer, ObjectDetector, PdfMiner
 from ..extern.d2detect import D2FrcnnDetector, D2FrcnnTracingDetector
-from ..extern.doctrocr import DoctrTextlineDetector, DoctrTextRecognizer
+from ..extern.doctrocr import DocTrRotationTransformer, DoctrTextlineDetector, DoctrTextRecognizer
 from ..extern.hfdetr import HFDetrDerivedDetector
 from ..extern.hflayoutlm import (
     HFLayoutLmSequenceClassifier,
@@ -78,6 +78,7 @@ if TYPE_CHECKING:
     from ..extern.hflayoutlm import LayoutSequenceModels, LayoutTokenModels
     from ..extern.hflm import LmSequenceModels, LmTokenModels
+    RotationTransformer = Union[TesseractRotationTransformer, DocTrRotationTransformer]
 __all__ = [
     "ServiceFactory",
@@ -190,24 +191,32 @@ class ServiceFactory:
         return ServiceFactory._build_layout_detector(config, mode)
     @staticmethod
-    def _build_rotation_detector() -> TesseractRotationTransformer:
+    def _build_rotation_detector(rotator_name: Literal["tesseract", "doctr"]) -> RotationTransformer:
         """
         Building a rotation detector.
         Returns:
             TesseractRotationTransformer: Rotation detector instance.
         """
-        return TesseractRotationTransformer()
+        if rotator_name == "tesseract":
+            return TesseractRotationTransformer()
+        if rotator_name == "doctr":
+            return DocTrRotationTransformer()
+        raise ValueError(
+            f"You have chosen rotator_name: {rotator_name} which is not allowed. Only tesseract or "
+            f"doctr are allowed."
+        )
     @staticmethod
-    def build_rotation_detector() -> TesseractRotationTransformer:
+    def build_rotation_detector(rotator_name: Literal["tesseract", "doctr"]) -> RotationTransformer:
         """
         Building a rotation detector.
         Returns:
             TesseractRotationTransformer: Rotation detector instance.
         """
-        return ServiceFactory._build_rotation_detector()
+        return ServiceFactory._build_rotation_detector(rotator_name)
     @staticmethod
     def _build_transform_service(transform_predictor: ImageTransformer) -> SimpleTransformService:
@@ -1123,7 +1132,7 @@ class ServiceFactory:
         pipe_component_list: list[PipelineComponent] = []
         if config.USE_ROTATOR:
-            rotation_detector = ServiceFactory.build_rotation_detector()
+            rotation_detector = ServiceFactory.build_rotation_detector(config.ROTATOR.MODEL)
             transform_service = ServiceFactory.build_transform_service(transform_predictor=rotation_detector)
             pipe_component_list.append(transform_service)

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/dataflow/parallel_map.py RENAMED Viewed

@@ -24,15 +24,19 @@ from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from typing import Any, Callable, Iterator, no_type_check
-import zmq
+from lazy_imports import try_import
 from ..utils.concurrency import StoppableThread, enable_death_signal, start_proc_mask_signal
 from ..utils.error import DataFlowTerminatedError
+from ..utils.file_utils import pyzmq_available
 from ..utils.logger import LoggingRecord, logger
 from .base import DataFlow, DataFlowReentrantGuard, ProxyDataFlow
 from .common import RepeatedData
 from .serialize import PickleSerializer
+with try_import() as import_guard:
+    import zmq
 @no_type_check
 def del_weakref(x):
@@ -77,6 +81,8 @@ def _get_pipe_name(name):
 class _ParallelMapData(ProxyDataFlow, ABC):
     def __init__(self, df: DataFlow, buffer_size: int, strict: bool = False) -> None:
+        if not pyzmq_available():
+            raise ModuleNotFoundError("pyzmq is required for running parallel dataflows (multiprocess/multithread).")
         super().__init__(df)
         if buffer_size <= 0:
             raise ValueError(f"buffer_size must be a positive number, got {buffer_size}")

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/datapoint/box.py RENAMED Viewed

@@ -284,7 +284,7 @@ class BoundingBox:
             raise BoundingBoxError(
                 f"bounding box must have height and width >0. Check coords "
                 f"ulx: {self.ulx}, uly: {self.uly}, lrx: {self.lrx}, "
-                f"lry: {self.lry}."
+                f"lry: {self.lry}, absolute_coords: {self.absolute_coords}"
             )
         if not self.absolute_coords and not (
             0 <= self.ulx <= 1 and 0 <= self.uly <= 1 and 0 <= self.lrx <= 1 and 0 <= self.lry <= 1
@@ -505,10 +505,10 @@ class BoundingBox:
             if self.absolute_coords:
                 transformed_box = BoundingBox(
                     absolute_coords=not self.absolute_coords,
-                    ulx=max(self.ulx / image_width, 0.0),
-                    uly=max(self.uly / image_height, 0.0),
-                    lrx=min(self.lrx / image_width, 1.0),
-                    lry=min(self.lry / image_height, 1.0),
+                    ulx=min(max(self.ulx / image_width, 0.0), 1.0),
+                    uly=min(max(self.uly / image_height, 0.0), 1.0),
+                    lrx=max(min(self.lrx / image_width, 1.0), 0.0),
+                    lry=max(min(self.lry / image_height, 1.0), 0.0),
                 )
             else:
                 transformed_box = BoundingBox(

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/datapoint/image.py RENAMED Viewed

@@ -36,7 +36,7 @@ from ..utils.logger import LoggingRecord, logger
 from ..utils.settings import ObjectTypes, SummaryType, get_type
 from ..utils.types import ImageDict, PathLikeOrStr, PixelValues
 from .annotation import Annotation, AnnotationMap, BoundingBox, CategoryAnnotation, ImageAnnotation
-from .box import crop_box_from_image, global_to_local_coords, intersection_box
+from .box import BoxCoordinate, crop_box_from_image, global_to_local_coords, intersection_box
 from .convert import as_dict, convert_b64_to_np_array, convert_np_array_to_b64, convert_pdf_bytes_to_np_array_v2
@@ -318,7 +318,7 @@ class Image:
         return _Img(self.image)
     @property
-    def width(self) -> float:
+    def width(self) -> BoxCoordinate:
         """
         `width`
         """
@@ -327,7 +327,7 @@ class Image:
         return self._bbox.width
     @property
-    def height(self) -> float:
+    def height(self) -> BoxCoordinate:
         """
         `height`
         """
@@ -335,7 +335,7 @@ class Image:
             raise ImageError("Height not available. Call set_width_height first")
         return self._bbox.height
-    def set_width_height(self, width: float, height: float) -> None:
+    def set_width_height(self, width: BoxCoordinate, height: BoxCoordinate) -> None:
         """
         Defines bounding box of the image if not already set. Use this, if you do not want to keep the image separated
         for memory reasons.
@@ -345,7 +345,7 @@ class Image:
             height: height of image
         """
         if self._bbox is None:
-            self._bbox = BoundingBox(ulx=0.0, uly=0.0, height=height, width=width, absolute_coords=True)
+            self._bbox = BoundingBox(ulx=0, uly=0, height=height, width=width, absolute_coords=True)
             self._self_embedding()
     def set_embedding(self, image_id: str, bounding_box: BoundingBox) -> None:

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/datapoint/view.py RENAMED Viewed

@@ -428,6 +428,8 @@ class List(Layout):
             A list of words order by reading order. Words with no `reading_order` will not be returned"""
         try:
             list_items = self.list_items
+            if not list_items:
+                return super().get_ordered_words()
             all_words = []
             list_items.sort(key=lambda x: x.bbox[1])
             for list_item in list_items:
@@ -755,6 +757,8 @@ class Table(Layout):
         """
         try:
             cells = self.cells
+            if not cells:
+                return super().get_ordered_words()
             all_words = []
             cells.sort(key=lambda x: (x.ROW_NUMBER, x.COLUMN_NUMBER))
             for cell in cells:
@@ -1054,6 +1058,8 @@ class Page(Image):
         Returns:
             A `Page` instance with all annotations as `ImageAnnotationBaseView` subclasses.
         """
+        if isinstance(image_orig, Page):
+            raise ImageError("Page.from_image() cannot be called on a Page instance.")
         if text_container is None:
             text_container = IMAGE_DEFAULTS.TEXT_CONTAINER
@@ -1310,7 +1316,7 @@ class Page(Image):
             If `interactive=False` will return a `np.array`.
         """
-        category_names_list: list[Union[str, None]] = []
+        category_names_list: list[Tuple[Union[str, None], Union[str, None]]] = []
         box_stack = []
         cells_found = False
@@ -1323,22 +1329,23 @@ class Page(Image):
             anns = self.get_annotation(category_names=list(debug_kwargs.keys()))
             for ann in anns:
                 box_stack.append(self._ann_viz_bbox(ann))
-                category_names_list.append(str(getattr(ann, debug_kwargs[ann.category_name])))
+                val = str(getattr(ann, debug_kwargs[ann.category_name]))
+                category_names_list.append((val, val))
         if show_layouts and not debug_kwargs:
             for item in self.layouts:
                 box_stack.append(self._ann_viz_bbox(item))
-                category_names_list.append(item.category_name.value)
+                category_names_list.append((item.category_name.value, item.category_name.value))
         if show_figures and not debug_kwargs:
             for item in self.figures:
                 box_stack.append(self._ann_viz_bbox(item))
-                category_names_list.append(item.category_name.value)
+                category_names_list.append((item.category_name.value, item.category_name.value))
         if show_tables and not debug_kwargs:
             for table in self.tables:
                 box_stack.append(self._ann_viz_bbox(table))
-                category_names_list.append(LayoutType.TABLE.value)
+                category_names_list.append((LayoutType.TABLE.value, LayoutType.TABLE.value))
                 if show_cells:
                     for cell in table.cells:
                         if cell.category_name in {
@@ -1347,21 +1354,21 @@ class Page(Image):
                         }:
                             cells_found = True
                             box_stack.append(self._ann_viz_bbox(cell))
-                            category_names_list.append(None)
+                            category_names_list.append((None, cell.category_name.value))
                 if show_table_structure:
                     rows = table.rows
                     cols = table.columns
                     for row in rows:
                         box_stack.append(self._ann_viz_bbox(row))
-                        category_names_list.append(None)
+                        category_names_list.append((None, row.category_name.value))
                     for col in cols:
                         box_stack.append(self._ann_viz_bbox(col))
-                        category_names_list.append(None)
+                        category_names_list.append((None, col.category_name.value))
         if show_cells and not cells_found and not debug_kwargs:
             for ann in self.get_annotation(category_names=[LayoutType.CELL, CellType.SPANNING]):
                 box_stack.append(self._ann_viz_bbox(ann))
-                category_names_list.append(None)
+                category_names_list.append((None, ann.category_name.value))
         if show_words and not debug_kwargs:
             all_words = []
@@ -1379,22 +1386,36 @@ class Page(Image):
                 for word in all_words:
                     box_stack.append(self._ann_viz_bbox(word))
                     if show_token_class:
-                        category_names_list.append(word.token_class.value if word.token_class is not None else None)
+                        category_names_list.append(
+                            (word.token_class.value, word.token_class.value)
+                            if word.token_class is not None
+                            else (None, None)
+                        )
                     else:
-                        category_names_list.append(word.token_tag.value if word.token_tag is not None else None)
+                        category_names_list.append(
+                            (word.token_tag.value, word.token_tag.value) if word.token_tag is not None else (None, None)
+                        )
             else:
                 for word in all_words:
                     if word.token_class is not None and word.token_class != TokenClasses.OTHER:
                         box_stack.append(self._ann_viz_bbox(word))
                         if show_token_class:
-                            category_names_list.append(word.token_class.value if word.token_class is not None else None)
+                            category_names_list.append(
+                                (word.token_class.value, word.token_class.value)
+                                if word.token_class is not None
+                                else (None, None)
+                            )
                         else:
-                            category_names_list.append(word.token_tag.value if word.token_tag is not None else None)
+                            category_names_list.append(
+                                (word.token_tag.value, word.token_tag.value)
+                                if word.token_tag is not None
+                                else (None, None)
+                            )
         if show_residual_layouts and not debug_kwargs:
             for item in self.residual_layouts:
                 box_stack.append(item.bbox)
-                category_names_list.append(item.category_name.value)
+                category_names_list.append((item.category_name.value, item.category_name.value))
         if self.image is not None:
             scale_fx = scaled_width / self.width

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/eval/cocometric.py RENAMED Viewed

@@ -275,6 +275,7 @@ class CocoMetric(MetricBase):
                       get the ultimate F1-score.
             f1_iou: Use with `f1_score=True` and reset the f1 iou threshold
                     per_category: Whether to calculate metrics per category
+            per_category: If set to True, f1 score will be returned by each category.
         """
         if max_detections is not None:
             assert len(max_detections) == 3, max_detections

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/extern/base.py RENAMED Viewed

@@ -263,7 +263,7 @@ class PredictorBase(ABC):
         requirements = cls.get_requirements()
         name = cls.__name__ if hasattr(cls, "__name__") else cls.__class__.__name__
         if not all(requirement[1] for requirement in requirements):
-            raise ImportError(
+            raise ModuleNotFoundError(
                 "\n".join(
                     [f"{name} has the following dependencies:"]
                     + [requirement[2] for requirement in requirements if not requirement[1]]
@@ -334,6 +334,11 @@ class DetectionResult:
         block: block number. For reading order from some ocr predictors
         line: line number. For reading order from some ocr predictors
         uuid: uuid. For assigning detection result (e.g. text to image annotations)
+        relationships: A dictionary of relationships. Each key is a relationship type and each value is a list of
+                       uuids of the related annotations.
+        angle: angle of rotation in degrees. Only used for text detection.
+        image_width: image width
+        image_height: image height
     """
     box: Optional[list[float]] = None
@@ -348,6 +353,8 @@ class DetectionResult:
     uuid: Optional[str] = None
     relationships: Optional[dict[str, Any]] = None
     angle: Optional[float] = None
+    image_width: Optional[Union[int, float]] = None
+    image_height: Optional[Union[int, float]] = None
 class ObjectDetector(PredictorBase, ABC):

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/extern/doctrocr.py RENAMED Viewed

@@ -24,9 +24,10 @@ from __future__ import annotations
 import os
 from abc import ABC
 from pathlib import Path
-from typing import Any, Literal, Mapping, Optional, Union
+from typing import Any, Literal, Mapping, Optional, Sequence, Union
 from zipfile import ZipFile
+import numpy as np
 from lazy_imports import try_import
 from ..utils.env_info import ENV_VARS_TRUE
@@ -39,6 +40,7 @@ from ..utils.file_utils import (
 )
 from ..utils.fs import load_json
 from ..utils.settings import LayoutType, ObjectTypes, PageType, TypeOrStr
+from ..utils.transform import RotationTransform
 from ..utils.types import PathLikeOrStr, PixelValues, Requirement
 from ..utils.viz import viz_handler
 from .base import DetectionResult, ImageTransformer, ModelCategories, ObjectDetector, TextRecognizer
@@ -558,12 +560,13 @@ class DocTrRotationTransformer(ImageTransformer):
         """
         Args:
             number_contours: the number of contours used for the orientation estimation
-            ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
+            ratio_threshold_for_lines: this is the ratio w/h used to discriminate lines
         """
         self.number_contours = number_contours
         self.ratio_threshold_for_lines = ratio_threshold_for_lines
         self.name = "doctr_rotation_transformer"
         self.model_id = self.get_model_id()
+        self.rotator = RotationTransform(360)
     def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
         """
@@ -579,6 +582,19 @@ class DocTrRotationTransformer(ImageTransformer):
         """
         return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
+    def transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
+        if detect_results:
+            if detect_results[0].angle:
+                self.rotator.set_angle(detect_results[0].angle)  # type: ignore
+                self.rotator.set_image_width(detect_results[0].image_width)  # type: ignore
+                self.rotator.set_image_height(detect_results[0].image_height)  # type: ignore
+                transformed_coords = self.rotator.apply_coords(
+                    np.asarray([detect_result.box for detect_result in detect_results], dtype=float)
+                )
+                for idx, detect_result in enumerate(detect_results):
+                    detect_result.box = transformed_coords[idx, :].tolist()
+        return detect_results
     def predict(self, np_img: PixelValues) -> DetectionResult:
         angle = estimate_orientation(
             np_img, n_ct=self.number_contours, ratio_threshold_for_lines=self.ratio_threshold_for_lines

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/extern/hflayoutlm.py RENAMED Viewed

@@ -1024,12 +1024,9 @@ class HFLayoutLmv2SequenceClassifier(HFLayoutLmSequenceClassifierBase):
         else:
             raise ValueError(f"images must be list but is {type(images)}")
-        result = predict_sequence_classes_from_layoutlm(input_ids,
-                                                        attention_mask,
-                                                        token_type_ids,
-                                                        boxes,
-                                                        self.model,
-                                                        images)
+        result = predict_sequence_classes_from_layoutlm(
+            input_ids, attention_mask, token_type_ids, boxes, self.model, images
+        )
         result.class_id += 1
         result.class_name = self.categories.categories[result.class_id]
@@ -1123,12 +1120,9 @@ class HFLayoutLmv3SequenceClassifier(HFLayoutLmSequenceClassifierBase):
         else:
             raise ValueError(f"images must be list but is {type(images)}")
-        result = predict_sequence_classes_from_layoutlm(input_ids,
-                                                        attention_mask,
-                                                        token_type_ids,
-                                                        boxes,
-                                                        self.model,
-                                                        images)
+        result = predict_sequence_classes_from_layoutlm(
+            input_ids, attention_mask, token_type_ids, boxes, self.model, images
+        )
         result.class_id += 1
         result.class_name = self.categories.categories[result.class_id]

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/extern/tessocr.py RENAMED Viewed

@@ -28,8 +28,9 @@ from errno import ENOENT
 from itertools import groupby
 from os import environ, fspath
 from pathlib import Path
-from typing import Any, Mapping, Optional, Union
+from typing import Any, Mapping, Optional, Sequence, Union
+import numpy as np
 from packaging.version import InvalidVersion, Version, parse
 from ..utils.context import save_tmp_file, timeout_manager
@@ -37,6 +38,7 @@ from ..utils.error import DependencyError, TesseractError
 from ..utils.file_utils import _TESS_PATH, get_tesseract_requirement
 from ..utils.metacfg import config_to_cli_str, set_config_by_yaml
 from ..utils.settings import LayoutType, ObjectTypes, PageType
+from ..utils.transform import RotationTransform
 from ..utils.types import PathLikeOrStr, PixelValues, Requirement
 from ..utils.viz import viz_handler
 from .base import DetectionResult, ImageTransformer, ModelCategories, ObjectDetector
@@ -450,6 +452,7 @@ class TesseractRotationTransformer(ImageTransformer):
         self.name = fspath(_TESS_PATH) + "-rotation"
         self.categories = ModelCategories(init_categories={1: PageType.ANGLE})
         self.model_id = self.get_model_id()
+        self.rotator = RotationTransform(360)
     def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
         """
@@ -465,6 +468,19 @@ class TesseractRotationTransformer(ImageTransformer):
         """
         return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
+    def transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
+        if detect_results:
+            if detect_results[0].angle:
+                self.rotator.set_angle(detect_results[0].angle)  # type: ignore
+                self.rotator.set_image_width(detect_results[0].image_width)  # type: ignore
+                self.rotator.set_image_height(detect_results[0].image_height)  # type: ignore
+                transformed_coords = self.rotator.apply_coords(
+                    np.asarray([detect_result.box for detect_result in detect_results], dtype=float)
+                )
+                for idx, detect_result in enumerate(detect_results):
+                    detect_result.box = transformed_coords[idx, :].tolist()
+        return detect_results
     def predict(self, np_img: PixelValues) -> DetectionResult:
         """
         Determines the angle of the rotated image. It can only handle angles that are multiples of 90 degrees.

{deepdoctection-0.45.0 → deepdoctection-0.46}/deepdoctection/pipe/order.py RENAMED Viewed

@@ -228,8 +228,8 @@ class OrderGenerator:
         columns: list[BoundingBox] = []
         anns.sort(
             key=lambda x: (
-                x.bounding_box.transform(image_width, image_height).cy,  # type: ignore
-                x.bounding_box.transform(image_width, image_height).cx,  # type: ignore
+                x.get_bounding_box(image_id).transform(image_width, image_height).cy,
+                x.get_bounding_box(image_id).transform(image_width, image_height).cx,
             )
         )
         for ann in anns:
@@ -309,7 +309,9 @@ class OrderGenerator:
         filtered_blocks: Sequence[tuple[int, str]]
         for idx in range(max_block_number + 1):
             filtered_blocks = list(filter(lambda x: x[0] == idx, blocks))  # type: ignore # pylint: disable=W0640
-            sorted_blocks.extend(self._sort_anns_grouped_by_blocks(filtered_blocks, anns, image_width, image_height))
+            sorted_blocks.extend(
+                self._sort_anns_grouped_by_blocks(filtered_blocks, anns, image_width, image_height, image_id)
+            )
         reading_blocks = [(idx + 1, block[1]) for idx, block in enumerate(sorted_blocks)]
         if logger.isEnabledFor(DEBUG):
@@ -346,7 +348,11 @@ class OrderGenerator:
     @staticmethod
     def _sort_anns_grouped_by_blocks(
-        block: Sequence[tuple[int, str]], anns: Sequence[ImageAnnotation], image_width: float, image_height: float
+        block: Sequence[tuple[int, str]],
+        anns: Sequence[ImageAnnotation],
+        image_width: float,
+        image_height: float,
+        image_id: Optional[str] = None,
     ) -> list[tuple[int, str]]:
         if not block:
             return []
@@ -356,8 +362,8 @@ class OrderGenerator:
         block_anns = [ann for ann in anns if ann.annotation_id in ann_ids]
         block_anns.sort(
             key=lambda x: (
-                round(x.bounding_box.transform(image_width, image_height).uly, 2),  # type: ignore
-                round(x.bounding_box.transform(image_width, image_height).ulx, 2),  # type: ignore
+                round(x.get_bounding_box(image_id).transform(image_width, image_height).uly, 2),
+                round(x.get_bounding_box(image_id).transform(image_width, image_height).ulx, 2),
             )
         )
         return [(block_number, ann.annotation_id) for ann in block_anns]

deepdoctection 0.45.0__tar.gz → 0.46__tar.gz

Potentially problematic release.

deepdoctection 0.45.0__tar.gz → 0.46__tar.gz