deepdoctection 0.44.1__py3-none-any.whl → 0.46__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registries and is provided for informational purposes only.
Potentially problematic release: this version of deepdoctection might be problematic.
- deepdoctection/__init__.py +7 -3
- deepdoctection/analyzer/config.py +44 -0
- deepdoctection/analyzer/factory.py +264 -7
- deepdoctection/configs/profiles.jsonl +2 -1
- deepdoctection/dataflow/parallel_map.py +7 -1
- deepdoctection/datapoint/box.py +5 -5
- deepdoctection/datapoint/image.py +5 -5
- deepdoctection/datapoint/view.py +73 -52
- deepdoctection/eval/cocometric.py +1 -0
- deepdoctection/extern/__init__.py +1 -0
- deepdoctection/extern/base.py +8 -1
- deepdoctection/extern/d2detect.py +1 -1
- deepdoctection/extern/doctrocr.py +18 -2
- deepdoctection/extern/fastlang.py +2 -2
- deepdoctection/extern/hflayoutlm.py +17 -10
- deepdoctection/extern/hflm.py +432 -7
- deepdoctection/extern/tessocr.py +17 -1
- deepdoctection/pipe/language.py +4 -4
- deepdoctection/pipe/lm.py +7 -3
- deepdoctection/pipe/order.py +12 -6
- deepdoctection/pipe/refine.py +10 -1
- deepdoctection/pipe/text.py +6 -0
- deepdoctection/pipe/transform.py +3 -0
- deepdoctection/utils/file_utils.py +34 -5
- deepdoctection/utils/logger.py +38 -1
- deepdoctection/utils/settings.py +2 -0
- deepdoctection/utils/transform.py +43 -18
- deepdoctection/utils/viz.py +24 -15
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.46.dist-info}/METADATA +16 -21
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.46.dist-info}/RECORD +33 -33
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.46.dist-info}/WHEEL +0 -0
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.46.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.46.dist-info}/top_level.txt +0 -0
deepdoctection/datapoint/view.py
CHANGED
@@ -319,29 +319,32 @@ class Layout(ImageAnnotationBaseView):
                token_tag_ann_ids,
                token_classes_ids,
                token_tag_ids,
-            ) = map(
-                [... removed lines not shown ...]
+            ) = map(
+                list,
+                zip(
+                    *[
+                        (
+                            word.characters,
+                            word.annotation_id,
+                            word.token_class,
+                            word.get_sub_category(WordType.TOKEN_CLASS).annotation_id
+                            if WordType.TOKEN_CLASS in word.sub_categories
+                            else None,
+                            word.token_tag,
+                            word.get_sub_category(WordType.TOKEN_TAG).annotation_id
+                            if WordType.TOKEN_TAG in word.sub_categories
+                            else None,
+                            word.get_sub_category(WordType.TOKEN_CLASS).category_id
+                            if WordType.TOKEN_CLASS in word.sub_categories
+                            else None,
+                            word.get_sub_category(WordType.TOKEN_TAG).category_id
+                            if WordType.TOKEN_TAG in word.sub_categories
+                            else None,
+                        )
+                        for word in words
+                    ]
+                ),
+            )
        else:
            (
                characters,

@@ -364,18 +367,17 @@ class Layout(ImageAnnotationBaseView):
        )

        return Text_(
-            text=" ".join(characters),
-            words=characters,
-            ann_ids=ann_ids,
-            token_classes=token_classes,
-            token_class_ann_ids=token_class_ann_ids,
-            token_tags=token_tags,
-            token_tag_ann_ids=token_tag_ann_ids,
-            token_class_ids=token_classes_ids,
-            token_tag_ids=token_tag_ids,
+            text=" ".join(characters),  # type: ignore
+            words=characters,  # type: ignore
+            ann_ids=ann_ids,  # type: ignore
+            token_classes=token_classes,  # type: ignore
+            token_class_ann_ids=token_class_ann_ids,  # type: ignore
+            token_tags=token_tags,  # type: ignore
+            token_tag_ann_ids=token_tag_ann_ids,  # type: ignore
+            token_class_ids=token_classes_ids,  # type: ignore
+            token_tag_ids=token_tag_ids,  # type: ignore
        )

-
    def get_attribute_names(self) -> set[str]:
        attr_names = (
            {"words", "text"}

@@ -426,6 +428,8 @@ class List(Layout):
        A list of words order by reading order. Words with no `reading_order` will not be returned"""
        try:
            list_items = self.list_items
+            if not list_items:
+                return super().get_ordered_words()
            all_words = []
            list_items.sort(key=lambda x: x.bbox[1])
            for list_item in list_items:

@@ -464,9 +468,9 @@ class Table(Layout):
        A list of a table cells.
        """
        cell_anns: list[Cell] = []
-        [... removed lines not shown ...]
+        if self.number_of_rows:
+            for row_number in range(1, self.number_of_rows + 1):  # type: ignore
+                cell_anns.extend(self.row(row_number))  # type: ignore
        return cell_anns

    @property

@@ -731,7 +735,6 @@ class Table(Layout):
            token_tag_ids=token_tag_ids,
        )

-
    @property
    def words(self) -> list[ImageAnnotationBaseView]:
        """

@@ -754,6 +757,8 @@ class Table(Layout):
        """
        try:
            cells = self.cells
+            if not cells:
+                return super().get_ordered_words()
            all_words = []
            cells.sort(key=lambda x: (x.ROW_NUMBER, x.COLUMN_NUMBER))
            for cell in cells:

@@ -1053,6 +1058,8 @@ class Page(Image):
        Returns:
            A `Page` instance with all annotations as `ImageAnnotationBaseView` subclasses.
        """
+        if isinstance(image_orig, Page):
+            raise ImageError("Page.from_image() cannot be called on a Page instance.")

        if text_container is None:
            text_container = IMAGE_DEFAULTS.TEXT_CONTAINER

@@ -1175,7 +1182,6 @@ class Page(Image):
            token_tag_ids=token_tag_ann_ids,
        )

-
    def get_layout_context(self, annotation_id: str, context_size: int = 3) -> list[ImageAnnotationBaseView]:
        """
        For a given `annotation_id` get a list of `ImageAnnotation` that are nearby in terms of `reading_order`.

@@ -1310,7 +1316,7 @@ class Page(Image):
        If `interactive=False` will return a `np.array`.
        """

-        category_names_list: list[Union[str, None]] = []
+        category_names_list: list[Tuple[Union[str, None], Union[str, None]]] = []
        box_stack = []
        cells_found = False

@@ -1323,22 +1329,23 @@ class Page(Image):
            anns = self.get_annotation(category_names=list(debug_kwargs.keys()))
            for ann in anns:
                box_stack.append(self._ann_viz_bbox(ann))
-                [... removed line not shown ...]
+                val = str(getattr(ann, debug_kwargs[ann.category_name]))
+                category_names_list.append((val, val))

        if show_layouts and not debug_kwargs:
            for item in self.layouts:
                box_stack.append(self._ann_viz_bbox(item))
-                category_names_list.append(item.category_name.value)
+                category_names_list.append((item.category_name.value, item.category_name.value))

        if show_figures and not debug_kwargs:
            for item in self.figures:
                box_stack.append(self._ann_viz_bbox(item))
-                category_names_list.append(item.category_name.value)
+                category_names_list.append((item.category_name.value, item.category_name.value))

        if show_tables and not debug_kwargs:
            for table in self.tables:
                box_stack.append(self._ann_viz_bbox(table))
-                category_names_list.append(LayoutType.TABLE.value)
+                category_names_list.append((LayoutType.TABLE.value, LayoutType.TABLE.value))
                if show_cells:
                    for cell in table.cells:
                        if cell.category_name in {

@@ -1347,21 +1354,21 @@ class Page(Image):
                        }:
                            cells_found = True
                            box_stack.append(self._ann_viz_bbox(cell))
-                            category_names_list.append(None)
+                            category_names_list.append((None, cell.category_name.value))
                if show_table_structure:
                    rows = table.rows
                    cols = table.columns
                    for row in rows:
                        box_stack.append(self._ann_viz_bbox(row))
-                        category_names_list.append(None)
+                        category_names_list.append((None, row.category_name.value))
                    for col in cols:
                        box_stack.append(self._ann_viz_bbox(col))
-                        category_names_list.append(None)
+                        category_names_list.append((None, col.category_name.value))

        if show_cells and not cells_found and not debug_kwargs:
            for ann in self.get_annotation(category_names=[LayoutType.CELL, CellType.SPANNING]):
                box_stack.append(self._ann_viz_bbox(ann))
-                category_names_list.append(None)
+                category_names_list.append((None, ann.category_name.value))

        if show_words and not debug_kwargs:
            all_words = []

@@ -1379,22 +1386,36 @@ class Page(Image):
                for word in all_words:
                    box_stack.append(self._ann_viz_bbox(word))
                    if show_token_class:
-                        category_names_list.append(
+                        category_names_list.append(
+                            (word.token_class.value, word.token_class.value)
+                            if word.token_class is not None
+                            else (None, None)
+                        )
                    else:
-                        category_names_list.append(
+                        category_names_list.append(
+                            (word.token_tag.value, word.token_tag.value) if word.token_tag is not None else (None, None)
+                        )
            else:
                for word in all_words:
                    if word.token_class is not None and word.token_class != TokenClasses.OTHER:
                        box_stack.append(self._ann_viz_bbox(word))
                        if show_token_class:
-                            category_names_list.append(
+                            category_names_list.append(
+                                (word.token_class.value, word.token_class.value)
+                                if word.token_class is not None
+                                else (None, None)
+                            )
                        else:
-                            category_names_list.append(
+                            category_names_list.append(
+                                (word.token_tag.value, word.token_tag.value)
+                                if word.token_tag is not None
+                                else (None, None)
+                            )

        if show_residual_layouts and not debug_kwargs:
            for item in self.residual_layouts:
                box_stack.append(item.bbox)
-                category_names_list.append(item.category_name.value)
+                category_names_list.append((item.category_name.value, item.category_name.value))

        if self.image is not None:
            scale_fx = scaled_width / self.width
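For readers unfamiliar with the map(list, zip(*[...])) idiom used in the first hunk above, here is a minimal standalone sketch with a simplified stand-in for the word view (not deepdoctection code; names are invented for illustration):

from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeWord:  # simplified stand-in for the package's word annotation view
    characters: str
    annotation_id: str
    token_class: Optional[str] = None

words = [FakeWord("Invoice", "a1", "header"), FakeWord("2024", "a2")]

# One tuple of attributes per word, transposed with zip(*...); map(list, ...)
# turns each resulting column into a list, exactly as in the refactored method.
characters, ann_ids, token_classes = map(
    list,
    zip(*[(w.characters, w.annotation_id, w.token_class) for w in words]),
)
assert characters == ["Invoice", "2024"]
assert token_classes == ["header", None]

Note that the real method keeps a separate else branch for the case of no words, since zip(*[]) would produce nothing to unpack.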
deepdoctection/eval/cocometric.py
CHANGED

@@ -275,6 +275,7 @@ class CocoMetric(MetricBase):
                get the ultimate F1-score.
            f1_iou: Use with `f1_score=True` and reset the f1 iou threshold
            per_category: Whether to calculate metrics per category
+            per_category: If set to True, f1 score will be returned by each category.
        """
        if max_detections is not None:
            assert len(max_detections) == 3, max_detections
deepdoctection/extern/base.py
CHANGED
@@ -263,7 +263,7 @@ class PredictorBase(ABC):
        requirements = cls.get_requirements()
        name = cls.__name__ if hasattr(cls, "__name__") else cls.__class__.__name__
        if not all(requirement[1] for requirement in requirements):
-            raise
+            raise ModuleNotFoundError(
                "\n".join(
                    [f"{name} has the following dependencies:"]
                    + [requirement[2] for requirement in requirements if not requirement[1]]

@@ -334,6 +334,11 @@ class DetectionResult:
        block: block number. For reading order from some ocr predictors
        line: line number. For reading order from some ocr predictors
        uuid: uuid. For assigning detection result (e.g. text to image annotations)
+        relationships: A dictionary of relationships. Each key is a relationship type and each value is a list of
+            uuids of the related annotations.
+        angle: angle of rotation in degrees. Only used for text detection.
+        image_width: image width
+        image_height: image height
    """

    box: Optional[list[float]] = None

@@ -348,6 +353,8 @@ class DetectionResult:
    uuid: Optional[str] = None
    relationships: Optional[dict[str, Any]] = None
    angle: Optional[float] = None
+    image_width: Optional[Union[int, float]] = None
+    image_height: Optional[Union[int, float]] = None


class ObjectDetector(PredictorBase, ABC):
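The two new fields record the size of the image a box was detected on, which the rotation transformer further below uses to map coordinates back. A construction sketch with invented values (the box format comment is an assumption, not taken from the package docs):

from deepdoctection.extern.base import DetectionResult

detection = DetectionResult(
    box=[12.0, 34.0, 180.0, 60.0],   # assumed absolute xyxy coordinates
    score=0.97,
    angle=90.0,                      # rotation that was applied before detection
    image_width=1654,                # dimensions of the (rotated) image the box refers to
    image_height=2339,
)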
deepdoctection/extern/d2detect.py
CHANGED

@@ -91,7 +91,7 @@ def d2_predict_image(
    """
    height, width = np_img.shape[:2]
    resized_img = resizer.get_transform(np_img).apply_image(np_img)
-    image = torch.as_tensor(resized_img.astype(
+    image = torch.as_tensor(resized_img.astype(np.float32).transpose(2, 0, 1))

    with torch.no_grad():
        inputs = {"image": image, "height": height, "width": width}
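The corrected line completes the usual NumPy-to-tensor preparation for Detectron2: cast to float32 and reorder the axes from HWC to CHW. A generic illustration of that conversion (plain NumPy/PyTorch, independent of deepdoctection):

import numpy as np
import torch

np_img = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)  # H x W x C uint8 image
image = torch.as_tensor(np_img.astype(np.float32).transpose(2, 0, 1))   # C x H x W float32 tensor
assert image.shape == (3, 480, 640) and image.dtype == torch.float32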
deepdoctection/extern/doctrocr.py
CHANGED

@@ -24,9 +24,10 @@ from __future__ import annotations
import os
from abc import ABC
from pathlib import Path
-from typing import Any, Literal, Mapping, Optional, Union
+from typing import Any, Literal, Mapping, Optional, Sequence, Union
from zipfile import ZipFile

+import numpy as np
from lazy_imports import try_import

from ..utils.env_info import ENV_VARS_TRUE

@@ -39,6 +40,7 @@ from ..utils.file_utils import (
)
from ..utils.fs import load_json
from ..utils.settings import LayoutType, ObjectTypes, PageType, TypeOrStr
+from ..utils.transform import RotationTransform
from ..utils.types import PathLikeOrStr, PixelValues, Requirement
from ..utils.viz import viz_handler
from .base import DetectionResult, ImageTransformer, ModelCategories, ObjectDetector, TextRecognizer

@@ -558,12 +560,13 @@ class DocTrRotationTransformer(ImageTransformer):
        """
        Args:
            number_contours: the number of contours used for the orientation estimation
-            ratio_threshold_for_lines: this is the ratio w/h used to
+            ratio_threshold_for_lines: this is the ratio w/h used to discriminate lines
        """
        self.number_contours = number_contours
        self.ratio_threshold_for_lines = ratio_threshold_for_lines
        self.name = "doctr_rotation_transformer"
        self.model_id = self.get_model_id()
+        self.rotator = RotationTransform(360)

    def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
        """

@@ -579,6 +582,19 @@ class DocTrRotationTransformer(ImageTransformer):
        """
        return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore

+    def transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
+        if detect_results:
+            if detect_results[0].angle:
+                self.rotator.set_angle(detect_results[0].angle)  # type: ignore
+                self.rotator.set_image_width(detect_results[0].image_width)  # type: ignore
+                self.rotator.set_image_height(detect_results[0].image_height)  # type: ignore
+                transformed_coords = self.rotator.apply_coords(
+                    np.asarray([detect_result.box for detect_result in detect_results], dtype=float)
+                )
+                for idx, detect_result in enumerate(detect_results):
+                    detect_result.box = transformed_coords[idx, :].tolist()
+        return detect_results
+
    def predict(self, np_img: PixelValues) -> DetectionResult:
        angle = estimate_orientation(
            np_img, n_ct=self.number_contours, ratio_threshold_for_lines=self.ratio_threshold_for_lines
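The new transform_coords method maps boxes detected on the rotated (deskewed) image back into the original coordinate system, reading the angle and the rotated image's size from the first DetectionResult. A usage sketch under assumptions (constructor arguments and box values are invented; per the code above, results without an angle on the first element pass through unchanged):

from deepdoctection.extern.base import DetectionResult
from deepdoctection.extern.doctrocr import DocTrRotationTransformer

# Constructor arguments are illustrative guesses, not documented defaults.
transformer = DocTrRotationTransformer(number_contours=3, ratio_threshold_for_lines=5.0)

detect_results = [
    DetectionResult(box=[10.0, 20.0, 110.0, 60.0], angle=90.0, image_width=800, image_height=600),
    DetectionResult(box=[15.0, 80.0, 120.0, 120.0]),
]
detect_results = transformer.transform_coords(detect_results)  # boxes rotated back by the stored angle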
deepdoctection/extern/fastlang.py
CHANGED

@@ -36,7 +36,7 @@ from ..utils.types import PathLikeOrStr
from .base import DetectionResult, LanguageDetector, ModelCategories

with try_import() as import_guard:
-    from fasttext import load_model  # type: ignore
+    from fasttext import load_model  # type: ignore # pylint: disable=E0401


class FasttextLangDetectorMixin(LanguageDetector, ABC):

@@ -62,7 +62,7 @@ class FasttextLangDetectorMixin(LanguageDetector, ABC):
        Returns:
            `DetectionResult` filled with `text` and `score`
        """
-        return DetectionResult(
+        return DetectionResult(class_name=self.categories_orig[output[0][0]], score=output[1][0])

    @staticmethod
    def get_name(path_weights: PathLikeOrStr) -> str:
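For context on the output[0][0] / output[1][0] indexing in the rewritten return statement: fasttext's predict returns a pair of (labels, probabilities). A small sketch (the model path is hypothetical and requires the fasttext package plus a downloaded language-id model):

from fasttext import load_model

model = load_model("lid.176.bin")            # hypothetical path to a language-id model
output = model.predict("Das ist ein Satz.")  # e.g. (('__label__de',), array([0.99]))
label, score = output[0][0], output[1][0]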
deepdoctection/extern/hflayoutlm.py
CHANGED

@@ -126,10 +126,13 @@ def get_tokenizer_from_model_class(model_class: str, use_xlm_tokenizer: bool) ->
        ("XLMRobertaForSequenceClassification", True): XLMRobertaTokenizerFast.from_pretrained(
            "FacebookAI/xlm-roberta-base"
        ),
+        ("XLMRobertaForTokenClassification", True): XLMRobertaTokenizerFast.from_pretrained(
+            "FacebookAI/xlm-roberta-base"
+        ),
    }[(model_class, use_xlm_tokenizer)]


-def
+def predict_token_classes_from_layoutlm(
    uuids: list[list[str]],
    input_ids: torch.Tensor,
    attention_mask: torch.Tensor,

@@ -192,7 +195,7 @@ def predict_token_classes(
    return all_token_classes


-def
+def predict_sequence_classes_from_layoutlm(
    input_ids: torch.Tensor,
    attention_mask: torch.Tensor,
    token_type_ids: torch.Tensor,

@@ -462,7 +465,7 @@ class HFLayoutLmTokenClassifier(HFLayoutLmTokenClassifierBase):

        ann_ids, _, input_ids, attention_mask, token_type_ids, boxes, tokens = self._validate_encodings(**encodings)

-        results =
+        results = predict_token_classes_from_layoutlm(
            ann_ids, input_ids, attention_mask, token_type_ids, boxes, tokens, self.model, None
        )

@@ -586,7 +589,7 @@ class HFLayoutLmv2TokenClassifier(HFLayoutLmTokenClassifierBase):
            images = images.to(self.device)
        else:
            raise ValueError(f"images must be list but is {type(images)}")
-        results =
+        results = predict_token_classes_from_layoutlm(
            ann_ids, input_ids, attention_mask, token_type_ids, boxes, tokens, self.model, images
        )

@@ -710,7 +713,7 @@ class HFLayoutLmv3TokenClassifier(HFLayoutLmTokenClassifierBase):
            images = images.to(self.device)
        else:
            raise ValueError(f"images must be list but is {type(images)}")
-        results =
+        results = predict_token_classes_from_layoutlm(
            ann_ids, input_ids, attention_mask, token_type_ids, boxes, tokens, self.model, images
        )

@@ -909,7 +912,7 @@ class HFLayoutLmSequenceClassifier(HFLayoutLmSequenceClassifierBase):
        """
        input_ids, attention_mask, token_type_ids, boxes = self._validate_encodings(**encodings)

-        result =
+        result = predict_sequence_classes_from_layoutlm(
            input_ids,
            attention_mask,
            token_type_ids,

@@ -1021,7 +1024,9 @@ class HFLayoutLmv2SequenceClassifier(HFLayoutLmSequenceClassifierBase):
        else:
            raise ValueError(f"images must be list but is {type(images)}")

-        result =
+        result = predict_sequence_classes_from_layoutlm(
+            input_ids, attention_mask, token_type_ids, boxes, self.model, images
+        )

        result.class_id += 1
        result.class_name = self.categories.categories[result.class_id]

@@ -1115,7 +1120,9 @@ class HFLayoutLmv3SequenceClassifier(HFLayoutLmSequenceClassifierBase):
        else:
            raise ValueError(f"images must be list but is {type(images)}")

-        result =
+        result = predict_sequence_classes_from_layoutlm(
+            input_ids, attention_mask, token_type_ids, boxes, self.model, images
+        )

        result.class_id += 1
        result.class_name = self.categories.categories[result.class_id]

@@ -1245,7 +1252,7 @@ class HFLiltTokenClassifier(HFLayoutLmTokenClassifierBase):

        ann_ids, _, input_ids, attention_mask, token_type_ids, boxes, tokens = self._validate_encodings(**encodings)

-        results =
+        results = predict_token_classes_from_layoutlm(
            ann_ids, input_ids, attention_mask, token_type_ids, boxes, tokens, self.model, None
        )

@@ -1323,7 +1330,7 @@ class HFLiltSequenceClassifier(HFLayoutLmSequenceClassifierBase):
    def predict(self, **encodings: Union[list[list[str]], torch.Tensor]) -> SequenceClassResult:
        input_ids, attention_mask, token_type_ids, boxes = self._validate_encodings(**encodings)

-        result =
+        result = predict_sequence_classes_from_layoutlm(
            input_ids,
            attention_mask,
            token_type_ids,
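The tokenizer lookup now also resolves XLM-R token classification models. A hedged call example (this triggers downloads of the mapped tokenizers from the Hugging Face Hub; an unmapped (model_class, use_xlm_tokenizer) pair raises a KeyError from the dictionary lookup):

from deepdoctection.extern.hflayoutlm import get_tokenizer_from_model_class

tokenizer = get_tokenizer_from_model_class("XLMRobertaForTokenClassification", use_xlm_tokenizer=True)
# Per the mapping above, this returns an XLMRobertaTokenizerFast loaded from "FacebookAI/xlm-roberta-base".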