deepdoctection 0.34__py3-none-any.whl → 0.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of deepdoctection might be problematic.

@@ -1,3 +1,4 @@
+ USE_ROTATOR: False
  USE_LAYOUT: True
  USE_TABLE_SEGMENTATION: True
  TF:

@@ -97,3 +98,7 @@ TEXT_ORDERING:
  BROKEN_LINE_TOLERANCE: 0.003
  HEIGHT_TOLERANCE: 2.0
  PARAGRAPH_BREAK: 0.035
+ USE_LAYOUT_LINK: False
+ LAYOUT_LINK:
+ PARENTAL_CATEGORIES:
+ CHILD_CATEGORIES:
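
The two config hunks above introduce a rotation step and the new layout-link post-processing. A minimal sketch of how the flags could be toggled at analyzer construction time, assuming the `config_overwrite` mechanism of `get_dd_analyzer`; the category values for LAYOUT_LINK are illustrative assumptions, not defaults:

    import deepdoctection as dd

    # Hedged sketch: flag names come from the config diff above.
    analyzer = dd.get_dd_analyzer(
        config_overwrite=[
            "USE_ROTATOR=True",
            "USE_LAYOUT_LINK=True",
            "LAYOUT_LINK.PARENTAL_CATEGORIES=['figure']",   # illustrative categories
            "LAYOUT_LINK.CHILD_CATEGORIES=['caption']",
        ]
    )
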
@@ -527,5 +527,5 @@ class ContainerAnnotation(CategoryAnnotation):
  def from_dict(cls, **kwargs: AnnotationDict) -> ContainerAnnotation:
  container_ann = ann_from_dict(cls, **kwargs)
  value = kwargs.get("value", "")
- container_ann.value = value if isinstance(value, str) else list(value)
+ container_ann.value = value if isinstance(value, (int, float, str)) else list(value)
  return container_ann

@@ -143,11 +143,13 @@ def convert_pdf_bytes_to_np_array(pdf_bytes: bytes, dpi: Optional[int] = None) -
  return np_array.astype(uint8)


- def convert_pdf_bytes_to_np_array_v2(pdf_bytes: bytes, dpi: Optional[int] = None) -> PixelValues:
+ def convert_pdf_bytes_to_np_array_v2(pdf_bytes: bytes, dpi: Optional[int] = 200) -> PixelValues:
  """
- Converts a pdf passed as bytes into a numpy array. Note, that this method expects poppler to be installed. This
- function, however does not rely on the wrapper pdf2image but uses a function of this lib which calls poppler
- directly.
+ Converts a pdf passed as bytes into a numpy array. We use poppler or pdfmium to convert the pdf to an image.
+ If both is available you can steer the selection of the render engine with environment variables:
+
+ USE_DD_POPPLER: Set to 1, "TRUE", "True" to use poppler
+ USE_DD_PDFIUM: Set to 1, "TRUE", "True" to use pdfium

  :param pdf_bytes: A pdf as bytes object. A byte representation can from a pdf file can be generated e.g. with
  `utils.fs.load_bytes_from_pdf_file`
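
A minimal sketch of steering the render engine via the new environment variables; only the variable names are taken from the docstring above, and the module path is an assumption:

    import os

    # Assumption: convert_pdf_bytes_to_np_array_v2 lives in deepdoctection.utils.pdf_utils
    os.environ["USE_DD_PDFIUM"] = "True"    # prefer pdfium when both engines are installed
    os.environ["USE_DD_POPPLER"] = "False"

    from deepdoctection.utils.pdf_utils import convert_pdf_bytes_to_np_array_v2

    with open("page.pdf", "rb") as f:        # a single-page pdf, path is illustrative
        np_image = convert_pdf_bytes_to_np_array_v2(f.read(), dpi=200)  # dpi now defaults to 200
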
@@ -23,7 +23,7 @@ from __future__ import annotations
  import json
  from collections import defaultdict
  from dataclasses import dataclass, field
- from os import environ
+ from os import environ, fspath
  from pathlib import Path
  from typing import Any, Optional, Sequence, Union, no_type_check


@@ -412,13 +412,22 @@ class Image:
  img_dict["_image"] = None
  return img_dict

+ def as_json(self) -> str:
+ """
+ Returns the full image dataclass as json string.
+
+ :return: A json string.
+ """
+
+ return json.dumps(self.as_dict(), indent=4)
+
  @staticmethod
  def remove_keys() -> list[str]:
  """
  A list of attributes to suspend from as_dict creation.
  """

- return ["_image", "_annotation_ids"]
+ return ["_image", "_annotation_ids", "_category_name"]

  def define_annotation_id(self, annotation: Annotation) -> str:
  """
@@ -443,7 +452,8 @@ class Image:

  Calls `List.remove`. Make sure, the element is in the list for otherwise a ValueError will be raised.

- :param annotation: The annotation to remove
+ :param annotation_ids: The annotation to remove
+ :param service_ids: The service id to remove
  """
  ann_id_to_annotation_maps = self.get_annotation_id_to_annotation_maps()


@@ -703,13 +713,13 @@ class Image:
  path = path / self.image_id
  suffix = path.suffix
  if suffix:
- path_json = path.as_posix().replace(suffix, ".json")
+ path_json = fspath(path).replace(suffix, ".json")
  else:
- path_json = path.as_posix() + ".json"
+ path_json = fspath(path) + ".json"
  if highest_hierarchy_only:
  self.remove_image_from_lower_hierachy()
  export_dict = self.as_dict()
- export_dict["location"] = str(export_dict["location"])
+ export_dict["location"] = fspath(export_dict["location"])
  if not image_to_json:
  export_dict["_image"] = None
  if dry:

@@ -25,6 +25,7 @@ from copy import copy
  from typing import Any, Mapping, Optional, Sequence, Type, TypedDict, Union, no_type_check

  import numpy as np
+ from typing_extensions import LiteralString

  from ..utils.error import AnnotationError, ImageError
  from ..utils.logger import LoggingRecord, logger

@@ -40,10 +41,12 @@ from ..utils.settings import (
  WordType,
  get_type,
  )
+ from ..utils.transform import ResizeTransform
  from ..utils.types import HTML, AnnotationDict, Chunks, ImageDict, PathLikeOrStr, PixelValues, Text_, csv
  from ..utils.viz import draw_boxes, interactive_imshow, viz_handler
  from .annotation import CategoryAnnotation, ContainerAnnotation, ImageAnnotation, ann_from_dict
  from .box import BoundingBox, crop_box_from_image
+ from .convert import box_to_point4, point4_to_box
  from .image import Image



@@ -101,7 +104,7 @@ class ImageAnnotationBaseView(ImageAnnotation):
  return np_image
  raise AnnotationError(f"base_page.image is None for {self.annotation_id}")

- def __getattr__(self, item: str) -> Optional[Union[str, int, list[str]]]:
+ def __getattr__(self, item: str) -> Optional[Union[str, int, list[str], list[ImageAnnotationBaseView]]]:
  """
  Get attributes defined by registered `self.get_attribute_names()` in a multi step process:


@@ -126,6 +129,9 @@ class ImageAnnotationBaseView(ImageAnnotation):
  if isinstance(sub_cat, ContainerAnnotation):
  return sub_cat.value
  return sub_cat.category_id
+ if item in self.relationships:
+ relationship_ids = self.get_relationship(get_type(item))
+ return self.base_page.get_annotation(annotation_ids=relationship_ids)
  if self.image is not None:
  if item in self.image.summary.sub_categories:
  sub_cat = self.get_summary(get_type(item))

@@ -165,7 +171,11 @@ class Word(ImageAnnotationBaseView):
  """

  def get_attribute_names(self) -> set[str]:
- return set(WordType).union(super().get_attribute_names()).union({Relationships.READING_ORDER})
+ return (
+ set(WordType)
+ .union(super().get_attribute_names())
+ .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK})
+ )


  class Layout(ImageAnnotationBaseView):

@@ -246,7 +256,11 @@ class Layout(ImageAnnotationBaseView):
  }

  def get_attribute_names(self) -> set[str]:
- return {"words", "text"}.union(super().get_attribute_names()).union({Relationships.READING_ORDER})
+ return (
+ {"words", "text"}
+ .union(super().get_attribute_names())
+ .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK})
+ )

  def __len__(self) -> int:
  """len of text counted by number of characters"""
@@ -433,8 +447,8 @@ class ImageDefaults(TypedDict):
  """ImageDefaults"""

  text_container: LayoutType
- floating_text_block_categories: tuple[LayoutType, ...]
- text_block_categories: tuple[LayoutType, ...]
+ floating_text_block_categories: tuple[Union[LayoutType, CellType], ...]
+ text_block_categories: tuple[Union[LayoutType, CellType], ...]


  IMAGE_DEFAULTS: ImageDefaults = {

@@ -448,9 +462,13 @@ IMAGE_DEFAULTS: ImageDefaults = {
  "text_block_categories": (
  LayoutType.TEXT,
  LayoutType.TITLE,
- LayoutType.FIGURE,
  LayoutType.LIST,
  LayoutType.CELL,
+ LayoutType.FIGURE,
+ CellType.COLUMN_HEADER,
+ CellType.PROJECTED_ROW_HEADER,
+ CellType.SPANNING,
+ CellType.ROW_HEADER,
  ),
  }


@@ -509,6 +527,9 @@ class Page(Image):
  "location",
  "document_id",
  "page_number",
+ "angle",
+ "figures",
+ "residual_layouts",
  }
  include_residual_text_container: bool = True


@@ -607,6 +628,41 @@ class Page(Image):
  """
  return self.get_annotation(category_names=LayoutType.TABLE)

+ @property
+ def figures(self) -> list[ImageAnnotationBaseView]:
+ """
+ A list of a figures.
+ """
+ return self.get_annotation(category_names=LayoutType.FIGURE)
+
+ @property
+ def residual_layouts(self) -> list[ImageAnnotationBaseView]:
+ """
+ A list of all residual layouts. Residual layouts are all layouts that are
+ - not floating text blocks,
+ - not text containers,
+ - not tables,
+ - not figures
+ - not cells
+ - not rows
+ - not columns
+ """
+ return self.get_annotation(category_names=self._get_residual_layout())
+
+ def _get_residual_layout(self) -> list[LiteralString]:
+ layouts = copy(list(self.floating_text_block_categories))
+ layouts.extend(
+ [
+ LayoutType.TABLE,
+ LayoutType.FIGURE,
+ self.text_container,
+ LayoutType.CELL,
+ LayoutType.ROW,
+ LayoutType.COLUMN,
+ ]
+ )
+ return [layout for layout in LayoutType if layout not in layouts]
+
  @classmethod
  def from_image(
  cls,
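
A short sketch of the two new `Page` properties; `page` stands for any parsed `Page` instance:

    for figure in page.figures:                # all LayoutType.FIGURE annotations
        print(figure.annotation_id, figure.bbox)

    for residual in page.residual_layouts:     # layouts that are neither floating text blocks, text containers,
        print(residual.category_name)          # tables, figures, cells, rows nor columns
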
@@ -800,12 +856,15 @@ class Page(Image):
  self,
  show_tables: bool = True,
  show_layouts: bool = True,
+ show_figures: bool = False,
+ show_residual_layouts: bool = False,
  show_cells: bool = True,
  show_table_structure: bool = True,
  show_words: bool = False,
  show_token_class: bool = True,
  ignore_default_token_class: bool = False,
  interactive: bool = False,
+ scaled_width: int = 600,
  **debug_kwargs: str,
  ) -> Optional[PixelValues]:
  """

@@ -826,12 +885,14 @@ class Page(Image):

  :param show_tables: Will display all tables boxes as well as cells, rows and columns
  :param show_layouts: Will display all other layout components.
+ :param show_figures: Will display all figures
  :param show_cells: Will display cells within tables. (Only available if `show_tables=True`)
  :param show_table_structure: Will display rows and columns
  :param show_words: Will display bounding boxes around words labeled with token class and bio tag (experimental)
  :param show_token_class: Will display token class instead of token tags (i.e. token classes with tags)
  :param interactive: If set to True will open an interactive image, otherwise it will return a numpy array that
  can be displayed differently.
+ :param scaled_width: Width of the image to display
  :param ignore_default_token_class: Will ignore displaying word bounding boxes with default or None token class
  label
  :return: If `interactive=False` will return a numpy array.

@@ -857,6 +918,11 @@ class Page(Image):
  box_stack.append(item.bbox)
  category_names_list.append(item.category_name.value)

+ if show_figures and not debug_kwargs:
+ for item in self.figures:
+ box_stack.append(item.bbox)
+ category_names_list.append(item.category_name.value)
+
  if show_tables and not debug_kwargs:
  for table in self.tables:
  box_stack.append(table.bbox)

@@ -913,24 +979,34 @@ class Page(Image):
  else:
  category_names_list.append(word.token_tag.value if word.token_tag is not None else None)

+ if show_residual_layouts and not debug_kwargs:
+ for item in self.residual_layouts:
+ box_stack.append(item.bbox)
+ category_names_list.append(item.category_name.value)
+
  if self.image is not None:
+ scale_fx = scaled_width / self.width
+ scaled_height = int(self.height * scale_fx)
+ img = viz_handler.resize(self.image, scaled_width, scaled_height, "VIZ")
+
  if box_stack:
  boxes = np.vstack(box_stack)
+ boxes = box_to_point4(boxes)
+ resizer = ResizeTransform(self.height, self.width, scaled_height, scaled_width, "VIZ")
+ boxes = resizer.apply_coords(boxes)
+ boxes = point4_to_box(boxes)
  if show_words:
  img = draw_boxes(
- self.image,
- boxes,
- category_names_list,
+ np_image=img,
+ boxes=boxes,
+ category_names_list=category_names_list,
  font_scale=1.0,
  rectangle_thickness=4,
  )
  else:
- img = draw_boxes(self.image, boxes, category_names_list)
- scale_fx, scale_fy = 1.3, 1.3
- scaled_width, scaled_height = int(self.width * scale_fx), int(self.height * scale_fy)
- img = viz_handler.resize(img, scaled_width, scaled_height, "VIZ")
- else:
- img = self.image
+ img = draw_boxes(
+ np_image=img, boxes=boxes, category_names_list=category_names_list, show_palette=False
+ )

  if interactive:
  interactive_imshow(img)
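
A hedged sketch of the reworked `viz()`: the output image is now rescaled to `scaled_width` (default 600) with the bounding boxes transformed alongside it, and figures as well as residual layouts can be overlaid on demand:

    img = page.viz(
        show_figures=True,
        show_residual_layouts=True,
        scaled_width=1200,        # boxes are resized via ResizeTransform together with the image
        interactive=False,        # returns a numpy array instead of opening a window
    )
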
@@ -71,8 +71,8 @@ https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/cocoeva


  def _summarize( # type: ignore
- self, ap: int = 1, iouThr: float = 0.9, areaRng: str = "all", maxDets: int = 100
- ) -> float:
+ self, ap: int = 1, iouThr: float = 0.9, areaRng: str = "all", maxDets: int = 100, per_category: bool = False
+ ) -> Union[float, list[float]]:
  # pylint: disable=C0103
  p = self.params
  iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"

@@ -86,6 +86,36 @@ def _summarize( # type: ignore

  aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
  mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+ if per_category:
+ if ap == 1:
+ s = self.eval["precision"]
+ num_classes = s.shape[2]
+ results_per_class = []
+ for idx in range(num_classes):
+ if iouThr is not None:
+ s = self.eval["precision"]
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+ precision = s[:, :, idx, aind, mind]
+ precision = precision[precision > -1]
+ res = np.mean(precision) if precision.size else float("nan")
+ results_per_class.append(float(res))
+ print(f"Precision for class {idx+1}: @[ IoU={iouStr} | area={areaRng} | maxDets={maxDets} ] = {res}")
+ else:
+ s = self.eval["recall"]
+ num_classes = s.shape[1]
+ results_per_class = []
+ for idx in range(num_classes):
+ if iouThr is not None:
+ s = self.eval["recall"]
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+ recall = s[:, idx, aind, mind]
+ recall = recall[recall > -1]
+ res = np.mean(recall) if recall.size else float("nan")
+ results_per_class.append(float(res))
+ print(f"Recall for class {idx+1}: @[ IoU={iouStr} | area={areaRng} | maxDets={maxDets} ] = {res}")
+ return results_per_class
  if ap == 1:
  # dimension of precision: [TxRxKxAxM]
  s = self.eval["precision"]

@@ -124,6 +154,7 @@ class CocoMetric(MetricBase):
  mapper = image_to_coco
  _f1_score = None
  _f1_iou = None
+ _per_category = False
  _params: dict[str, Union[list[int], list[list[int]]]] = {}

  @classmethod

@@ -176,18 +207,28 @@ class CocoMetric(MetricBase):

  if cls._f1_score:
  summary_bbox = [
- metric.summarize_f1(1, cls._f1_iou, maxDets=metric.params.maxDets[2]),
- metric.summarize_f1(0, cls._f1_iou, maxDets=metric.params.maxDets[2]),
+ metric.summarize_f1(1, cls._f1_iou, maxDets=metric.params.maxDets[2], per_category=cls._per_category),
+ metric.summarize_f1(0, cls._f1_iou, maxDets=metric.params.maxDets[2], per_category=cls._per_category),
  ]
  else:
  metric.summarize()
  summary_bbox = metric.stats

  results = []
- for params, value in zip(cls.get_summary_default_parameters(), summary_bbox):
+
+ default_parameters = cls.get_summary_default_parameters()
+ if cls._per_category:
+ default_parameters = default_parameters * len(summary_bbox[0])
+ summary_bbox = [item for pair in zip(*summary_bbox) for item in pair]
+ val = 0
+ for idx, (params, value) in enumerate(zip(default_parameters, summary_bbox)):
  params = copy(params)
  params["mode"] = "bbox"
  params["val"] = value
+ if cls._per_category:
+ if idx % 2 == 0:
+ val += 1
+ params["category_id"] = val
  results.append(params)

  return results

@@ -201,15 +242,16 @@ class CocoMetric(MetricBase):
  area range and maximum detections.
  """
  if cls._f1_score:
+ for el, idx in zip(_F1_DEFAULTS, [2, 2]):
+ if cls._params:
+ if cls._params.get("maxDets") is not None:
+ el["maxDets"] = cls._params["maxDets"][idx]
+ el["iouThr"] = cls._f1_iou
+ return _F1_DEFAULTS
+
+ for el, idx in zip(_COCOEVAL_DEFAULTS, _MAX_DET_INDEX):
  if cls._params:
  if cls._params.get("maxDets") is not None:
- for el, idx in zip(_F1_DEFAULTS, [2, 2]):
- el["maxDets"] = cls._params["maxDets"][idx]
- el["iouThr"] = cls._f1_iou
- return _F1_DEFAULTS
- if cls._params:
- if cls._params.get("maxDets") is not None:
- for el, idx in zip(_COCOEVAL_DEFAULTS, _MAX_DET_INDEX):
  el["maxDets"] = cls._params["maxDets"][idx]
  return _COCOEVAL_DEFAULTS

@@ -220,13 +262,16 @@ class CocoMetric(MetricBase):
  area_range: Optional[list[list[int]]] = None,
  f1_score: bool = False,
  f1_iou: float = 0.9,
+ per_category: bool = False,
  ) -> None:
  """
  Setting params for different coco metric modes.

  :param max_detections: The maximum number of detections to consider
  :param area_range: The area range to classify objects as "all", "small", "medium" and "large"
- :param f1_score: Will use f1 score setting with default iouThr 0.9
+ :param f1_score: Will use f1 score setting with default iouThr 0.9. To be more precise it does not calculate
+ the f1 score but the precision and recall for a given iou threshold. Use the harmonic mean to
+ get the ultimate f1 score.
  :param f1_iou: Use with f1_score True and reset the f1 iou threshold
  """
  if max_detections is not None:

@@ -238,6 +283,7 @@ class CocoMetric(MetricBase):

  cls._f1_score = f1_score
  cls._f1_iou = f1_iou
+ cls._per_category = per_category

  @classmethod
  def get_requirements(cls) -> list[Requirement]:
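
A hedged sketch of the new per-category switch; the import path is an assumption and the evaluator wiring is omitted. As the docstring notes, `f1_score=True` reports precision and recall at the chosen IoU threshold, so the actual F1 is the harmonic mean computed afterwards:

    from deepdoctection.eval import CocoMetric   # assumed import path

    CocoMetric.set_params(f1_score=True, f1_iou=0.9, per_category=True)
    # ... run the evaluation as usual; each result dict now also carries a "category_id" ...

    def f1(precision: float, recall: float) -> float:
        return 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
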
@@ -24,21 +24,25 @@ from typing import Optional
  from lazy_imports import try_import

  from ..utils.context import save_tmp_file
- from ..utils.file_utils import get_pdfplumber_requirement
+ from ..utils.file_utils import get_pdfplumber_requirement, get_pypdfium2_requirement
  from ..utils.settings import LayoutType, ObjectTypes
  from ..utils.types import Requirement
  from .base import DetectionResult, ModelCategories, PdfMiner

- with try_import() as import_guard:
+ with try_import() as pdfplumber_import_guard:
  from pdfplumber.pdf import PDF, Page

+ with try_import() as pypdfmium_import_guard:
+ import pypdfium2.raw as pypdfium_c
+ from pypdfium2 import PdfDocument

- def _to_detect_result(word: dict[str, str]) -> DetectionResult:
+
+ def _to_detect_result(word: dict[str, str], class_name: ObjectTypes) -> DetectionResult:
  return DetectionResult(
  box=[float(word["x0"]), float(word["top"]), float(word["x1"]), float(word["bottom"])],
  class_id=1,
  text=word["text"],
- class_name=LayoutType.WORD,
+ class_name=class_name,
  )


@@ -49,6 +53,7 @@ class PdfPlumberTextDetector(PdfMiner):

  pdf_plumber = PdfPlumberTextDetector()
  df = SerializerPdfDoc.load("path/to/document.pdf")
+ df.reset_state()

  for dp in df:
  detection_results = pdf_plumber.predict(dp["pdf_bytes"])

@@ -61,6 +66,8 @@ class PdfPlumberTextDetector(PdfMiner):
  pipe = DoctectionPipe([text_extract])

  df = pipe.analyze(path="path/to/document.pdf")
+ df.reset_state()
+
  for dp in df:
  ...


@@ -87,7 +94,7 @@ class PdfPlumberTextDetector(PdfMiner):
  self._page = PDF(fin).pages[0]
  self._pdf_bytes = pdf_bytes
  words = self._page.extract_words(x_tolerance=self.x_tolerance, y_tolerance=self.y_tolerance)
- detect_results = list(map(_to_detect_result, words))
+ detect_results = [_to_detect_result(word, self.get_category_names()[0]) for word in words]
  return detect_results

  @classmethod

@@ -113,3 +120,87 @@ class PdfPlumberTextDetector(PdfMiner):
  def get_category_names(self) -> tuple[ObjectTypes, ...]:
  return self.categories.get_categories(as_dict=False)
+
+
+ class Pdfmium2TextDetector(PdfMiner):
+ """
+ Text miner based on the pypdfium2 engine. It will return text on text line level and not on word level
+
+ pdfmium2 = Pdfmium2TextDetector()
+ df = SerializerPdfDoc.load("path/to/document.pdf")
+ df.reset_state()
+
+ for dp in df:
+ detection_results = pdfmium2.predict(dp["pdf_bytes"])
+
+ To use it in a more integrated way:
+
+ pdfmium2 = Pdfmium2TextDetector()
+ text_extract = TextExtractionService(pdfmium2)
+
+ pipe = DoctectionPipe([text_extract])
+
+ df = pipe.analyze(path="path/to/document.pdf")
+ df.reset_state()
+ for dp in df:
+ ...
+
+ """
+
+ def __init__(self) -> None:
+ self.name = "Pdfmium"
+ self.model_id = self.get_model_id()
+ self.categories = ModelCategories(init_categories={1: LayoutType.LINE})
+ self._page: Optional[Page] = None
+
+ def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
+ """
+ Call pypdfium2 and returns detected text as detection results
+
+ :param pdf_bytes: bytes of a single pdf page
+ :return: A list of DetectionResult
+ """
+
+ pdf = PdfDocument(pdf_bytes)
+ page = pdf.get_page(0)
+ text = page.get_textpage()
+ words = []
+ height = page.get_height()
+ for obj in page.get_objects((pypdfium_c.FPDF_PAGEOBJ_TEXT,)):
+ box = obj.get_pos()
+ if all(x > 0 for x in box):
+ words.append(
+ {
+ "text": text.get_text_bounded(*box),
+ "x0": box[0],
+ "x1": box[2],
+ "top": height - box[3],
+ "bottom": height - box[1],
+ }
+ )
+ detect_results = [_to_detect_result(word, self.get_category_names()[0]) for word in words]
+ return detect_results
+
+ @classmethod
+ def get_requirements(cls) -> list[Requirement]:
+ return [get_pypdfium2_requirement()]
+
+ def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
+ """
+ Get the width and height of the full page
+ :param pdf_bytes: pdf_bytes generating the pdf
+ :return: width and height
+ """
+
+ if self._pdf_bytes == pdf_bytes and self._page is not None:
+ return self._page.bbox[2], self._page.bbox[3] # pylint: disable=E1101
+ # if the pdf bytes is not equal to the cached pdf, will recalculate values
+ pdf = PdfDocument(pdf_bytes)
+ self._page = pdf.get_page(0)
+ self._pdf_bytes = pdf_bytes
+ if self._page is not None:
+ return self._page.get_width(), self._page.get_height() # type: ignore
+ raise ValueError("Page not found")
+
+ def get_category_names(self) -> tuple[ObjectTypes, ...]:
+ return self.categories.get_categories(as_dict=False)
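
Unlike `PdfPlumberTextDetector`, the new miner returns `LayoutType.LINE` results, and `predict()` flips pypdfium2's bottom-left page coordinates (`top = height - y_top`) into the top-left image coordinates that `DetectionResult` expects. A hedged usage sketch; the import paths are assumptions:

    from deepdoctection.extern.pdftext import Pdfmium2TextDetector   # assumed module path
    from deepdoctection.utils.fs import load_bytes_from_pdf_file     # helper referenced earlier in this diff

    miner = Pdfmium2TextDetector()
    pdf_bytes = load_bytes_from_pdf_file("path/to/page.pdf")          # a single-page pdf
    for det in miner.predict(pdf_bytes):
        print(det.class_name, det.box, det.text)                      # boxes already in top-left image coordinates
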
@@ -421,6 +421,7 @@ class TesseractRotationTransformer(ImageTransformer):
  def __init__(self) -> None:
  self.name = fspath(_TESS_PATH) + "-rotation"
  self.categories = ModelCategories(init_categories={1: PageType.ANGLE})
+ self.model_id = self.get_model_id()

  def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
  """

@@ -193,5 +193,7 @@ def match_anns_by_distance(
  child_anns = dp.get_annotation(annotation_ids=child_ann_ids, category_names=child_ann_category_names)
  child_centers = [block.get_bounding_box(dp.image_id).center for block in child_anns]
  parent_centers = [block.get_bounding_box(dp.image_id).center for block in parent_anns]
- child_indices = distance.cdist(parent_centers, child_centers).argmin(axis=1)
- return [(parent_anns[i], child_anns[j]) for i, j in enumerate(child_indices)]
+ if child_centers and parent_centers:
+ child_indices = distance.cdist(parent_centers, child_centers).argmin(axis=1)
+ return [(parent_anns[i], child_anns[j]) for i, j in enumerate(child_indices)]
+ return []