deepdoctection 0.26-py3-none-any.whl → 0.27-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdoctection/__init__.py +7 -1
- deepdoctection/analyzer/dd.py +15 -3
- deepdoctection/configs/conf_dd_one.yaml +4 -0
- deepdoctection/datapoint/convert.py +5 -10
- deepdoctection/datapoint/image.py +2 -2
- deepdoctection/datapoint/view.py +38 -18
- deepdoctection/datasets/save.py +3 -3
- deepdoctection/extern/d2detect.py +1 -2
- deepdoctection/extern/doctrocr.py +14 -9
- deepdoctection/extern/tp/tpfrcnn/common.py +2 -3
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +6 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +3 -3
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -2
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +5 -3
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +3 -1
- deepdoctection/extern/tp/tpfrcnn/predict.py +1 -0
- deepdoctection/mapper/laylmstruct.py +2 -3
- deepdoctection/utils/context.py +2 -2
- deepdoctection/utils/file_utils.py +63 -26
- deepdoctection/utils/fs.py +6 -6
- deepdoctection/utils/pdf_utils.py +2 -2
- deepdoctection/utils/settings.py +8 -1
- deepdoctection/utils/transform.py +9 -9
- deepdoctection/utils/viz.py +405 -86
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/METADATA +93 -94
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/RECORD +31 -31
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/WHEEL +1 -1
- tests/analyzer/test_dd.py +6 -57
- tests/conftest.py +2 -0
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/LICENSE +0 -0
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/top_level.txt +0 -0
deepdoctection/__init__.py
CHANGED
@@ -14,7 +14,7 @@ from packaging import version
 from .utils.file_utils import _LazyModule, get_tf_version, pytorch_available, tf_available
 from .utils.logger import logger
 
-__version__ = 0.26
+__version__ = 0.27
 
 _IMPORT_STRUCTURE = {
     "analyzer": ["get_dd_analyzer", "build_analyzer"],
@@ -311,6 +311,10 @@ _IMPORT_STRUCTURE = {
         "get_fasttext_requirement",
         "wandb_available",
         "get_wandb_requirement",
+        "opencv_available",
+        "get_opencv_requirement",
+        "pillow_available",
+        "get_pillow_requirement",
         "load_image_from_file",
         "load_bytes_from_pdf_file",
         "get_load_image_func",
@@ -378,6 +382,7 @@ _IMPORT_STRUCTURE = {
         "draw_text",
         "draw_boxes",
         "interactive_imshow",
+        "viz_handler",
     ],
 }
 
@@ -403,6 +408,7 @@ if tf_available():
     except Exception:  # pylint: disable=W0703
         pass
 
+
 # Direct imports for type-checking
 if TYPE_CHECKING:
     from .analyzer import *
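The four availability helpers and the `viz_handler` singleton are new `_IMPORT_STRUCTURE` entries, so they should surface at package level through the `_LazyModule` machinery. A minimal sketch, assuming the lazy re-export behaves as those entries suggest:

    import deepdoctection as dd

    # Both imaging backends are optional; viz_handler (see utils/viz.py, +405 -86)
    # is expected to pick whichever of OpenCV/Pillow is installed.
    print(dd.opencv_available(), dd.pillow_available())
    print(dd.get_opencv_requirement(), dd.get_pillow_requirement())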
deepdoctection/analyzer/dd.py
CHANGED
@@ -36,7 +36,7 @@ from ..extern.tessocr import TesseractOcrDetector
 from ..extern.texocr import TextractOcrDetector
 from ..pipe.base import PipelineComponent
 from ..pipe.cell import DetectResultGenerator, SubImageLayoutService
-from ..pipe.common import MatchingService, PageParsingService
+from ..pipe.common import AnnotationNmsService, MatchingService, PageParsingService
 from ..pipe.doctectionpipe import DoctectionPipe
 from ..pipe.layout import ImageLayoutService
 from ..pipe.order import TextOrderService
@@ -206,7 +206,7 @@ def _build_ocr(cfg: AttrDict) -> Union[TesseractOcrDetector, DoctrTextRecognizer
         profile = ModelCatalog.get_profile(weights)
         if profile.architecture is None:
             raise ValueError("model profile.architecture must be specified")
-        return DoctrTextRecognizer(profile.architecture, weights_path, cfg.DEVICE)
+        return DoctrTextRecognizer(profile.architecture, weights_path, cfg.DEVICE, lib=cfg.LIB)
     if cfg.OCR.USE_TEXTRACT:
         credentials_kwargs = {
             "aws_access_key_id": environ.get("ACCESS_KEY"),
@@ -225,7 +225,7 @@ def _build_doctr_word(cfg: AttrDict) -> DoctrTextlineDetector:
         raise ValueError("model profile.architecture must be specified")
     if profile.categories is None:
         raise ValueError("model profile.categories must be specified")
-    return DoctrTextlineDetector(profile.architecture, weights_path, profile.categories, cfg.DEVICE)
+    return DoctrTextlineDetector(profile.architecture, weights_path, profile.categories, cfg.DEVICE, lib=cfg.LIB)
 
 
 def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
@@ -242,6 +242,17 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
     layout = _build_service(d_layout, cfg, "LAYOUT")
     pipe_component_list.append(layout)
 
+    # setup layout nms service
+    if cfg.LAYOUT_NMS_PAIRS.COMBINATIONS and cfg.USE_LAYOUT:
+        if not isinstance(cfg.LAYOUT_NMS_PAIRS.COMBINATIONS, list) and not isinstance(
+            cfg.LAYOUT_NMS_PAIRS.COMBINATIONS[0], list
+        ):
+            raise ValueError("LAYOUT_NMS_PAIRS mus be a list of lists")
+        layout_nms_serivce = AnnotationNmsService(
+            cfg.LAYOUT_NMS_PAIRS.COMBINATIONS, cfg.LAYOUT_NMS_PAIRS.THRESHOLDS, cfg.LAYOUT_NMS_PAIRS.PRIORITY
+        )
+        pipe_component_list.append(layout_nms_serivce)
+
     # setup tables service
     if cfg.USE_TABLE_SEGMENTATION:
         d_item = _build_detector(cfg, "ITEM")
@@ -302,6 +313,7 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
         )
         pipe_component_list.append(text)
 
+    if cfg.USE_PDF_MINER or cfg.USE_OCR:
         match = MatchingService(
             parent_categories=cfg.WORD_MATCHING.PARENTAL_CATEGORIES,
             child_categories=LayoutType.word,
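The new NMS step reads three fields from `cfg.LAYOUT_NMS_PAIRS`; the matching conf_dd_one.yaml additions (+4 lines) are not shown in this diff. A sketch of constructing the service directly, mirroring the positional call above — the category pair, threshold, and priority values below are made-up illustrations, not defaults from the release:

    from deepdoctection.pipe.common import AnnotationNmsService

    nms_service = AnnotationNmsService(
        [["table", "title"]],  # COMBINATIONS: pairs of categories competing in NMS
        [0.05],                # THRESHOLDS: one IoU threshold per pair
        [None],                # PRIORITY: optional per-pair category that always wins
    )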
deepdoctection/datapoint/convert.py
CHANGED
@@ -25,7 +25,6 @@ from io import BytesIO
 from shutil import which
 from typing import Any, Optional, Union, no_type_check
 
-import cv2
 import numpy as np
 from numpy import uint8
 from numpy.typing import NDArray
@@ -34,6 +33,7 @@ from PyPDF2 import PdfReader
 from ..utils.detection_types import ImageType
 from ..utils.develop import deprecated
 from ..utils.pdf_utils import pdf_to_np_array
+from ..utils.viz import viz_handler
 
 __all__ = [
     "convert_b64_to_np_array",
@@ -81,9 +81,8 @@ def convert_b64_to_np_array(image: str) -> ImageType:
     :param image: An image as base64 string.
     :return: numpy array.
     """
-    …
-    …
-    return np_array.astype(uint8)
+
+    return viz_handler.convert_b64_to_np(image).astype(uint8)
@@ -93,9 +92,7 @@ def convert_np_array_to_b64(np_image: ImageType) -> str:
     :param np_image: An image as numpy array.
     :return: An image as base64 string.
     """
-    …
-    image = base64.b64encode(np_encode[1]).decode("utf-8")  # type: ignore
-    return image
+    return viz_handler.convert_np_to_b64(np_image)
 
 
 @no_type_check
@@ -106,9 +103,7 @@ def convert_np_array_to_b64_b(np_image: ImageType) -> bytes:
     :param np_image: An image as numpy array.
     :return: An image as base64 bytes.
     """
-    …
-    b_image = np_encode[1].tobytes()
-    return b_image
+    return viz_handler.encode(np_image)
 
 
 @deprecated("Use convert_pdf_bytes_to_np_array_v2", "2022-02-23")
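All three converters now delegate to `viz_handler` instead of calling cv2 directly. A quick round-trip sketch, which should hold under either backend since PNG encoding preserves the shape of a 3-channel uint8 array:

    import numpy as np
    from deepdoctection.datapoint.convert import convert_b64_to_np_array, convert_np_array_to_b64

    img = np.zeros((32, 32, 3), dtype=np.uint8)
    b64 = convert_np_array_to_b64(img)       # encode via viz_handler
    restored = convert_b64_to_np_array(b64)  # decode via viz_handler
    assert restored.shape == img.shape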
deepdoctection/datapoint/image.py
CHANGED
@@ -626,8 +626,8 @@ class Image:
         self.remove_image_from_lower_hierachy()
         export_dict = self.as_dict()
         export_dict["location"] = str(export_dict["location"])
-        if …
-        export_dict["_image"] = …
+        if not image_to_json:
+            export_dict["_image"] = None
         if dry:
             return export_dict
         with open(path_json, "w", encoding="UTF-8") as file:
deepdoctection/datapoint/view.py
CHANGED
@@ -23,7 +23,6 @@ simplify consumption
 from copy import copy
 from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Type, Union, no_type_check
 
-import cv2
 import numpy as np
 
 from ..utils.detection_types import ImageType, JsonDict, Pathlike
@@ -39,7 +38,7 @@ from ..utils.settings import (
     WordType,
     get_type,
 )
-from ..utils.viz import draw_boxes, interactive_imshow
+from ..utils.viz import draw_boxes, interactive_imshow, viz_handler
 from .annotation import ContainerAnnotation, ImageAnnotation, SummaryAnnotation, ann_from_dict
 from .box import BoundingBox
 from .image import Image
@@ -415,6 +414,17 @@ class Page(Image):
     text_container: ObjectTypes
     floating_text_block_categories: List[ObjectTypes]
     image_orig: Image
+    _attribute_names: Set[str] = {
+        "text",
+        "chunks",
+        "tables",
+        "layouts",
+        "words",
+        "file_name",
+        "location",
+        "document_id",
+        "page_number",
+    }
 
     @no_type_check
     def get_annotation(
@@ -734,7 +744,9 @@ class Page(Image):
             )
         else:
             img = draw_boxes(self.image, boxes, category_names_list)
-            …
+            scale_fx, scale_fy = 1.3, 1.3
+            scaled_width, scaled_height = int(self.width * scale_fx), int(self.height * scale_fy)
+            img = viz_handler.resize(img, scaled_width, scaled_height, "VIZ")
         else:
             img = self.image
 
@@ -744,24 +756,32 @@ class Page(Image):
             return img
         return None
 
-    @staticmethod
-    def get_attribute_names() -> Set[str]:
+    @classmethod
+    def get_attribute_names(cls) -> Set[str]:
         """
         :return: A set of registered attributes.
         """
-        return set(PageType).union(
-            {
-                "text",
-                "chunks",
-                "tables",
-                "layouts",
-                "words",
-                "file_name",
-                "location",
-                "document_id",
-                "page_number",
-            }
-        )
+        return set(PageType).union(cls._attribute_names)
+
+    @classmethod
+    def add_attribute_name(cls, attribute_name: Union[str, ObjectTypes]) -> None:
+        """
+        Adding a custom attribute name to a Page class.
+
+        **Example:**
+
+            Page.add_attribute_name("foo")
+
+            page = Page.from_image(...)
+            print(page.foo)
+
+        Note, that the attribute must be registered as a valid `ObjectTypes`
+
+        :param attribute_name: attribute name to add
+        """
+        attribute_name = get_type(attribute_name)
+        cls._attribute_names.add(attribute_name.value)
 
     def save(
         self,
deepdoctection/datasets/save.py
CHANGED
@@ -23,13 +23,12 @@ import json
 from pathlib import Path
 from typing import Optional
 
-from cv2 import imwrite
-
 from ..dataflow import DataFlow, MapData, SerializerJsonlines
 from ..datapoint.convert import convert_b64_to_np_array
 from ..datapoint.image import Image
 from ..utils.detection_types import JsonDict, Pathlike
 from ..utils.fs import mkdir_p
+from ..utils.viz import viz_handler
 
 
 def dataflow_to_json(
@@ -84,7 +83,8 @@ def dataflow_to_json(
         target_file_png = path / "image" / (dp["file_name"].split(".")[0] + ".png")
         image = dp.pop("_image")
         image = convert_b64_to_np_array(image)
-        imwrite(str(target_file_png), image)
+
+        viz_handler.write_image(str(target_file_png), image)
 
     with open(target_file, "w", encoding="UTF-8") as file:
         json.dump(dp, file)
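Same substitution as in convert.py: `cv2.imwrite` is replaced by the backend-neutral writer. A sketch of the same call pattern outside the dataflow, with an illustrative path:

    import numpy as np
    from deepdoctection.utils.viz import viz_handler

    img = np.full((64, 64, 3), 255, dtype=np.uint8)  # white dummy image
    viz_handler.write_image("/tmp/sample.png", img)  # path is illustrative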
deepdoctection/extern/d2detect.py
CHANGED
@@ -23,7 +23,6 @@ from copy import copy
 from pathlib import Path
 from typing import Any, Dict, List, Literal, Mapping, Optional, Sequence
 
-import cv2
 import numpy as np
 
 from ..utils.detection_types import ImageType, Requirement
@@ -130,7 +129,7 @@ def d2_jit_predict_image(
     keep = batched_nms(boxes, scores, class_masks, nms_thresh_class_agnostic).cpu()
 
     # The exported model does not contain the final resize step, so we need to add it manually here
-    inverse_resizer = ResizeTransform(new_height, new_width, height, width, …)
+    inverse_resizer = ResizeTransform(new_height, new_width, height, width, "VIZ")
     np_boxes = np.reshape(boxes.cpu().numpy(), (-1, 2))
     np_boxes = inverse_resizer.apply_coords(np_boxes)
     np_boxes = np.reshape(np_boxes, (-1, 4))
deepdoctection/extern/doctrocr.py
CHANGED
@@ -62,14 +62,14 @@ def _set_device_str(device: Optional[str] = None) -> str:
     return device
 
 
-def _load_model(path_weights: str, doctr_predictor: Any, device: str) -> None:
-    if pytorch_available():
+def _load_model(path_weights: str, doctr_predictor: Any, device: str, lib: str) -> None:
+    if lib == "PT" and pytorch_available():
         state_dict = torch.load(path_weights, map_location=device)
         for key in list(state_dict.keys()):
             state_dict["model." + key] = state_dict.pop(key)
         doctr_predictor.load_state_dict(state_dict)
         doctr_predictor.to(device)
-    elif tf_available():
+    elif lib == "TF" and tf_available():
         # Unzip the archive
         params_path = Path(path_weights).parent
         is_zip_path = path_weights.endswith(".zip")
@@ -99,7 +99,7 @@ def doctr_predict_text_lines(np_img: ImageType, predictor: "DetectionPredictor",
         DetectionResult(
             box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutType.word
         )
-        for box in raw_output[0]
+        for box in raw_output[0]["words"]
     ]
     return detection_results
 
@@ -173,7 +173,9 @@ class DoctrTextlineDetector(ObjectDetector):
         path_weights: str,
         categories: Mapping[str, TypeOrStr],
         device: Optional[Literal["cpu", "cuda"]] = None,
+        lib: str = "TF",
     ) -> None:
+        self.lib = lib
         self.name = "doctr_text_detector"
         self.architecture = architecture
         self.path_weights = path_weights
@@ -205,14 +207,14 @@ class DoctrTextlineDetector(ObjectDetector):
         raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextlineDetector")
 
     def clone(self) -> PredictorBase:
-        return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input)
+        return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input, self.lib)
 
     def possible_categories(self) -> List[ObjectTypes]:
        return [LayoutType.word]
 
     def load_model(self) -> None:
         """Loading model weights"""
-        _load_model(self.path_weights, self.doctr_predictor, self.device)
+        _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
 
 
 class DoctrTextRecognizer(TextRecognizer):
@@ -252,7 +254,10 @@ class DoctrTextRecognizer(TextRecognizer):
 
     """
 
-    def __init__(self, architecture: str, path_weights: str, device: Optional[Literal["cpu", "cuda"]] = None) -> None:
+    def __init__(
+        self, architecture: str, path_weights: str, device: Optional[Literal["cpu", "cuda"]] = None, lib: str = "TF"
+    ) -> None:
+        self.lib = lib
         self.name = "doctr_text_recognizer"
         self.architecture = architecture
         self.path_weights = path_weights
@@ -281,8 +286,8 @@ class DoctrTextRecognizer(TextRecognizer):
         raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextRecognizer")
 
     def clone(self) -> PredictorBase:
-        return self.__class__(self.architecture, self.path_weights, self.device_input)
+        return self.__class__(self.architecture, self.path_weights, self.device_input, self.lib)
 
     def load_model(self) -> None:
         """Loading model weights"""
-        _load_model(self.path_weights, self.doctr_predictor, self.device)
+        _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
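Both doctr wrappers now take a `lib` argument ("TF" by default, per the new signatures) that pins which framework `_load_model` uses, rather than auto-detecting whatever is installed. A sketch with placeholder weights; `crnn_vgg16_bn` is a standard doctr recognition architecture, but treat both values as assumptions here:

    from deepdoctection.extern.doctrocr import DoctrTextRecognizer

    # path is a placeholder; lib="PT" forces the PyTorch branch of _load_model
    # even when TensorFlow is installed alongside it.
    recognizer = DoctrTextRecognizer("crnn_vgg16_bn", "/path/to/weights.pt", device="cpu", lib="PT")
    recognizer.load_model()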
deepdoctection/extern/tp/tpfrcnn/common.py
CHANGED
@@ -10,7 +10,6 @@ This file is modified from
 """
 
-import cv2
 import numpy as np
 from tensorpack.dataflow.imgaug import ImageAugmentor, ResizeTransform  # pylint: disable=E0401
 
@@ -25,11 +24,11 @@ class CustomResize(ImageAugmentor):
     Try resizing the shortest edge to a certain number while avoiding the longest edge to exceed max_size.
     """
 
-    def __init__(self, short_edge_length, max_size, interp=cv2.INTER_LINEAR):
+    def __init__(self, short_edge_length, max_size, interp=1):
         """
         :param short_edge_length: a [min, max] interval from which to sample the shortest edge length.
         :param max_size: maximum allowed longest edge length.
-        :param interp: …
+        :param interp: Interpolation mode. We use Tensorpack's internal `ResizeTransform`, that always requires OpenCV
         """
         super().__init__()
         if isinstance(short_edge_length, int):
deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py
CHANGED
@@ -165,7 +165,7 @@ def resnet_shortcut(l, n_out, stride, activation=tf.identity):
     """
     n_in = l.shape[1]
     if n_in != n_out:  # change dimension when channel is not the same
-        return Conv2D("convshortcut", l, n_out, 1, strides=stride, activation=activation)
+        return Conv2D("convshortcut", l, n_out, 1, strides=stride, activation=activation)  # pylint: disable=E1124
     return l
 
 
@@ -181,12 +181,12 @@ def resnet_bottleneck(l, ch_out, stride, cfg):
     """
     shortcut = l
 
-    l = Conv2D("conv1", l, ch_out, 1, strides=1)
+    l = Conv2D("conv1", l, ch_out, 1, strides=1)  # pylint: disable=E1124
     if stride == 2:
         l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(cfg, 0, 1), maybe_reverse_pad(cfg, 0, 1)])
-        l = Conv2D("conv2", l, ch_out, 3, strides=2, padding="VALID")
+        l = Conv2D("conv2", l, ch_out, 3, strides=2, padding="VALID")  # pylint: disable=E1124
     else:
-        l = Conv2D("conv2", l, ch_out, 3, strides=stride)
+        l = Conv2D("conv2", l, ch_out, 3, strides=stride)  # pylint: disable=E1124
     if cfg.BACKBONE.NORM != "None":
         l = Conv2D("conv3", l, ch_out * 4, 1, activation=get_norm(cfg, zero_init=True))
     else:
@@ -263,9 +263,9 @@ def resnet_fpn_backbone(image, cfg):
         ),
     )
     l.set_shape([None, chan, None, None])
-    l = Conv2D("conv0", l, 64, 7, strides=2, padding="VALID")
+    l = Conv2D("conv0", l, 64, 7, strides=2, padding="VALID")  # pylint: disable=E1124
     l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(cfg, 0, 1), maybe_reverse_pad(cfg, 0, 1)])
-    l = MaxPooling("pool0", l, 3, strides=2, padding="VALID")
+    l = MaxPooling("pool0", l, 3, strides=2, padding="VALID")  # pylint: disable=E1124
 
     bottleneck = resnet_bottleneck if cfg.BACKBONE.BOTTLENECK == "resnet" else resnext32x4d_bottleneck
     with backbone_scope(cfg=cfg, freeze=freeze_at > 1):
deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py
CHANGED
@@ -98,14 +98,14 @@ class GeneralizedRCNN(ModelDescWithConfig):
 
         image = self.preprocess(inputs["image"])  # 1CHW
 
-        features = self.backbone(image)
+        features = self.backbone(image)  # pylint: disable=E1101
         anchor_inputs = {k: v for k, v in inputs.items() if k.startswith("anchor_")}
-        proposals, rpn_losses = self.rpn(image, features, anchor_inputs)
+        proposals, rpn_losses = self.rpn(image, features, anchor_inputs)  # pylint: disable=E1101
 
         targets = [inputs[k] for k in ["gt_boxes", "gt_labels", "gt_masks"] if k in inputs]
         gt_boxes_area = tf.reduce_mean(tf_area(inputs["gt_boxes"]), name="mean_gt_box_area")
         add_moving_summary(gt_boxes_area)
-        head_losses = self.roi_heads(image, features, proposals, targets)
+        head_losses = self.roi_heads(image, features, proposals, targets)  # pylint: disable=E1101
 
         if self.training:
             wd_cost = regularize_cost(".*/W", l2_regularizer(self.cfg.TRAIN.WEIGHT_DECAY), name="wd_cost")
deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py
CHANGED
@@ -63,7 +63,9 @@ def fpn_model(features, fpn_num_channels, fpn_norm):
             x = tf.transpose(x, [0, 3, 1, 2])
             return x
         except AttributeError:
-            return FixedUnPooling(name, x, 2, unpool_mat=np.ones((2, 2), dtype="float32"), data_format="channels_first")
+            return FixedUnPooling(
+                name, x, 2, unpool_mat=np.ones((2, 2), dtype="float32"), data_format="channels_first"
+            )  # pylint: disable=E1124
 
     with argscope(
         Conv2D,
@@ -85,7 +87,9 @@ def fpn_model(features, fpn_num_channels, fpn_norm):
     p2345 = [Conv2D(f"posthoc_3x3_p{i + 2}", c, num_channel, 3) for i, c in enumerate(lat_sum_5432[::-1])]
     if use_gn:
         p2345 = [GroupNorm(f"gn_p{i + 2}", c) for i, c in enumerate(p2345)]
-    p6 = MaxPooling("maxpool_p6", p2345[-1], pool_size=1, strides=2, data_format="channels_first", padding="VALID")
+    p6 = MaxPooling(
+        "maxpool_p6", p2345[-1], pool_size=1, strides=2, data_format="channels_first", padding="VALID"
+    )  # pylint: disable=E1124
     return p2345 + [p6]
 
 
deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py
CHANGED
@@ -267,8 +267,10 @@ def fastrcnn_2fc_head(feature, cfg):
 
     dim = cfg.FPN.FRCNN_FC_HEAD_DIM
     init = tfv1.variance_scaling_initializer()
-    hidden = FullyConnected("fc6", feature, dim, kernel_initializer=init, activation=tf.nn.relu)
-    hidden = FullyConnected("fc7", hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
+    hidden = FullyConnected(
+        "fc6", feature, dim, kernel_initializer=init, activation=tf.nn.relu
+    )  # pylint: disable=E1124
+    hidden = FullyConnected("fc7", hidden, dim, kernel_initializer=init, activation=tf.nn.relu)  # pylint: disable=E1124
     return hidden
 
 
@@ -298,7 +300,7 @@ def fastrcnn_Xconv1fc_head(feature, num_convs, norm=None, **kwargs):  # pylint:
         l = Conv2D(f"conv{k}", l, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3, activation=tf.nn.relu)
         if norm is not None:
             l = GroupNorm(f"gn{k}", l)
-    l = FullyConnected(
+    l = FullyConnected(  # pylint: disable=E1124
         "fc",
         l,
         cfg.FPN.FRCNN_FC_HEAD_DIM,
deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py
CHANGED
@@ -88,7 +88,9 @@ def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None, **kwargs)
         l = Conv2D(f"fcn{k}", l, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
         if norm is not None:
             l = GroupNorm(f"gn{k}", l)
-    l = Conv2DTranspose("deconv", l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
+    l = Conv2DTranspose(
+        "deconv", l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu
+    )  # pylint: disable=E1124
     l = Conv2D("conv", l, num_category, 1, kernel_initializer=tf.random_normal_initializer(stddev=0.001))
     return l
 
deepdoctection/extern/tp/tpfrcnn/predict.py
CHANGED
@@ -79,6 +79,7 @@ def _paste_mask(box, mask, shape, mrcnn_accurate_paste):
 
     # rounding errors could happen here, because masks were not originally computed for this shape.
    # but it's hard to do better, because the network does not know the "original" scale
+
     mask = (cv2.resize(mask, (w, h)) > 0.5).astype("uint8")
     ret = np.zeros(shape, dtype="uint8")
     ret[y_0 : y_1 + 1, x_0 : x_1 + 1] = mask
deepdoctection/mapper/laylmstruct.py
CHANGED
@@ -26,7 +26,6 @@ from typing import Any, Callable, Dict, List, Literal, NewType, Optional, Sequen
 
 import numpy as np
 import numpy.typing as npt
-from cv2 import INTER_LINEAR
 
 from ..datapoint.annotation import ContainerAnnotation
 from ..datapoint.convert import box_to_point4, point4_to_box
@@ -179,11 +178,11 @@ def image_to_raw_layoutlm_features(
 
     boxes = box_to_point4(boxes)
 
-    resizer = ResizeTransform(dp.height, dp.width, input_height, input_width, INTER_LINEAR)
+    resizer = ResizeTransform(dp.height, dp.width, input_height, input_width, "VIZ")
 
     if dp.image is not None:
         if image_width != input_width or image_height != input_height:
-            image_only_resizer = ResizeTransform(dp.height, dp.width, image_height, image_width, INTER_LINEAR)
+            image_only_resizer = ResizeTransform(dp.height, dp.width, image_height, image_width, "VIZ")
             image = image_only_resizer.apply_image(dp.image)
         else:
            image = resizer.apply_image(dp.image)
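Callers now pass the string "VIZ" where a cv2 interpolation flag used to go. A sketch, assuming `ResizeTransform` lives in `deepdoctection/utils/transform.py` (also touched in this release, +9 -9) and keeps the (h, w, new_h, new_w, mode) argument order seen above:

    import numpy as np
    from deepdoctection.utils.transform import ResizeTransform

    img = np.zeros((600, 800, 3), dtype=np.uint8)
    resizer = ResizeTransform(600, 800, 300, 400, "VIZ")  # (h, w, new_h, new_w, mode)
    small = resizer.apply_image(img)  # expected shape: (300, 400, 3)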
deepdoctection/utils/context.py
CHANGED
@@ -29,10 +29,10 @@ from time import perf_counter as timer
 from typing import Any, Generator, Iterator, Optional, Tuple, Union
 
 import numpy as np
-from cv2 import imwrite
 
 from .detection_types import ImageType
 from .logger import logger
+from .viz import viz_handler
 
 __all__ = ["timeout_manager", "save_tmp_file", "timed_operation"]
 
@@ -89,7 +89,7 @@ def save_tmp_file(image: Union[str, ImageType, bytes], prefix: str) -> Iterator[
         return
     if isinstance(image, (np.ndarray, np.generic)):
         input_file_name = file.name + ".PNG"
-        imwrite(input_file_name, image)
+        viz_handler.write_image(input_file_name, image)
         yield file.name, input_file_name
     if isinstance(image, bytes):
        input_file_name = file.name