deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +8 -25
- deepdoctection/analyzer/dd.py +84 -71
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +78 -56
- deepdoctection/datapoint/box.py +7 -7
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +157 -75
- deepdoctection/datapoint/view.py +175 -151
- deepdoctection/datasets/adapter.py +30 -24
- deepdoctection/datasets/base.py +10 -10
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +23 -25
- deepdoctection/datasets/instances/doclaynet.py +48 -49
- deepdoctection/datasets/instances/fintabnet.py +44 -45
- deepdoctection/datasets/instances/funsd.py +23 -23
- deepdoctection/datasets/instances/iiitar13k.py +8 -8
- deepdoctection/datasets/instances/layouttest.py +2 -2
- deepdoctection/datasets/instances/publaynet.py +3 -3
- deepdoctection/datasets/instances/pubtables1m.py +18 -18
- deepdoctection/datasets/instances/pubtabnet.py +30 -29
- deepdoctection/datasets/instances/rvlcdip.py +28 -29
- deepdoctection/datasets/instances/xfund.py +51 -30
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +13 -12
- deepdoctection/eval/eval.py +32 -26
- deepdoctection/eval/tedsmetric.py +16 -12
- deepdoctection/eval/tp_eval_callback.py +7 -16
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +69 -89
- deepdoctection/extern/deskew.py +11 -10
- deepdoctection/extern/doctrocr.py +81 -64
- deepdoctection/extern/fastlang.py +23 -16
- deepdoctection/extern/hfdetr.py +53 -38
- deepdoctection/extern/hflayoutlm.py +216 -155
- deepdoctection/extern/hflm.py +35 -30
- deepdoctection/extern/model.py +433 -255
- deepdoctection/extern/pdftext.py +15 -15
- deepdoctection/extern/pt/ptutils.py +4 -2
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +14 -16
- deepdoctection/extern/tp/tfutils.py +16 -2
- deepdoctection/extern/tp/tpcompat.py +11 -7
- deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
- deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
- deepdoctection/extern/tpdetect.py +40 -45
- deepdoctection/mapper/cats.py +36 -40
- deepdoctection/mapper/cocostruct.py +16 -12
- deepdoctection/mapper/d2struct.py +22 -22
- deepdoctection/mapper/hfstruct.py +7 -7
- deepdoctection/mapper/laylmstruct.py +22 -24
- deepdoctection/mapper/maputils.py +9 -10
- deepdoctection/mapper/match.py +33 -2
- deepdoctection/mapper/misc.py +6 -7
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +6 -6
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +3 -3
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/anngen.py +39 -14
- deepdoctection/pipe/base.py +68 -99
- deepdoctection/pipe/common.py +181 -85
- deepdoctection/pipe/concurrency.py +14 -10
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +18 -16
- deepdoctection/pipe/lm.py +49 -47
- deepdoctection/pipe/order.py +63 -65
- deepdoctection/pipe/refine.py +102 -109
- deepdoctection/pipe/segment.py +157 -162
- deepdoctection/pipe/sub_layout.py +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/d2_frcnn_train.py +27 -25
- deepdoctection/train/hf_detr_train.py +22 -18
- deepdoctection/train/hf_layoutlm_train.py +49 -48
- deepdoctection/train/tp_frcnn_train.py +10 -11
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +52 -14
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +41 -14
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +15 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/pdf_utils.py +39 -14
- deepdoctection/utils/settings.py +188 -182
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +70 -69
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
- deepdoctection-0.34.dist-info/RECORD +146 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.32.dist-info/RECORD +0 -146
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
|
@@ -21,6 +21,8 @@ of coordinates. Most of the ideas have been taken from
|
|
|
21
21
|
<https://github.com/tensorpack/dataflow/blob/master/dataflow/dataflow/imgaug/transform.py> .
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
24
26
|
from abc import ABC, abstractmethod
|
|
25
27
|
from typing import Literal, Optional, Union
|
|
26
28
|
|
|
@@ -28,7 +30,7 @@ import numpy as np
|
|
|
28
30
|
import numpy.typing as npt
|
|
29
31
|
from numpy import float32
|
|
30
32
|
|
|
31
|
-
from .
|
|
33
|
+
from .types import PixelValues
|
|
32
34
|
from .viz import viz_handler
|
|
33
35
|
|
|
34
36
|
__all__ = ["ResizeTransform", "InferenceResize", "PadTransform", "normalize_image"]
|
|
@@ -45,7 +47,7 @@ class BaseTransform(ABC):
|
|
|
45
47
|
"""
|
|
46
48
|
|
|
47
49
|
@abstractmethod
|
|
48
|
-
def apply_image(self, img:
|
|
50
|
+
def apply_image(self, img: PixelValues) -> PixelValues:
|
|
49
51
|
"""The transformation that should be applied to the image"""
|
|
50
52
|
raise NotImplementedError()
|
|
51
53
|
|
|
@@ -77,7 +79,7 @@ class ResizeTransform(BaseTransform):
|
|
|
77
79
|
self.new_w = int(new_w)
|
|
78
80
|
self.interp = interp
|
|
79
81
|
|
|
80
|
-
def apply_image(self, img:
|
|
82
|
+
def apply_image(self, img: PixelValues) -> PixelValues:
|
|
81
83
|
assert img.shape[:2] == (self.h, self.w)
|
|
82
84
|
ret = viz_handler.resize(img, self.new_w, self.new_h, self.interp)
|
|
83
85
|
if img.ndim == 3 and ret.ndim == 2:
|
|
@@ -85,7 +87,8 @@ class ResizeTransform(BaseTransform):
|
|
|
85
87
|
return ret
|
|
86
88
|
|
|
87
89
|
def apply_coords(self, coords: npt.NDArray[float32]) -> npt.NDArray[float32]:
|
|
88
|
-
"""Transformation that should be applied to coordinates
|
|
90
|
+
"""Transformation that should be applied to coordinates. Coords are supposed to be passed as
|
|
91
|
+
np array of points"""
|
|
89
92
|
coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w)
|
|
90
93
|
coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h)
|
|
91
94
|
return coords
|
|
@@ -106,7 +109,7 @@ class InferenceResize:
|
|
|
106
109
|
self.max_size = max_size
|
|
107
110
|
self.interp = interp
|
|
108
111
|
|
|
109
|
-
def get_transform(self, img:
|
|
112
|
+
def get_transform(self, img: PixelValues) -> ResizeTransform:
|
|
110
113
|
"""
|
|
111
114
|
get transform
|
|
112
115
|
"""
|
|
@@ -129,7 +132,9 @@ class InferenceResize:
|
|
|
129
132
|
return ResizeTransform(h, w, new_h, new_w, self.interp)
|
|
130
133
|
|
|
131
134
|
|
|
132
|
-
def normalize_image(
|
|
135
|
+
def normalize_image(
|
|
136
|
+
image: PixelValues, pixel_mean: npt.NDArray[float32], pixel_std: npt.NDArray[float32]
|
|
137
|
+
) -> PixelValues:
|
|
133
138
|
"""
|
|
134
139
|
Preprocess pixel values of an image by rescaling.
|
|
135
140
|
|
|
@@ -140,7 +145,7 @@ def normalize_image(image: ImageType, pixel_mean: npt.NDArray[float32], pixel_st
|
|
|
140
145
|
return (image - pixel_mean) * (1.0 / pixel_std)
|
|
141
146
|
|
|
142
147
|
|
|
143
|
-
def pad_image(image:
|
|
148
|
+
def pad_image(image: PixelValues, top: int, right: int, bottom: int, left: int) -> PixelValues:
|
|
144
149
|
"""Pad an image with white color and with given top/bottom/right/left pixel values. Only white padding is
|
|
145
150
|
currently supported
|
|
146
151
|
|
|
@@ -181,7 +186,7 @@ class PadTransform(BaseTransform):
|
|
|
181
186
|
self.image_height: Optional[int] = None
|
|
182
187
|
self.mode = mode
|
|
183
188
|
|
|
184
|
-
def apply_image(self, img:
|
|
189
|
+
def apply_image(self, img: PixelValues) -> PixelValues:
|
|
185
190
|
"""Apply padding to image"""
|
|
186
191
|
self.image_width = img.shape[1]
|
|
187
192
|
self.image_height = img.shape[0]
|
|
@@ -214,6 +219,6 @@ class PadTransform(BaseTransform):
|
|
|
214
219
|
coords[:, 1] = np.maximum(coords[:, 1] - self.top, np.zeros(coords[:, 1].shape))
|
|
215
220
|
return coords
|
|
216
221
|
|
|
217
|
-
def clone(self) ->
|
|
222
|
+
def clone(self) -> PadTransform:
|
|
218
223
|
"""clone"""
|
|
219
224
|
return self.__class__(self.top, self.right, self.bottom, self.left, self.mode)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# File: types.py
|
|
3
|
+
|
|
4
|
+
# Copyright 2021 Dr. Janis Meyer. All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
# you may not use this file except in compliance with the License.
|
|
8
|
+
# You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
|
|
18
|
+
"""
|
|
19
|
+
Typing sheet for the whole package
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import os
|
|
23
|
+
import queue
|
|
24
|
+
from typing import TYPE_CHECKING, Any, Protocol, Type, TypeVar, Union
|
|
25
|
+
|
|
26
|
+
import numpy.typing as npt
|
|
27
|
+
import tqdm
|
|
28
|
+
from numpy import uint8
|
|
29
|
+
from typing_extensions import TypeAlias
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Type for a general dataclass
|
|
33
|
+
class IsDataclass(Protocol): # pylint: disable=R0903
|
|
34
|
+
"""
|
|
35
|
+
type hint for general dataclass
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
__dataclass_fields__: dict[Any, Any]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Numpy image type
|
|
42
|
+
PixelValues = npt.NDArray[uint8]
|
|
43
|
+
# b64 encoded image as string
|
|
44
|
+
B64Str: TypeAlias = str
|
|
45
|
+
# b64 encoded image in bytes
|
|
46
|
+
B64: TypeAlias = bytes
|
|
47
|
+
|
|
48
|
+
# Typing for curry decorator
|
|
49
|
+
DP = TypeVar("DP")
|
|
50
|
+
S = TypeVar("S")
|
|
51
|
+
T = TypeVar("T")
|
|
52
|
+
|
|
53
|
+
# Some type hints that must be distinguished when running mypy and linters
|
|
54
|
+
if TYPE_CHECKING:
|
|
55
|
+
QueueType = queue.Queue[Any] # pylint: disable=E1136
|
|
56
|
+
TqdmType = tqdm.tqdm[Any] # pylint: disable=E1136
|
|
57
|
+
BaseExceptionType = Type[BaseException]
|
|
58
|
+
|
|
59
|
+
else:
|
|
60
|
+
BaseExceptionType = bool
|
|
61
|
+
QueueType = queue.Queue
|
|
62
|
+
TqdmType = tqdm.tqdm
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
JsonDict = dict[str, Any]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Some common deepdoctection dict-types
|
|
69
|
+
AnnotationDict: TypeAlias = dict[str, Any]
|
|
70
|
+
ImageDict: TypeAlias = dict[str, Any]
|
|
71
|
+
|
|
72
|
+
# We use these types for output types of the Page object
|
|
73
|
+
Text_: TypeAlias = dict[str, Any]
|
|
74
|
+
HTML: TypeAlias = str
|
|
75
|
+
csv: TypeAlias = list[list[str]]
|
|
76
|
+
Chunks: TypeAlias = list[tuple[str, str, int, str, str, str, str]]
|
|
77
|
+
|
|
78
|
+
# Some common dict-types used in common annotation schemes converted from a generic JSON object
|
|
79
|
+
CocoDatapointDict: TypeAlias = dict[str, Any]
|
|
80
|
+
PubtabnetDict: TypeAlias = dict[str, Any]
|
|
81
|
+
FunsdDict: TypeAlias = dict[str, Any]
|
|
82
|
+
Detectron2Dict: TypeAlias = dict[str, Any]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# A path to a file, directory etc. can be given as a string or Path object
|
|
86
|
+
PathLikeOrStr: TypeAlias = Union[str, os.PathLike]
|
|
87
|
+
|
|
88
|
+
# mainly used in utils
|
|
89
|
+
# Type for requirements: a tuple of a name, an error message and a flag that is True if the requirement is
|
|
90
|
+
# available
|
|
91
|
+
PackageAvailable: TypeAlias = bool
|
|
92
|
+
ErrorMsg: TypeAlias = str
|
|
93
|
+
Requirement = tuple[str, PackageAvailable, ErrorMsg]
|
|
94
|
+
|
|
95
|
+
BGR: TypeAlias = tuple[int, int, int]
|
|
96
|
+
|
|
97
|
+
# A type to collect key val pairs of environ information. Mainly used in env_info.py
|
|
98
|
+
KeyValEnvInfos: TypeAlias = list[tuple[str, str]]
|
|
99
|
+
|
|
100
|
+
# mainly used in extern
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# mainly used in eval
|
|
104
|
+
MetricResults: TypeAlias = dict[str, Union[int, float]]
|
deepdoctection/utils/utils.py
CHANGED
|
@@ -23,16 +23,16 @@ import inspect
|
|
|
23
23
|
import os
|
|
24
24
|
from collections.abc import MutableMapping
|
|
25
25
|
from datetime import datetime
|
|
26
|
-
from typing import Any, Callable,
|
|
26
|
+
from typing import Any, Callable, Sequence, Union
|
|
27
27
|
|
|
28
28
|
import numpy as np
|
|
29
29
|
|
|
30
|
-
from .
|
|
30
|
+
from .types import PathLikeOrStr
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def delete_keys_from_dict(
|
|
34
|
-
dictionary: Union[
|
|
35
|
-
) ->
|
|
34
|
+
dictionary: Union[dict[Any, Any], MutableMapping], keys: Union[str, list[str], set[str]] # type: ignore
|
|
35
|
+
) -> dict[Any, Any]:
|
|
36
36
|
"""
|
|
37
37
|
Removing key/value pairs from dictionary. Works for nested dicts as well.
|
|
38
38
|
|
|
@@ -62,7 +62,7 @@ def delete_keys_from_dict(
|
|
|
62
62
|
return modified_dict
|
|
63
63
|
|
|
64
64
|
|
|
65
|
-
def split_string(input_string: str) ->
|
|
65
|
+
def split_string(input_string: str) -> list[str]:
|
|
66
66
|
"""
|
|
67
67
|
Takes a string, splits between commas and returns a list with split components as list elements
|
|
68
68
|
|
|
@@ -71,7 +71,7 @@ def split_string(input_string: str) -> List[str]:
|
|
|
71
71
|
return input_string.split(",")
|
|
72
72
|
|
|
73
73
|
|
|
74
|
-
def string_to_dict(input_string: str) ->
|
|
74
|
+
def string_to_dict(input_string: str) -> dict[str, str]:
|
|
75
75
|
"""
|
|
76
76
|
Takes a string of a form `key1=val1,key2=val2` and returns the corresponding dict
|
|
77
77
|
"""
|
|
@@ -144,7 +144,7 @@ def get_rng(obj: Any = None) -> np.random.RandomState:
|
|
|
144
144
|
return np.random.RandomState(seed)
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
def is_file_extension(file_name:
|
|
147
|
+
def is_file_extension(file_name: PathLikeOrStr, extension: Union[str, Sequence[str]]) -> bool:
|
|
148
148
|
"""
|
|
149
149
|
Check if a given file name has a given extension
|
|
150
150
|
|
deepdoctection/utils/viz.py
CHANGED
|
@@ -25,22 +25,21 @@ and
|
|
|
25
25
|
<https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/colormap.py>
|
|
26
26
|
"""
|
|
27
27
|
|
|
28
|
-
import ast
|
|
29
28
|
import base64
|
|
30
29
|
import os
|
|
31
30
|
import sys
|
|
32
31
|
from io import BytesIO
|
|
33
|
-
from typing import Any,
|
|
32
|
+
from typing import Any, Optional, Sequence, no_type_check
|
|
34
33
|
|
|
35
34
|
import numpy as np
|
|
36
35
|
import numpy.typing as npt
|
|
37
36
|
from lazy_imports import try_import
|
|
38
37
|
from numpy import float32, uint8
|
|
39
38
|
|
|
40
|
-
from .
|
|
41
|
-
from .env_info import auto_select_viz_library
|
|
39
|
+
from .env_info import ENV_VARS_TRUE, auto_select_viz_library
|
|
42
40
|
from .error import DependencyError
|
|
43
41
|
from .file_utils import get_opencv_requirement, get_pillow_requirement
|
|
42
|
+
from .types import BGR, B64Str, PathLikeOrStr, PixelValues
|
|
44
43
|
|
|
45
44
|
with try_import() as cv2_import_guard:
|
|
46
45
|
import cv2
|
|
@@ -184,7 +183,7 @@ _COLORS = (
|
|
|
184
183
|
)
|
|
185
184
|
|
|
186
185
|
|
|
187
|
-
def random_color(rgb: bool = True, maximum: int = 255) ->
|
|
186
|
+
def random_color(rgb: bool = True, maximum: int = 255) -> tuple[int, int, int]:
|
|
188
187
|
"""
|
|
189
188
|
:param rgb: Whether to return RGB colors or BGR colors.
|
|
190
189
|
:param maximum: either 255 or 1
|
|
@@ -199,14 +198,14 @@ def random_color(rgb: bool = True, maximum: int = 255) -> Tuple[int, int, int]:
|
|
|
199
198
|
|
|
200
199
|
|
|
201
200
|
def draw_boxes(
|
|
202
|
-
np_image:
|
|
201
|
+
np_image: PixelValues,
|
|
203
202
|
boxes: npt.NDArray[float32],
|
|
204
|
-
category_names_list: Optional[
|
|
205
|
-
color: Optional[
|
|
203
|
+
category_names_list: Optional[list[Optional[str]]] = None,
|
|
204
|
+
color: Optional[BGR] = None,
|
|
206
205
|
font_scale: float = 1.0,
|
|
207
206
|
rectangle_thickness: int = 4,
|
|
208
207
|
box_color_by_category: bool = True,
|
|
209
|
-
) ->
|
|
208
|
+
) -> PixelValues:
|
|
210
209
|
"""
|
|
211
210
|
Draw bounding boxes with category names into image.
|
|
212
211
|
|
|
@@ -247,7 +246,7 @@ def draw_boxes(
|
|
|
247
246
|
np_image = np_image.copy()
|
|
248
247
|
|
|
249
248
|
if np_image.ndim == 2 or (np_image.ndim == 3 and np_image.shape[2] == 1):
|
|
250
|
-
np_image = cv2.cvtColor(np_image, cv2.COLOR_GRAY2BGR)
|
|
249
|
+
np_image = cv2.cvtColor(np_image, cv2.COLOR_GRAY2BGR).astype(np.uint8)
|
|
251
250
|
for i in sorted_inds:
|
|
252
251
|
box = boxes[i, :]
|
|
253
252
|
choose_color = category_to_color.get(category_names_list[i]) if category_to_color is not None else color
|
|
@@ -280,7 +279,7 @@ def draw_boxes(
|
|
|
280
279
|
|
|
281
280
|
|
|
282
281
|
@no_type_check
|
|
283
|
-
def interactive_imshow(img:
|
|
282
|
+
def interactive_imshow(img: PixelValues) -> None:
|
|
284
283
|
"""
|
|
285
284
|
Display an image in a pop-up window
|
|
286
285
|
|
|
@@ -329,7 +328,7 @@ class VizPackageHandler:
|
|
|
329
328
|
def __init__(self) -> None:
|
|
330
329
|
"""Selecting the image processing library and fonts"""
|
|
331
330
|
package = self._select_package()
|
|
332
|
-
self.pkg_func_dict:
|
|
331
|
+
self.pkg_func_dict: dict[str, str] = {}
|
|
333
332
|
self.font = None
|
|
334
333
|
self._set_vars(package)
|
|
335
334
|
|
|
@@ -340,8 +339,8 @@ class VizPackageHandler:
|
|
|
340
339
|
Otherwise it will use Pillow as default package
|
|
341
340
|
:return: either 'pillow' or 'cv2'
|
|
342
341
|
"""
|
|
343
|
-
maybe_cv2 = "cv2" if
|
|
344
|
-
maybe_pil = "pillow" if
|
|
342
|
+
maybe_cv2 = "cv2" if os.environ.get("USE_DD_OPENCV", "False") in ENV_VARS_TRUE else None
|
|
343
|
+
maybe_pil = "pillow" if os.environ.get("USE_DD_PILLOW", "True") in ENV_VARS_TRUE else None
|
|
345
344
|
|
|
346
345
|
if not maybe_cv2 and not maybe_pil:
|
|
347
346
|
raise EnvironmentError(
|
|
@@ -387,7 +386,7 @@ class VizPackageHandler:
|
|
|
387
386
|
package = self._select_package()
|
|
388
387
|
self._set_vars(package)
|
|
389
388
|
|
|
390
|
-
def read_image(self, path:
|
|
389
|
+
def read_image(self, path: PathLikeOrStr) -> PixelValues:
|
|
391
390
|
"""Reading an image from file and returning a np.array
|
|
392
391
|
|
|
393
392
|
:param path: Use /path/to/dir/file_name.[suffix]
|
|
@@ -395,16 +394,16 @@ class VizPackageHandler:
|
|
|
395
394
|
return getattr(self, self.pkg_func_dict["read_image"])(path)
|
|
396
395
|
|
|
397
396
|
@staticmethod
|
|
398
|
-
def _cv2_read_image(path:
|
|
399
|
-
return cv2.imread(path, cv2.IMREAD_COLOR)
|
|
397
|
+
def _cv2_read_image(path: PathLikeOrStr) -> PixelValues:
|
|
398
|
+
return cv2.imread(os.fspath(path), cv2.IMREAD_COLOR).astype(np.uint8)
|
|
400
399
|
|
|
401
400
|
@staticmethod
|
|
402
|
-
def _pillow_read_image(path:
|
|
403
|
-
with Image.open(path).convert("RGB") as image:
|
|
401
|
+
def _pillow_read_image(path: PathLikeOrStr) -> PixelValues:
|
|
402
|
+
with Image.open(os.fspath(path)).convert("RGB") as image:
|
|
404
403
|
np_image = np.array(image)[:, :, ::-1]
|
|
405
404
|
return np_image
|
|
406
405
|
|
|
407
|
-
def write_image(self, path:
|
|
406
|
+
def write_image(self, path: PathLikeOrStr, image: PixelValues) -> None:
|
|
408
407
|
"""Writing an image as np.array to a file.
|
|
409
408
|
|
|
410
409
|
:param path: Use /path/to/dir/file_name.[suffix]
|
|
@@ -413,15 +412,15 @@ class VizPackageHandler:
|
|
|
413
412
|
return getattr(self, self.pkg_func_dict["write_image"])(path, image)
|
|
414
413
|
|
|
415
414
|
@staticmethod
|
|
416
|
-
def _cv2_write_image(path:
|
|
417
|
-
cv2.imwrite(path, image)
|
|
415
|
+
def _cv2_write_image(path: PathLikeOrStr, image: PixelValues) -> None:
|
|
416
|
+
cv2.imwrite(os.fspath(path), image)
|
|
418
417
|
|
|
419
418
|
@staticmethod
|
|
420
|
-
def _pillow_write_image(path:
|
|
419
|
+
def _pillow_write_image(path: PathLikeOrStr, image: PixelValues) -> None:
|
|
421
420
|
pil_image = Image.fromarray(np.uint8(image[:, :, ::-1]))
|
|
422
|
-
pil_image.save(path)
|
|
421
|
+
pil_image.save(os.fspath(path))
|
|
423
422
|
|
|
424
|
-
def encode(self, np_image:
|
|
423
|
+
def encode(self, np_image: PixelValues) -> bytes:
|
|
425
424
|
"""Converting an image as np.array into a b64 representation
|
|
426
425
|
|
|
427
426
|
:param np_image: Image as np.array
|
|
@@ -429,19 +428,19 @@ class VizPackageHandler:
|
|
|
429
428
|
return getattr(self, self.pkg_func_dict["encode"])(np_image)
|
|
430
429
|
|
|
431
430
|
@staticmethod
|
|
432
|
-
def _cv2_encode(np_image:
|
|
431
|
+
def _cv2_encode(np_image: PixelValues) -> bytes:
|
|
433
432
|
np_encode = cv2.imencode(".png", np_image)
|
|
434
433
|
b_image = np_encode[1].tobytes()
|
|
435
434
|
return b_image
|
|
436
435
|
|
|
437
436
|
@staticmethod
|
|
438
|
-
def _pillow_encode(np_image:
|
|
437
|
+
def _pillow_encode(np_image: PixelValues) -> bytes:
|
|
439
438
|
buffered = BytesIO()
|
|
440
439
|
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
441
440
|
pil_image.save(buffered, format="PNG")
|
|
442
441
|
return buffered.getvalue()
|
|
443
442
|
|
|
444
|
-
def convert_np_to_b64(self, image:
|
|
443
|
+
def convert_np_to_b64(self, image: PixelValues) -> str:
|
|
445
444
|
"""Converting an image given as np.array into a b64 encoded string
|
|
446
445
|
|
|
447
446
|
:param image: Image as np.array
|
|
@@ -449,18 +448,18 @@ class VizPackageHandler:
|
|
|
449
448
|
return getattr(self, self.pkg_func_dict["convert_np_to_b64"])(image)
|
|
450
449
|
|
|
451
450
|
@staticmethod
|
|
452
|
-
def _cv2_convert_np_to_b64(image:
|
|
451
|
+
def _cv2_convert_np_to_b64(image: PixelValues) -> str:
|
|
453
452
|
np_encode = cv2.imencode(".png", image)
|
|
454
453
|
return base64.b64encode(np_encode[1]).decode("utf-8") # type: ignore
|
|
455
454
|
|
|
456
455
|
@staticmethod
|
|
457
|
-
def _pillow_convert_np_to_b64(np_image:
|
|
456
|
+
def _pillow_convert_np_to_b64(np_image: PixelValues) -> str:
|
|
458
457
|
buffered = BytesIO()
|
|
459
458
|
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
460
459
|
pil_image.save(buffered, format="PNG")
|
|
461
460
|
return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
462
461
|
|
|
463
|
-
def convert_b64_to_np(self, image:
|
|
462
|
+
def convert_b64_to_np(self, image: B64Str) -> PixelValues:
|
|
464
463
|
"""
|
|
465
464
|
Converting an image as b64 encoded string into np.array
|
|
466
465
|
|
|
@@ -470,19 +469,19 @@ class VizPackageHandler:
|
|
|
470
469
|
return getattr(self, self.pkg_func_dict["convert_b64_to_np"])(image)
|
|
471
470
|
|
|
472
471
|
@staticmethod
|
|
473
|
-
def _cv2_convert_b64_to_np(image:
|
|
472
|
+
def _cv2_convert_b64_to_np(image: B64Str) -> PixelValues:
|
|
474
473
|
np_array = np.fromstring(base64.b64decode(image), np.uint8) # type: ignore
|
|
475
474
|
np_array = cv2.imdecode(np_array, cv2.IMREAD_COLOR).astype(np.float32)
|
|
476
475
|
return np_array.astype(uint8)
|
|
477
476
|
|
|
478
477
|
@staticmethod
|
|
479
|
-
def _pillow_convert_b64_to_np(image:
|
|
478
|
+
def _pillow_convert_b64_to_np(image: B64Str) -> PixelValues:
|
|
480
479
|
array = base64.b64decode(image)
|
|
481
480
|
im_file = BytesIO(array)
|
|
482
481
|
pil_image = Image.open(im_file)
|
|
483
482
|
return np.array(pil_image)[:, :, ::-1]
|
|
484
483
|
|
|
485
|
-
def resize(self, image:
|
|
484
|
+
def resize(self, image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
|
|
486
485
|
"""
|
|
487
486
|
Resize a given image to new width, height. Specifying an interpolation method is required. Depending on the
|
|
488
487
|
chosen image library use one of the following:
|
|
@@ -499,17 +498,19 @@ class VizPackageHandler:
|
|
|
499
498
|
return getattr(self, self.pkg_func_dict["resize"])(image, width, height, interpolation)
|
|
500
499
|
|
|
501
500
|
@staticmethod
|
|
502
|
-
def _cv2_resize(image:
|
|
501
|
+
def _cv2_resize(image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
|
|
503
502
|
intpol_method_dict = {
|
|
504
503
|
"INTER_NEAREST": cv2.INTER_NEAREST,
|
|
505
504
|
"INTER_LINEAR": cv2.INTER_LINEAR,
|
|
506
505
|
"INTER_AREA": cv2.INTER_AREA,
|
|
507
506
|
"VIZ": cv2.INTER_LINEAR,
|
|
508
507
|
}
|
|
509
|
-
return cv2.resize(image, (width, height), interpolation=intpol_method_dict[interpolation])
|
|
508
|
+
return cv2.resize(image, dsize=(width, height), interpolation=intpol_method_dict[interpolation]).astype(
|
|
509
|
+
np.uint8
|
|
510
|
+
)
|
|
510
511
|
|
|
511
512
|
@staticmethod
|
|
512
|
-
def _pillow_resize(image:
|
|
513
|
+
def _pillow_resize(image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
|
|
513
514
|
intpol_method_dict = {
|
|
514
515
|
"NEAREST": Image.Resampling.NEAREST,
|
|
515
516
|
"BOX": Image.Resampling.BOX,
|
|
@@ -523,7 +524,7 @@ class VizPackageHandler:
|
|
|
523
524
|
)
|
|
524
525
|
return np.array(pil_image_resized)[:, :, ::-1]
|
|
525
526
|
|
|
526
|
-
def get_text_size(self, text: str, font_scale: float) ->
|
|
527
|
+
def get_text_size(self, text: str, font_scale: float) -> tuple[int, int]:
|
|
527
528
|
"""
|
|
528
529
|
Return the text size for a given font scale
|
|
529
530
|
:param text: text as string
|
|
@@ -532,19 +533,19 @@ class VizPackageHandler:
|
|
|
532
533
|
"""
|
|
533
534
|
return getattr(self, self.pkg_func_dict["get_text_size"])(text, font_scale)
|
|
534
535
|
|
|
535
|
-
def _cv2_get_text_size(self, text: str, font_scale: float) ->
|
|
536
|
+
def _cv2_get_text_size(self, text: str, font_scale: float) -> tuple[int, int]:
|
|
536
537
|
((width, height), _) = cv2.getTextSize(text, self.font, font_scale, 1) # type: ignore
|
|
537
538
|
return width, height
|
|
538
539
|
|
|
539
|
-
def _pillow_get_text_size(self, text: str, font_scale: float) ->
|
|
540
|
+
def _pillow_get_text_size(self, text: str, font_scale: float) -> tuple[int, int]: # pylint: disable=W0613
|
|
540
541
|
_, _, width, height = self.font.getbbox(text) # type: ignore
|
|
541
542
|
return width, height
|
|
542
543
|
|
|
543
544
|
def draw_rectangle(
|
|
544
|
-
self, np_image:
|
|
545
|
-
) ->
|
|
545
|
+
self, np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: tuple[int, int, int], thickness: int
|
|
546
|
+
) -> PixelValues:
|
|
546
547
|
"""
|
|
547
|
-
Drawing a rectangle into an image with a given color (b,g,r) and given thickness
|
|
548
|
+
Drawing a rectangle into an image with a given color (b,g,r) and given thickness.
|
|
548
549
|
|
|
549
550
|
:param np_image: image
|
|
550
551
|
:param box: box (x_min, y_min, x_max, y_max)
|
|
@@ -556,15 +557,15 @@ class VizPackageHandler:
|
|
|
556
557
|
|
|
557
558
|
@staticmethod
|
|
558
559
|
def _cv2_draw_rectangle(
|
|
559
|
-
np_image:
|
|
560
|
-
) ->
|
|
560
|
+
np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
|
|
561
|
+
) -> PixelValues:
|
|
561
562
|
cv2.rectangle(np_image, (box[0], box[1]), (box[2], box[3]), color=color, thickness=thickness)
|
|
562
563
|
return np_image
|
|
563
564
|
|
|
564
565
|
@staticmethod
|
|
565
566
|
def _pillow_draw_rectangle(
|
|
566
|
-
np_image:
|
|
567
|
-
) ->
|
|
567
|
+
np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
|
|
568
|
+
) -> PixelValues:
|
|
568
569
|
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
569
570
|
draw = ImageDraw.Draw(pil_image)
|
|
570
571
|
draw.rectangle(box, outline=color, width=thickness) # type: ignore
|
|
@@ -573,13 +574,13 @@ class VizPackageHandler:
|
|
|
573
574
|
|
|
574
575
|
def draw_text(
|
|
575
576
|
self,
|
|
576
|
-
np_image:
|
|
577
|
-
pos:
|
|
577
|
+
np_image: PixelValues,
|
|
578
|
+
pos: tuple[Any, Any],
|
|
578
579
|
text: str,
|
|
579
|
-
color:
|
|
580
|
+
color: tuple[int, int, int],
|
|
580
581
|
font_scale: float,
|
|
581
582
|
rectangle_thickness: int = 1,
|
|
582
|
-
) ->
|
|
583
|
+
) -> PixelValues:
|
|
583
584
|
"""
|
|
584
585
|
Drawing a text into a numpy image. The result will differ between PIL and CV2 (and will not look that good when
|
|
585
586
|
using PIL).
|
|
@@ -598,13 +599,13 @@ class VizPackageHandler:
|
|
|
598
599
|
|
|
599
600
|
def _cv2_draw_text(
|
|
600
601
|
self,
|
|
601
|
-
np_image:
|
|
602
|
-
pos:
|
|
602
|
+
np_image: PixelValues,
|
|
603
|
+
pos: tuple[Any, Any],
|
|
603
604
|
text: str,
|
|
604
|
-
color:
|
|
605
|
+
color: tuple[int, int, int],
|
|
605
606
|
font_scale: float,
|
|
606
607
|
rectangle_thickness: int,
|
|
607
|
-
) ->
|
|
608
|
+
) -> PixelValues:
|
|
608
609
|
"""
|
|
609
610
|
Draw text on an image.
|
|
610
611
|
|
|
@@ -642,13 +643,13 @@ class VizPackageHandler:
|
|
|
642
643
|
|
|
643
644
|
@staticmethod
|
|
644
645
|
def _pillow_draw_text(
|
|
645
|
-
np_image:
|
|
646
|
-
pos:
|
|
646
|
+
np_image: PixelValues,
|
|
647
|
+
pos: tuple[Any, Any],
|
|
647
648
|
text: str,
|
|
648
|
-
color:
|
|
649
|
+
color: tuple[int, int, int], # pylint: disable=W0613
|
|
649
650
|
font_scale: float, # pylint: disable=W0613
|
|
650
651
|
rectangle_thickness: int, # pylint: disable=W0613
|
|
651
|
-
) ->
|
|
652
|
+
) -> PixelValues:
|
|
652
653
|
"""Draw a text in an image using PIL."""
|
|
653
654
|
# using PIL default font size that does not scale to larger image sizes.
|
|
654
655
|
# Compare with https://github.com/python-pillow/Pillow/issues/6622
|
|
@@ -657,11 +658,11 @@ class VizPackageHandler:
|
|
|
657
658
|
draw.text(pos, text, fill=(0, 0, 0), anchor="lb")
|
|
658
659
|
return np.array(pil_image)[:, :, ::-1]
|
|
659
660
|
|
|
660
|
-
def interactive_imshow(self, np_image:
|
|
661
|
+
def interactive_imshow(self, np_image: PixelValues) -> None:
|
|
661
662
|
"""Displaying an image in a separate window"""
|
|
662
663
|
return getattr(self, self.pkg_func_dict["interactive_imshow"])(np_image)
|
|
663
664
|
|
|
664
|
-
def _cv2_interactive_imshow(self, np_image:
|
|
665
|
+
def _cv2_interactive_imshow(self, np_image: PixelValues) -> None:
|
|
665
666
|
"""
|
|
666
667
|
Display an image in a pop-up window
|
|
667
668
|
|
|
@@ -682,24 +683,24 @@ class VizPackageHandler:
|
|
|
682
683
|
elif key == "s":
|
|
683
684
|
cv2.imwrite("out.png", np_image)
|
|
684
685
|
elif key in ["+", "="]:
|
|
685
|
-
np_image = cv2.resize(np_image, None, fx=1.3, fy=1.3, interpolation=cv2.INTER_CUBIC)
|
|
686
|
+
np_image = cv2.resize(np_image, None, fx=1.3, fy=1.3, interpolation=cv2.INTER_CUBIC).astype(np.uint8)
|
|
686
687
|
self._cv2_interactive_imshow(np_image)
|
|
687
688
|
elif key == "-":
|
|
688
|
-
np_image = cv2.resize(np_image, None, fx=0.7, fy=0.7, interpolation=cv2.INTER_CUBIC)
|
|
689
|
+
np_image = cv2.resize(np_image, None, fx=0.7, fy=0.7, interpolation=cv2.INTER_CUBIC).astype(np.uint8)
|
|
689
690
|
self._cv2_interactive_imshow(np_image)
|
|
690
691
|
|
|
691
692
|
@staticmethod
|
|
692
|
-
def _pillow_interactive_imshow(np_image:
|
|
693
|
+
def _pillow_interactive_imshow(np_image: PixelValues) -> None:
|
|
693
694
|
name = "q, x: quit / s: save"
|
|
694
695
|
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
695
696
|
pil_image.show(name)
|
|
696
697
|
|
|
697
|
-
def rotate_image(self, np_image:
|
|
698
|
+
def rotate_image(self, np_image: PixelValues, angle: float) -> PixelValues:
|
|
698
699
|
"""Rotating an image by some angle"""
|
|
699
700
|
return getattr(self, self.pkg_func_dict["rotate_image"])(np_image, angle)
|
|
700
701
|
|
|
701
702
|
@staticmethod
|
|
702
|
-
def _cv2_rotate_image(np_image:
|
|
703
|
+
def _cv2_rotate_image(np_image: PixelValues, angle: float) -> PixelValues:
|
|
703
704
|
# copy & paste from https://stackoverflow.com/questions/43892506
|
|
704
705
|
# /opencv-python-rotate-image-without-cropping-sides
|
|
705
706
|
|
|
@@ -719,16 +720,16 @@ class VizPackageHandler:
|
|
|
719
720
|
rotation_mat[0, 2] += bound_w / 2 - image_center[0]
|
|
720
721
|
rotation_mat[1, 2] += bound_h / 2 - image_center[1]
|
|
721
722
|
|
|
722
|
-
np_image = cv2.warpAffine(
|
|
723
|
+
np_image = cv2.warpAffine(
|
|
723
724
|
src=np_image,
|
|
724
725
|
M=rotation_mat,
|
|
725
726
|
dsize=(bound_w, bound_h),
|
|
726
|
-
)
|
|
727
|
+
).astype(np.uint8)
|
|
727
728
|
|
|
728
729
|
return np_image
|
|
729
730
|
|
|
730
731
|
@staticmethod
|
|
731
|
-
def _pillow_rotate_image(np_image:
|
|
732
|
+
def _pillow_rotate_image(np_image: PixelValues, angle: float) -> PixelValues:
|
|
732
733
|
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
733
734
|
pil_image_rotated = pil_image.rotate(angle, expand=True)
|
|
734
735
|
return np.array(pil_image_rotated)[:, :, ::-1]
|