deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +16 -29
- deepdoctection/analyzer/dd.py +70 -59
- deepdoctection/configs/conf_dd_one.yaml +34 -31
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +41 -56
- deepdoctection/datapoint/box.py +9 -8
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +56 -44
- deepdoctection/datapoint/view.py +245 -150
- deepdoctection/datasets/__init__.py +1 -4
- deepdoctection/datasets/adapter.py +35 -26
- deepdoctection/datasets/base.py +14 -12
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +24 -26
- deepdoctection/datasets/instances/doclaynet.py +51 -51
- deepdoctection/datasets/instances/fintabnet.py +46 -46
- deepdoctection/datasets/instances/funsd.py +25 -24
- deepdoctection/datasets/instances/iiitar13k.py +13 -10
- deepdoctection/datasets/instances/layouttest.py +4 -3
- deepdoctection/datasets/instances/publaynet.py +5 -5
- deepdoctection/datasets/instances/pubtables1m.py +24 -21
- deepdoctection/datasets/instances/pubtabnet.py +32 -30
- deepdoctection/datasets/instances/rvlcdip.py +30 -30
- deepdoctection/datasets/instances/xfund.py +26 -26
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/__init__.py +1 -4
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +15 -13
- deepdoctection/eval/eval.py +41 -37
- deepdoctection/eval/tedsmetric.py +30 -23
- deepdoctection/eval/tp_eval_callback.py +16 -19
- deepdoctection/extern/__init__.py +2 -7
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +85 -113
- deepdoctection/extern/deskew.py +14 -11
- deepdoctection/extern/doctrocr.py +141 -130
- deepdoctection/extern/fastlang.py +27 -18
- deepdoctection/extern/hfdetr.py +71 -62
- deepdoctection/extern/hflayoutlm.py +504 -211
- deepdoctection/extern/hflm.py +230 -0
- deepdoctection/extern/model.py +488 -302
- deepdoctection/extern/pdftext.py +23 -19
- deepdoctection/extern/pt/__init__.py +1 -3
- deepdoctection/extern/pt/nms.py +6 -2
- deepdoctection/extern/pt/ptutils.py +29 -19
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +18 -18
- deepdoctection/extern/tp/tfutils.py +57 -9
- deepdoctection/extern/tp/tpcompat.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
- deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
- deepdoctection/extern/tpdetect.py +45 -53
- deepdoctection/mapper/__init__.py +3 -8
- deepdoctection/mapper/cats.py +27 -29
- deepdoctection/mapper/cocostruct.py +10 -10
- deepdoctection/mapper/d2struct.py +27 -26
- deepdoctection/mapper/hfstruct.py +13 -8
- deepdoctection/mapper/laylmstruct.py +178 -37
- deepdoctection/mapper/maputils.py +12 -11
- deepdoctection/mapper/match.py +2 -2
- deepdoctection/mapper/misc.py +11 -9
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +5 -5
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +5 -5
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/__init__.py +1 -1
- deepdoctection/pipe/anngen.py +12 -14
- deepdoctection/pipe/base.py +52 -106
- deepdoctection/pipe/common.py +72 -59
- deepdoctection/pipe/concurrency.py +16 -11
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +20 -16
- deepdoctection/pipe/lm.py +75 -105
- deepdoctection/pipe/order.py +194 -89
- deepdoctection/pipe/refine.py +111 -124
- deepdoctection/pipe/segment.py +156 -161
- deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/__init__.py +6 -12
- deepdoctection/train/d2_frcnn_train.py +48 -41
- deepdoctection/train/hf_detr_train.py +41 -30
- deepdoctection/train/hf_layoutlm_train.py +153 -135
- deepdoctection/train/tp_frcnn_train.py +32 -31
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +87 -125
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +22 -18
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +16 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/mocks.py +93 -0
- deepdoctection/utils/pdf_utils.py +11 -11
- deepdoctection/utils/settings.py +185 -181
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +74 -72
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
- deepdoctection-0.33.dist-info/RECORD +146 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.31.dist-info/RECORD +0 -144
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# File: types.py
|
|
3
|
+
|
|
4
|
+
# Copyright 2021 Dr. Janis Meyer. All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
# you may not use this file except in compliance with the License.
|
|
8
|
+
# You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
|
|
18
|
+
"""
|
|
19
|
+
Typing sheet for the whole package
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import os
|
|
23
|
+
import queue
|
|
24
|
+
from typing import TYPE_CHECKING, Any, Protocol, Type, TypeVar, Union
|
|
25
|
+
|
|
26
|
+
import numpy.typing as npt
|
|
27
|
+
import tqdm
|
|
28
|
+
from numpy import uint8
|
|
29
|
+
from typing_extensions import TypeAlias
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Type for a general dataclass
|
|
33
|
+
class IsDataclass(Protocol): # pylint: disable=R0903
|
|
34
|
+
"""
|
|
35
|
+
type hint for general dataclass
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
__dataclass_fields__: dict[Any, Any]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Numpy image type
|
|
42
|
+
PixelValues = npt.NDArray[uint8]
|
|
43
|
+
# b64 encoded image as string
|
|
44
|
+
B64Str: TypeAlias = str
|
|
45
|
+
# b64 encoded image in bytes
|
|
46
|
+
B64: TypeAlias = bytes
|
|
47
|
+
|
|
48
|
+
# Typing for curry decorator
|
|
49
|
+
DP = TypeVar("DP")
|
|
50
|
+
S = TypeVar("S")
|
|
51
|
+
T = TypeVar("T")
|
|
52
|
+
|
|
53
|
+
# Some type hints that must be distinguished when running mypy and linters
|
|
54
|
+
if TYPE_CHECKING:
|
|
55
|
+
QueueType = queue.Queue[Any] # pylint: disable=E1136
|
|
56
|
+
TqdmType = tqdm.tqdm[Any] # pylint: disable=E1136
|
|
57
|
+
BaseExceptionType = Type[BaseException]
|
|
58
|
+
|
|
59
|
+
else:
|
|
60
|
+
BaseExceptionType = bool
|
|
61
|
+
QueueType = queue.Queue
|
|
62
|
+
TqdmType = tqdm.tqdm
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
JsonDict = dict[str, Any]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Some common deepdoctection dict-types
|
|
69
|
+
AnnotationDict: TypeAlias = dict[str, Any]
|
|
70
|
+
ImageDict: TypeAlias = dict[str, Any]
|
|
71
|
+
|
|
72
|
+
# We use these types for output types of the Page object
|
|
73
|
+
Text_: TypeAlias = dict[str, Any]
|
|
74
|
+
HTML: TypeAlias = str
|
|
75
|
+
csv: TypeAlias = list[list[str]]
|
|
76
|
+
Chunks: TypeAlias = list[tuple[str, str, int, str, str, str, str]]
|
|
77
|
+
|
|
78
|
+
# Some common dict-types used in common annotation schemes converted from a generic JSON object
|
|
79
|
+
CocoDatapointDict: TypeAlias = dict[str, Any]
|
|
80
|
+
PubtabnetDict: TypeAlias = dict[str, Any]
|
|
81
|
+
FunsdDict: TypeAlias = dict[str, Any]
|
|
82
|
+
Detectron2Dict: TypeAlias = dict[str, Any]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# A path to a file, directory etc. can be given as a string or Path object
|
|
86
|
+
PathLikeOrStr: TypeAlias = Union[str, os.PathLike]
|
|
87
|
+
|
|
88
|
+
# mainly used in utils
|
|
89
|
+
# Type for requirements. A requirement is a tuple of a string, a bool that is True if the requirement is
|
|
90
|
+
# available, and an error message string
|
|
91
|
+
PackageAvailable: TypeAlias = bool
|
|
92
|
+
ErrorMsg: TypeAlias = str
|
|
93
|
+
Requirement = tuple[str, PackageAvailable, ErrorMsg]
|
|
94
|
+
|
|
95
|
+
BGR: TypeAlias = tuple[int, int, int]
|
|
96
|
+
|
|
97
|
+
# A type to collect key val pairs of environ information. Mainly used in env_info.py
|
|
98
|
+
KeyValEnvInfos: TypeAlias = list[tuple[str, str]]
|
|
99
|
+
|
|
100
|
+
# mainly used in extern
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# mainly used in eval
|
|
104
|
+
MetricResults: TypeAlias = dict[str, Union[int, float]]
|
deepdoctection/utils/utils.py
CHANGED
|
@@ -23,16 +23,16 @@ import inspect
|
|
|
23
23
|
import os
|
|
24
24
|
from collections.abc import MutableMapping
|
|
25
25
|
from datetime import datetime
|
|
26
|
-
from typing import Any, Callable,
|
|
26
|
+
from typing import Any, Callable, Sequence, Union
|
|
27
27
|
|
|
28
28
|
import numpy as np
|
|
29
29
|
|
|
30
|
-
from .
|
|
30
|
+
from .types import PathLikeOrStr
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def delete_keys_from_dict(
|
|
34
|
-
dictionary: Union[
|
|
35
|
-
) ->
|
|
34
|
+
dictionary: Union[dict[Any, Any], MutableMapping], keys: Union[str, list[str], set[str]] # type: ignore
|
|
35
|
+
) -> dict[Any, Any]:
|
|
36
36
|
"""
|
|
37
37
|
Removing key/value pairs from dictionary. Works for nested dicts as well.
|
|
38
38
|
|
|
@@ -62,7 +62,7 @@ def delete_keys_from_dict(
|
|
|
62
62
|
return modified_dict
|
|
63
63
|
|
|
64
64
|
|
|
65
|
-
def split_string(input_string: str) ->
|
|
65
|
+
def split_string(input_string: str) -> list[str]:
|
|
66
66
|
"""
|
|
67
67
|
Takes a string, splits between commas and returns a list with split components as list elements
|
|
68
68
|
|
|
@@ -71,7 +71,7 @@ def split_string(input_string: str) -> List[str]:
|
|
|
71
71
|
return input_string.split(",")
|
|
72
72
|
|
|
73
73
|
|
|
74
|
-
def string_to_dict(input_string: str) ->
|
|
74
|
+
def string_to_dict(input_string: str) -> dict[str, str]:
|
|
75
75
|
"""
|
|
76
76
|
Takes a string of a form `key1=val1,key2=val2` and returns the corresponding dict
|
|
77
77
|
"""
|
|
@@ -144,7 +144,7 @@ def get_rng(obj: Any = None) -> np.random.RandomState:
|
|
|
144
144
|
return np.random.RandomState(seed)
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
def is_file_extension(file_name:
|
|
147
|
+
def is_file_extension(file_name: PathLikeOrStr, extension: Union[str, Sequence[str]]) -> bool:
|
|
148
148
|
"""
|
|
149
149
|
Check if a given file name has a given extension
|
|
150
150
|
|
deepdoctection/utils/viz.py
CHANGED
|
@@ -25,26 +25,26 @@ and
|
|
|
25
25
|
<https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/colormap.py>
|
|
26
26
|
"""
|
|
27
27
|
|
|
28
|
-
import ast
|
|
29
28
|
import base64
|
|
30
29
|
import os
|
|
31
30
|
import sys
|
|
32
31
|
from io import BytesIO
|
|
33
|
-
from typing import Any,
|
|
32
|
+
from typing import Any, Optional, Sequence, no_type_check
|
|
34
33
|
|
|
35
34
|
import numpy as np
|
|
36
35
|
import numpy.typing as npt
|
|
36
|
+
from lazy_imports import try_import
|
|
37
37
|
from numpy import float32, uint8
|
|
38
38
|
|
|
39
|
-
from .
|
|
40
|
-
from .env_info import auto_select_viz_library
|
|
39
|
+
from .env_info import ENV_VARS_TRUE, auto_select_viz_library
|
|
41
40
|
from .error import DependencyError
|
|
42
|
-
from .file_utils import get_opencv_requirement, get_pillow_requirement
|
|
41
|
+
from .file_utils import get_opencv_requirement, get_pillow_requirement
|
|
42
|
+
from .types import BGR, B64Str, PathLikeOrStr, PixelValues
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
with try_import() as cv2_import_guard:
|
|
45
45
|
import cv2
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
with try_import() as pil_import_guard:
|
|
48
48
|
from PIL import Image, ImageDraw
|
|
49
49
|
|
|
50
50
|
|
|
@@ -183,7 +183,7 @@ _COLORS = (
|
|
|
183
183
|
)
|
|
184
184
|
|
|
185
185
|
|
|
186
|
-
def random_color(rgb: bool = True, maximum: int = 255) ->
|
|
186
|
+
def random_color(rgb: bool = True, maximum: int = 255) -> tuple[int, int, int]:
|
|
187
187
|
"""
|
|
188
188
|
:param rgb: Whether to return RGB colors or BGR colors.
|
|
189
189
|
:param maximum: either 255 or 1
|
|
@@ -198,14 +198,14 @@ def random_color(rgb: bool = True, maximum: int = 255) -> Tuple[int, int, int]:
|
|
|
198
198
|
|
|
199
199
|
|
|
200
200
|
def draw_boxes(
|
|
201
|
-
np_image:
|
|
201
|
+
np_image: PixelValues,
|
|
202
202
|
boxes: npt.NDArray[float32],
|
|
203
|
-
category_names_list: Optional[
|
|
204
|
-
color: Optional[
|
|
203
|
+
category_names_list: Optional[list[Optional[str]]] = None,
|
|
204
|
+
color: Optional[BGR] = None,
|
|
205
205
|
font_scale: float = 1.0,
|
|
206
206
|
rectangle_thickness: int = 4,
|
|
207
207
|
box_color_by_category: bool = True,
|
|
208
|
-
) ->
|
|
208
|
+
) -> PixelValues:
|
|
209
209
|
"""
|
|
210
210
|
Draw bounding boxes with category names into image.
|
|
211
211
|
|
|
@@ -246,7 +246,7 @@ def draw_boxes(
|
|
|
246
246
|
np_image = np_image.copy()
|
|
247
247
|
|
|
248
248
|
if np_image.ndim == 2 or (np_image.ndim == 3 and np_image.shape[2] == 1):
|
|
249
|
-
np_image = cv2.cvtColor(np_image, cv2.COLOR_GRAY2BGR)
|
|
249
|
+
np_image = cv2.cvtColor(np_image, cv2.COLOR_GRAY2BGR).astype(np.uint8)
|
|
250
250
|
for i in sorted_inds:
|
|
251
251
|
box = boxes[i, :]
|
|
252
252
|
choose_color = category_to_color.get(category_names_list[i]) if category_to_color is not None else color
|
|
@@ -279,7 +279,7 @@ def draw_boxes(
|
|
|
279
279
|
|
|
280
280
|
|
|
281
281
|
@no_type_check
|
|
282
|
-
def interactive_imshow(img:
|
|
282
|
+
def interactive_imshow(img: PixelValues) -> None:
|
|
283
283
|
"""
|
|
284
284
|
Display an image in a pop-up window
|
|
285
285
|
|
|
@@ -328,7 +328,7 @@ class VizPackageHandler:
|
|
|
328
328
|
def __init__(self) -> None:
|
|
329
329
|
"""Selecting the image processing library and fonts"""
|
|
330
330
|
package = self._select_package()
|
|
331
|
-
self.pkg_func_dict:
|
|
331
|
+
self.pkg_func_dict: dict[str, str] = {}
|
|
332
332
|
self.font = None
|
|
333
333
|
self._set_vars(package)
|
|
334
334
|
|
|
@@ -339,8 +339,8 @@ class VizPackageHandler:
|
|
|
339
339
|
Otherwise it will use Pillow as default package
|
|
340
340
|
:return: either 'pillow' or 'cv2'
|
|
341
341
|
"""
|
|
342
|
-
maybe_cv2 = "cv2" if
|
|
343
|
-
maybe_pil = "pillow" if
|
|
342
|
+
maybe_cv2 = "cv2" if os.environ.get("USE_DD_OPENCV", "False") in ENV_VARS_TRUE else None
|
|
343
|
+
maybe_pil = "pillow" if os.environ.get("USE_DD_PILLOW", "True") in ENV_VARS_TRUE else None
|
|
344
344
|
|
|
345
345
|
if not maybe_cv2 and not maybe_pil:
|
|
346
346
|
raise EnvironmentError(
|
|
@@ -386,7 +386,7 @@ class VizPackageHandler:
|
|
|
386
386
|
package = self._select_package()
|
|
387
387
|
self._set_vars(package)
|
|
388
388
|
|
|
389
|
-
def read_image(self, path:
|
|
389
|
+
def read_image(self, path: PathLikeOrStr) -> PixelValues:
|
|
390
390
|
"""Reading an image from file and returning a np.array
|
|
391
391
|
|
|
392
392
|
:param path: Use /path/to/dir/file_name.[suffix]
|
|
@@ -394,16 +394,16 @@ class VizPackageHandler:
|
|
|
394
394
|
return getattr(self, self.pkg_func_dict["read_image"])(path)
|
|
395
395
|
|
|
396
396
|
@staticmethod
|
|
397
|
-
def _cv2_read_image(path:
|
|
398
|
-
return cv2.imread(path, cv2.IMREAD_COLOR)
|
|
397
|
+
def _cv2_read_image(path: PathLikeOrStr) -> PixelValues:
|
|
398
|
+
return cv2.imread(os.fspath(path), cv2.IMREAD_COLOR).astype(np.uint8)
|
|
399
399
|
|
|
400
400
|
@staticmethod
|
|
401
|
-
def _pillow_read_image(path:
|
|
402
|
-
with Image.open(path).convert("RGB") as image:
|
|
401
|
+
def _pillow_read_image(path: PathLikeOrStr) -> PixelValues:
|
|
402
|
+
with Image.open(os.fspath(path)).convert("RGB") as image:
|
|
403
403
|
np_image = np.array(image)[:, :, ::-1]
|
|
404
404
|
return np_image
|
|
405
405
|
|
|
406
|
-
def write_image(self, path:
|
|
406
|
+
def write_image(self, path: PathLikeOrStr, image: PixelValues) -> None:
|
|
407
407
|
"""Writing an image as np.array to a file.
|
|
408
408
|
|
|
409
409
|
:param path: Use /path/to/dir/file_name.[suffix]
|
|
@@ -412,15 +412,15 @@ class VizPackageHandler:
|
|
|
412
412
|
return getattr(self, self.pkg_func_dict["write_image"])(path, image)
|
|
413
413
|
|
|
414
414
|
@staticmethod
|
|
415
|
-
def _cv2_write_image(path:
|
|
416
|
-
cv2.imwrite(path, image)
|
|
415
|
+
def _cv2_write_image(path: PathLikeOrStr, image: PixelValues) -> None:
|
|
416
|
+
cv2.imwrite(os.fspath(path), image)
|
|
417
417
|
|
|
418
418
|
@staticmethod
|
|
419
|
-
def _pillow_write_image(path:
|
|
419
|
+
def _pillow_write_image(path: PathLikeOrStr, image: PixelValues) -> None:
|
|
420
420
|
pil_image = Image.fromarray(np.uint8(image[:, :, ::-1]))
|
|
421
|
-
pil_image.save(path)
|
|
421
|
+
pil_image.save(os.fspath(path))
|
|
422
422
|
|
|
423
|
-
def encode(self, np_image:
|
|
423
|
+
def encode(self, np_image: PixelValues) -> bytes:
|
|
424
424
|
"""Converting an image as np.array into a b64 representation
|
|
425
425
|
|
|
426
426
|
:param np_image: Image as np.array
|
|
@@ -428,19 +428,19 @@ class VizPackageHandler:
|
|
|
428
428
|
return getattr(self, self.pkg_func_dict["encode"])(np_image)
|
|
429
429
|
|
|
430
430
|
@staticmethod
|
|
431
|
-
def _cv2_encode(np_image:
|
|
431
|
+
def _cv2_encode(np_image: PixelValues) -> bytes:
|
|
432
432
|
np_encode = cv2.imencode(".png", np_image)
|
|
433
433
|
b_image = np_encode[1].tobytes()
|
|
434
434
|
return b_image
|
|
435
435
|
|
|
436
436
|
@staticmethod
|
|
437
|
-
def _pillow_encode(np_image:
|
|
437
|
+
def _pillow_encode(np_image: PixelValues) -> bytes:
|
|
438
438
|
buffered = BytesIO()
|
|
439
439
|
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
440
440
|
pil_image.save(buffered, format="PNG")
|
|
441
441
|
return buffered.getvalue()
|
|
442
442
|
|
|
443
|
-
def convert_np_to_b64(self, image:
|
|
443
|
+
def convert_np_to_b64(self, image: PixelValues) -> str:
|
|
444
444
|
"""Converting an image given as np.array into a b64 encoded string
|
|
445
445
|
|
|
446
446
|
:param image: Image as np.array
|
|
@@ -448,18 +448,18 @@ class VizPackageHandler:
|
|
|
448
448
|
return getattr(self, self.pkg_func_dict["convert_np_to_b64"])(image)
|
|
449
449
|
|
|
450
450
|
@staticmethod
|
|
451
|
-
def _cv2_convert_np_to_b64(image:
|
|
451
|
+
def _cv2_convert_np_to_b64(image: PixelValues) -> str:
|
|
452
452
|
np_encode = cv2.imencode(".png", image)
|
|
453
453
|
return base64.b64encode(np_encode[1]).decode("utf-8") # type: ignore
|
|
454
454
|
|
|
455
455
|
@staticmethod
|
|
456
|
-
def _pillow_convert_np_to_b64(np_image:
|
|
456
|
+
def _pillow_convert_np_to_b64(np_image: PixelValues) -> str:
|
|
457
457
|
buffered = BytesIO()
|
|
458
458
|
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
459
459
|
pil_image.save(buffered, format="PNG")
|
|
460
460
|
return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
461
461
|
|
|
462
|
-
def convert_b64_to_np(self, image:
|
|
462
|
+
def convert_b64_to_np(self, image: B64Str) -> PixelValues:
|
|
463
463
|
"""
|
|
464
464
|
Converting an image as b64 encoded string into np.array
|
|
465
465
|
|
|
@@ -469,19 +469,19 @@ class VizPackageHandler:
|
|
|
469
469
|
return getattr(self, self.pkg_func_dict["convert_b64_to_np"])(image)
|
|
470
470
|
|
|
471
471
|
@staticmethod
|
|
472
|
-
def _cv2_convert_b64_to_np(image:
|
|
472
|
+
def _cv2_convert_b64_to_np(image: B64Str) -> PixelValues:
|
|
473
473
|
np_array = np.fromstring(base64.b64decode(image), np.uint8) # type: ignore
|
|
474
474
|
np_array = cv2.imdecode(np_array, cv2.IMREAD_COLOR).astype(np.float32)
|
|
475
475
|
return np_array.astype(uint8)
|
|
476
476
|
|
|
477
477
|
@staticmethod
|
|
478
|
-
def _pillow_convert_b64_to_np(image:
|
|
478
|
+
def _pillow_convert_b64_to_np(image: B64Str) -> PixelValues:
|
|
479
479
|
array = base64.b64decode(image)
|
|
480
480
|
im_file = BytesIO(array)
|
|
481
481
|
pil_image = Image.open(im_file)
|
|
482
482
|
return np.array(pil_image)[:, :, ::-1]
|
|
483
483
|
|
|
484
|
-
def resize(self, image:
|
|
484
|
+
def resize(self, image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
|
|
485
485
|
"""
|
|
486
486
|
Resize a given image to new width, height. Specifying an interpolation method is required. Depending on the
|
|
487
487
|
chosen image library use one of the following:
|
|
@@ -498,17 +498,19 @@ class VizPackageHandler:
|
|
|
498
498
|
return getattr(self, self.pkg_func_dict["resize"])(image, width, height, interpolation)
|
|
499
499
|
|
|
500
500
|
@staticmethod
|
|
501
|
-
def _cv2_resize(image:
|
|
501
|
+
def _cv2_resize(image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
|
|
502
502
|
intpol_method_dict = {
|
|
503
503
|
"INTER_NEAREST": cv2.INTER_NEAREST,
|
|
504
504
|
"INTER_LINEAR": cv2.INTER_LINEAR,
|
|
505
505
|
"INTER_AREA": cv2.INTER_AREA,
|
|
506
506
|
"VIZ": cv2.INTER_LINEAR,
|
|
507
507
|
}
|
|
508
|
-
return cv2.resize(image, (width, height), interpolation=intpol_method_dict[interpolation])
|
|
508
|
+
return cv2.resize(image, dsize=(width, height), interpolation=intpol_method_dict[interpolation]).astype(
|
|
509
|
+
np.uint8
|
|
510
|
+
)
|
|
509
511
|
|
|
510
512
|
@staticmethod
|
|
511
|
-
def _pillow_resize(image:
|
|
513
|
+
def _pillow_resize(image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
|
|
512
514
|
intpol_method_dict = {
|
|
513
515
|
"NEAREST": Image.Resampling.NEAREST,
|
|
514
516
|
"BOX": Image.Resampling.BOX,
|
|
@@ -522,7 +524,7 @@ class VizPackageHandler:
|
|
|
522
524
|
)
|
|
523
525
|
return np.array(pil_image_resized)[:, :, ::-1]
|
|
524
526
|
|
|
525
|
-
def get_text_size(self, text: str, font_scale: float) ->
|
|
527
|
+
def get_text_size(self, text: str, font_scale: float) -> tuple[int, int]:
|
|
526
528
|
"""
|
|
527
529
|
Return the text size for a given font scale
|
|
528
530
|
:param text: text as string
|
|
@@ -531,19 +533,19 @@ class VizPackageHandler:
|
|
|
531
533
|
"""
|
|
532
534
|
return getattr(self, self.pkg_func_dict["get_text_size"])(text, font_scale)
|
|
533
535
|
|
|
534
|
-
def _cv2_get_text_size(self, text: str, font_scale: float) ->
|
|
536
|
+
def _cv2_get_text_size(self, text: str, font_scale: float) -> tuple[int, int]:
|
|
535
537
|
((width, height), _) = cv2.getTextSize(text, self.font, font_scale, 1) # type: ignore
|
|
536
538
|
return width, height
|
|
537
539
|
|
|
538
|
-
def _pillow_get_text_size(self, text: str, font_scale: float) ->
|
|
540
|
+
def _pillow_get_text_size(self, text: str, font_scale: float) -> tuple[int, int]: # pylint: disable=W0613
|
|
539
541
|
_, _, width, height = self.font.getbbox(text) # type: ignore
|
|
540
542
|
return width, height
|
|
541
543
|
|
|
542
544
|
def draw_rectangle(
|
|
543
|
-
self, np_image:
|
|
544
|
-
) ->
|
|
545
|
+
self, np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: tuple[int, int, int], thickness: int
|
|
546
|
+
) -> PixelValues:
|
|
545
547
|
"""
|
|
546
|
-
Drawing a rectangle into an image with a given color (b,g,r) and given thickness
|
|
548
|
+
Drawing a rectangle into an image with a given color (b,g,r) and given thickness.
|
|
547
549
|
|
|
548
550
|
:param np_image: image
|
|
549
551
|
:param box: box (x_min, y_min, x_max, y_max)
|
|
@@ -555,15 +557,15 @@ class VizPackageHandler:
|
|
|
555
557
|
|
|
556
558
|
@staticmethod
|
|
557
559
|
def _cv2_draw_rectangle(
|
|
558
|
-
np_image:
|
|
559
|
-
) ->
|
|
560
|
+
np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
|
|
561
|
+
) -> PixelValues:
|
|
560
562
|
cv2.rectangle(np_image, (box[0], box[1]), (box[2], box[3]), color=color, thickness=thickness)
|
|
561
563
|
return np_image
|
|
562
564
|
|
|
563
565
|
@staticmethod
|
|
564
566
|
def _pillow_draw_rectangle(
|
|
565
|
-
np_image:
|
|
566
|
-
) ->
|
|
567
|
+
np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
|
|
568
|
+
) -> PixelValues:
|
|
567
569
|
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
568
570
|
draw = ImageDraw.Draw(pil_image)
|
|
569
571
|
draw.rectangle(box, outline=color, width=thickness) # type: ignore
|
|
@@ -572,13 +574,13 @@ class VizPackageHandler:
|
|
|
572
574
|
|
|
573
575
|
def draw_text(
|
|
574
576
|
self,
|
|
575
|
-
np_image:
|
|
576
|
-
pos:
|
|
577
|
+
np_image: PixelValues,
|
|
578
|
+
pos: tuple[Any, Any],
|
|
577
579
|
text: str,
|
|
578
|
-
color:
|
|
580
|
+
color: tuple[int, int, int],
|
|
579
581
|
font_scale: float,
|
|
580
582
|
rectangle_thickness: int = 1,
|
|
581
|
-
) ->
|
|
583
|
+
) -> PixelValues:
|
|
582
584
|
"""
|
|
583
585
|
Drawing a text into a numpy image. The result will differ between PIL and CV2 (and will not look that good when
|
|
584
586
|
using PIL).
|
|
@@ -597,13 +599,13 @@ class VizPackageHandler:
|
|
|
597
599
|
|
|
598
600
|
def _cv2_draw_text(
|
|
599
601
|
self,
|
|
600
|
-
np_image:
|
|
601
|
-
pos:
|
|
602
|
+
np_image: PixelValues,
|
|
603
|
+
pos: tuple[Any, Any],
|
|
602
604
|
text: str,
|
|
603
|
-
color:
|
|
605
|
+
color: tuple[int, int, int],
|
|
604
606
|
font_scale: float,
|
|
605
607
|
rectangle_thickness: int,
|
|
606
|
-
) ->
|
|
608
|
+
) -> PixelValues:
|
|
607
609
|
"""
|
|
608
610
|
Draw text on an image.
|
|
609
611
|
|
|
@@ -641,13 +643,13 @@ class VizPackageHandler:
|
|
|
641
643
|
|
|
642
644
|
@staticmethod
|
|
643
645
|
def _pillow_draw_text(
|
|
644
|
-
np_image:
|
|
645
|
-
pos:
|
|
646
|
+
np_image: PixelValues,
|
|
647
|
+
pos: tuple[Any, Any],
|
|
646
648
|
text: str,
|
|
647
|
-
color:
|
|
649
|
+
color: tuple[int, int, int], # pylint: disable=W0613
|
|
648
650
|
font_scale: float, # pylint: disable=W0613
|
|
649
651
|
rectangle_thickness: int, # pylint: disable=W0613
|
|
650
|
-
) ->
|
|
652
|
+
) -> PixelValues:
|
|
651
653
|
"""Draw a text in an image using PIL."""
|
|
652
654
|
# using PIL default font size that does not scale to larger image sizes.
|
|
653
655
|
# Compare with https://github.com/python-pillow/Pillow/issues/6622
|
|
@@ -656,11 +658,11 @@ class VizPackageHandler:
|
|
|
656
658
|
draw.text(pos, text, fill=(0, 0, 0), anchor="lb")
|
|
657
659
|
return np.array(pil_image)[:, :, ::-1]
|
|
658
660
|
|
|
659
|
-
def interactive_imshow(self, np_image:
|
|
661
|
+
def interactive_imshow(self, np_image: PixelValues) -> None:
|
|
660
662
|
"""Displaying an image in a separate window"""
|
|
661
663
|
return getattr(self, self.pkg_func_dict["interactive_imshow"])(np_image)
|
|
662
664
|
|
|
663
|
-
def _cv2_interactive_imshow(self, np_image:
|
|
665
|
+
def _cv2_interactive_imshow(self, np_image: PixelValues) -> None:
|
|
664
666
|
"""
|
|
665
667
|
Display an image in a pop-up window
|
|
666
668
|
|
|
@@ -681,24 +683,24 @@ class VizPackageHandler:
|
|
|
681
683
|
elif key == "s":
|
|
682
684
|
cv2.imwrite("out.png", np_image)
|
|
683
685
|
elif key in ["+", "="]:
|
|
684
|
-
np_image = cv2.resize(np_image, None, fx=1.3, fy=1.3, interpolation=cv2.INTER_CUBIC)
|
|
686
|
+
np_image = cv2.resize(np_image, None, fx=1.3, fy=1.3, interpolation=cv2.INTER_CUBIC).astype(np.uint8)
|
|
685
687
|
self._cv2_interactive_imshow(np_image)
|
|
686
688
|
elif key == "-":
|
|
687
|
-
np_image = cv2.resize(np_image, None, fx=0.7, fy=0.7, interpolation=cv2.INTER_CUBIC)
|
|
689
|
+
np_image = cv2.resize(np_image, None, fx=0.7, fy=0.7, interpolation=cv2.INTER_CUBIC).astype(np.uint8)
|
|
688
690
|
self._cv2_interactive_imshow(np_image)
|
|
689
691
|
|
|
690
692
|
@staticmethod
|
|
691
|
-
def _pillow_interactive_imshow(np_image:
|
|
693
|
+
def _pillow_interactive_imshow(np_image: PixelValues) -> None:
|
|
692
694
|
name = "q, x: quit / s: save"
|
|
693
695
|
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
694
696
|
pil_image.show(name)
|
|
695
697
|
|
|
696
|
-
def rotate_image(self, np_image:
|
|
698
|
+
def rotate_image(self, np_image: PixelValues, angle: float) -> PixelValues:
|
|
697
699
|
"""Rotating an image by some angle"""
|
|
698
700
|
return getattr(self, self.pkg_func_dict["rotate_image"])(np_image, angle)
|
|
699
701
|
|
|
700
702
|
@staticmethod
|
|
701
|
-
def _cv2_rotate_image(np_image:
|
|
703
|
+
def _cv2_rotate_image(np_image: PixelValues, angle: float) -> PixelValues:
|
|
702
704
|
# copy & paste from https://stackoverflow.com/questions/43892506
|
|
703
705
|
# /opencv-python-rotate-image-without-cropping-sides
|
|
704
706
|
|
|
@@ -718,16 +720,16 @@ class VizPackageHandler:
|
|
|
718
720
|
rotation_mat[0, 2] += bound_w / 2 - image_center[0]
|
|
719
721
|
rotation_mat[1, 2] += bound_h / 2 - image_center[1]
|
|
720
722
|
|
|
721
|
-
np_image = cv2.warpAffine(
|
|
723
|
+
np_image = cv2.warpAffine(
|
|
722
724
|
src=np_image,
|
|
723
725
|
M=rotation_mat,
|
|
724
726
|
dsize=(bound_w, bound_h),
|
|
725
|
-
)
|
|
727
|
+
).astype(np.uint8)
|
|
726
728
|
|
|
727
729
|
return np_image
|
|
728
730
|
|
|
729
731
|
@staticmethod
|
|
730
|
-
def _pillow_rotate_image(np_image:
|
|
732
|
+
def _pillow_rotate_image(np_image: PixelValues, angle: float) -> PixelValues:
|
|
731
733
|
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
732
734
|
pil_image_rotated = pil_image.rotate(angle, expand=True)
|
|
733
735
|
return np.array(pil_image_rotated)[:, :, ::-1]
|