deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (111) hide show
  1. deepdoctection/__init__.py +8 -25
  2. deepdoctection/analyzer/dd.py +84 -71
  3. deepdoctection/dataflow/common.py +9 -5
  4. deepdoctection/dataflow/custom.py +5 -5
  5. deepdoctection/dataflow/custom_serialize.py +75 -18
  6. deepdoctection/dataflow/parallel_map.py +3 -3
  7. deepdoctection/dataflow/serialize.py +4 -4
  8. deepdoctection/dataflow/stats.py +3 -3
  9. deepdoctection/datapoint/annotation.py +78 -56
  10. deepdoctection/datapoint/box.py +7 -7
  11. deepdoctection/datapoint/convert.py +6 -6
  12. deepdoctection/datapoint/image.py +157 -75
  13. deepdoctection/datapoint/view.py +175 -151
  14. deepdoctection/datasets/adapter.py +30 -24
  15. deepdoctection/datasets/base.py +10 -10
  16. deepdoctection/datasets/dataflow_builder.py +3 -3
  17. deepdoctection/datasets/info.py +23 -25
  18. deepdoctection/datasets/instances/doclaynet.py +48 -49
  19. deepdoctection/datasets/instances/fintabnet.py +44 -45
  20. deepdoctection/datasets/instances/funsd.py +23 -23
  21. deepdoctection/datasets/instances/iiitar13k.py +8 -8
  22. deepdoctection/datasets/instances/layouttest.py +2 -2
  23. deepdoctection/datasets/instances/publaynet.py +3 -3
  24. deepdoctection/datasets/instances/pubtables1m.py +18 -18
  25. deepdoctection/datasets/instances/pubtabnet.py +30 -29
  26. deepdoctection/datasets/instances/rvlcdip.py +28 -29
  27. deepdoctection/datasets/instances/xfund.py +51 -30
  28. deepdoctection/datasets/save.py +6 -6
  29. deepdoctection/eval/accmetric.py +32 -33
  30. deepdoctection/eval/base.py +8 -9
  31. deepdoctection/eval/cocometric.py +13 -12
  32. deepdoctection/eval/eval.py +32 -26
  33. deepdoctection/eval/tedsmetric.py +16 -12
  34. deepdoctection/eval/tp_eval_callback.py +7 -16
  35. deepdoctection/extern/base.py +339 -134
  36. deepdoctection/extern/d2detect.py +69 -89
  37. deepdoctection/extern/deskew.py +11 -10
  38. deepdoctection/extern/doctrocr.py +81 -64
  39. deepdoctection/extern/fastlang.py +23 -16
  40. deepdoctection/extern/hfdetr.py +53 -38
  41. deepdoctection/extern/hflayoutlm.py +216 -155
  42. deepdoctection/extern/hflm.py +35 -30
  43. deepdoctection/extern/model.py +433 -255
  44. deepdoctection/extern/pdftext.py +15 -15
  45. deepdoctection/extern/pt/ptutils.py +4 -2
  46. deepdoctection/extern/tessocr.py +39 -38
  47. deepdoctection/extern/texocr.py +14 -16
  48. deepdoctection/extern/tp/tfutils.py +16 -2
  49. deepdoctection/extern/tp/tpcompat.py +11 -7
  50. deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
  51. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
  52. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
  53. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
  54. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
  55. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
  56. deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
  57. deepdoctection/extern/tpdetect.py +40 -45
  58. deepdoctection/mapper/cats.py +36 -40
  59. deepdoctection/mapper/cocostruct.py +16 -12
  60. deepdoctection/mapper/d2struct.py +22 -22
  61. deepdoctection/mapper/hfstruct.py +7 -7
  62. deepdoctection/mapper/laylmstruct.py +22 -24
  63. deepdoctection/mapper/maputils.py +9 -10
  64. deepdoctection/mapper/match.py +33 -2
  65. deepdoctection/mapper/misc.py +6 -7
  66. deepdoctection/mapper/pascalstruct.py +4 -4
  67. deepdoctection/mapper/prodigystruct.py +6 -6
  68. deepdoctection/mapper/pubstruct.py +84 -92
  69. deepdoctection/mapper/tpstruct.py +3 -3
  70. deepdoctection/mapper/xfundstruct.py +33 -33
  71. deepdoctection/pipe/anngen.py +39 -14
  72. deepdoctection/pipe/base.py +68 -99
  73. deepdoctection/pipe/common.py +181 -85
  74. deepdoctection/pipe/concurrency.py +14 -10
  75. deepdoctection/pipe/doctectionpipe.py +24 -21
  76. deepdoctection/pipe/language.py +20 -25
  77. deepdoctection/pipe/layout.py +18 -16
  78. deepdoctection/pipe/lm.py +49 -47
  79. deepdoctection/pipe/order.py +63 -65
  80. deepdoctection/pipe/refine.py +102 -109
  81. deepdoctection/pipe/segment.py +157 -162
  82. deepdoctection/pipe/sub_layout.py +50 -40
  83. deepdoctection/pipe/text.py +37 -36
  84. deepdoctection/pipe/transform.py +19 -16
  85. deepdoctection/train/d2_frcnn_train.py +27 -25
  86. deepdoctection/train/hf_detr_train.py +22 -18
  87. deepdoctection/train/hf_layoutlm_train.py +49 -48
  88. deepdoctection/train/tp_frcnn_train.py +10 -11
  89. deepdoctection/utils/concurrency.py +1 -1
  90. deepdoctection/utils/context.py +13 -6
  91. deepdoctection/utils/develop.py +4 -4
  92. deepdoctection/utils/env_info.py +52 -14
  93. deepdoctection/utils/file_utils.py +6 -11
  94. deepdoctection/utils/fs.py +41 -14
  95. deepdoctection/utils/identifier.py +2 -2
  96. deepdoctection/utils/logger.py +15 -15
  97. deepdoctection/utils/metacfg.py +7 -7
  98. deepdoctection/utils/pdf_utils.py +39 -14
  99. deepdoctection/utils/settings.py +188 -182
  100. deepdoctection/utils/tqdm.py +1 -1
  101. deepdoctection/utils/transform.py +14 -9
  102. deepdoctection/utils/types.py +104 -0
  103. deepdoctection/utils/utils.py +7 -7
  104. deepdoctection/utils/viz.py +70 -69
  105. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
  106. deepdoctection-0.34.dist-info/RECORD +146 -0
  107. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
  108. deepdoctection/utils/detection_types.py +0 -68
  109. deepdoctection-0.32.dist-info/RECORD +0 -146
  110. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
  111. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
@@ -21,6 +21,8 @@ of coordinates. Most of the ideas have been taken from
21
21
  <https://github.com/tensorpack/dataflow/blob/master/dataflow/dataflow/imgaug/transform.py> .
22
22
  """
23
23
 
24
+ from __future__ import annotations
25
+
24
26
  from abc import ABC, abstractmethod
25
27
  from typing import Literal, Optional, Union
26
28
 
@@ -28,7 +30,7 @@ import numpy as np
28
30
  import numpy.typing as npt
29
31
  from numpy import float32
30
32
 
31
- from .detection_types import ImageType
33
+ from .types import PixelValues
32
34
  from .viz import viz_handler
33
35
 
34
36
  __all__ = ["ResizeTransform", "InferenceResize", "PadTransform", "normalize_image"]
@@ -45,7 +47,7 @@ class BaseTransform(ABC):
45
47
  """
46
48
 
47
49
  @abstractmethod
48
- def apply_image(self, img: ImageType) -> ImageType:
50
+ def apply_image(self, img: PixelValues) -> PixelValues:
49
51
  """The transformation that should be applied to the image"""
50
52
  raise NotImplementedError()
51
53
 
@@ -77,7 +79,7 @@ class ResizeTransform(BaseTransform):
77
79
  self.new_w = int(new_w)
78
80
  self.interp = interp
79
81
 
80
- def apply_image(self, img: ImageType) -> ImageType:
82
+ def apply_image(self, img: PixelValues) -> PixelValues:
81
83
  assert img.shape[:2] == (self.h, self.w)
82
84
  ret = viz_handler.resize(img, self.new_w, self.new_h, self.interp)
83
85
  if img.ndim == 3 and ret.ndim == 2:
@@ -85,7 +87,8 @@ class ResizeTransform(BaseTransform):
85
87
  return ret
86
88
 
87
89
  def apply_coords(self, coords: npt.NDArray[float32]) -> npt.NDArray[float32]:
88
- """Transformation that should be applied to coordinates"""
90
+ """Transformation that should be applied to coordinates. Coords are supposed to be passed as
91
+ np array of points"""
89
92
  coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w)
90
93
  coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h)
91
94
  return coords
@@ -106,7 +109,7 @@ class InferenceResize:
106
109
  self.max_size = max_size
107
110
  self.interp = interp
108
111
 
109
- def get_transform(self, img: ImageType) -> ResizeTransform:
112
+ def get_transform(self, img: PixelValues) -> ResizeTransform:
110
113
  """
111
114
  get transform
112
115
  """
@@ -129,7 +132,9 @@ class InferenceResize:
129
132
  return ResizeTransform(h, w, new_h, new_w, self.interp)
130
133
 
131
134
 
132
- def normalize_image(image: ImageType, pixel_mean: npt.NDArray[float32], pixel_std: npt.NDArray[float32]) -> ImageType:
135
+ def normalize_image(
136
+ image: PixelValues, pixel_mean: npt.NDArray[float32], pixel_std: npt.NDArray[float32]
137
+ ) -> PixelValues:
133
138
  """
134
139
  Preprocess pixel values of an image by rescaling.
135
140
 
@@ -140,7 +145,7 @@ def normalize_image(image: ImageType, pixel_mean: npt.NDArray[float32], pixel_st
140
145
  return (image - pixel_mean) * (1.0 / pixel_std)
141
146
 
142
147
 
143
- def pad_image(image: ImageType, top: int, right: int, bottom: int, left: int) -> ImageType:
148
+ def pad_image(image: PixelValues, top: int, right: int, bottom: int, left: int) -> PixelValues:
144
149
  """Pad an image with white color and with given top/bottom/right/left pixel values. Only white padding is
145
150
  currently supported
146
151
 
@@ -181,7 +186,7 @@ class PadTransform(BaseTransform):
181
186
  self.image_height: Optional[int] = None
182
187
  self.mode = mode
183
188
 
184
- def apply_image(self, img: ImageType) -> ImageType:
189
+ def apply_image(self, img: PixelValues) -> PixelValues:
185
190
  """Apply padding to image"""
186
191
  self.image_width = img.shape[1]
187
192
  self.image_height = img.shape[0]
@@ -214,6 +219,6 @@ class PadTransform(BaseTransform):
214
219
  coords[:, 1] = np.maximum(coords[:, 1] - self.top, np.zeros(coords[:, 1].shape))
215
220
  return coords
216
221
 
217
- def clone(self) -> "PadTransform":
222
+ def clone(self) -> PadTransform:
218
223
  """clone"""
219
224
  return self.__class__(self.top, self.right, self.bottom, self.left, self.mode)
@@ -0,0 +1,104 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File: types.py
3
+
4
+ # Copyright 2021 Dr. Janis Meyer. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ """
19
+ Typing sheet for the whole package
20
+ """
21
+
22
+ import os
23
+ import queue
24
+ from typing import TYPE_CHECKING, Any, Protocol, Type, TypeVar, Union
25
+
26
+ import numpy.typing as npt
27
+ import tqdm
28
+ from numpy import uint8
29
+ from typing_extensions import TypeAlias
30
+
31
+
32
+ # Type for a general dataclass
33
+ class IsDataclass(Protocol): # pylint: disable=R0903
34
+ """
35
+ type hint for general dataclass
36
+ """
37
+
38
+ __dataclass_fields__: dict[Any, Any]
39
+
40
+
41
+ # Numpy image type
42
+ PixelValues = npt.NDArray[uint8]
43
+ # b64 encoded image as string
44
+ B64Str: TypeAlias = str
45
+ # b64 encoded image in bytes
46
+ B64: TypeAlias = bytes
47
+
48
+ # Typing for curry decorator
49
+ DP = TypeVar("DP")
50
+ S = TypeVar("S")
51
+ T = TypeVar("T")
52
+
53
+ # Some type hints that must be distinguished when running mypy and linters
54
+ if TYPE_CHECKING:
55
+ QueueType = queue.Queue[Any] # pylint: disable=E1136
56
+ TqdmType = tqdm.tqdm[Any] # pylint: disable=E1136
57
+ BaseExceptionType = Type[BaseException]
58
+
59
+ else:
60
+ BaseExceptionType = bool
61
+ QueueType = queue.Queue
62
+ TqdmType = tqdm.tqdm
63
+
64
+
65
+ JsonDict = dict[str, Any]
66
+
67
+
68
+ # Some common deepdoctection dict-types
69
+ AnnotationDict: TypeAlias = dict[str, Any]
70
+ ImageDict: TypeAlias = dict[str, Any]
71
+
72
+ # We use these types for output types of the Page object
73
+ Text_: TypeAlias = dict[str, Any]
74
+ HTML: TypeAlias = str
75
+ csv: TypeAlias = list[list[str]]
76
+ Chunks: TypeAlias = list[tuple[str, str, int, str, str, str, str]]
77
+
78
+ # Some common dict-types used in common annotation schemes converted from a generic JSON object
79
+ CocoDatapointDict: TypeAlias = dict[str, Any]
80
+ PubtabnetDict: TypeAlias = dict[str, Any]
81
+ FunsdDict: TypeAlias = dict[str, Any]
82
+ Detectron2Dict: TypeAlias = dict[str, Any]
83
+
84
+
85
+ # A path to a file, directory etc. can be given as a string or Path object
86
+ PathLikeOrStr: TypeAlias = Union[str, os.PathLike]
87
+
88
+ # mainly used in utils
89
+ # Type for requirements. A requirement is a tuple of a string, a bool that is True if the requirement is
90
+ # available, and an error message string
91
+ PackageAvailable: TypeAlias = bool
92
+ ErrorMsg: TypeAlias = str
93
+ Requirement = tuple[str, PackageAvailable, ErrorMsg]
94
+
95
+ BGR: TypeAlias = tuple[int, int, int]
96
+
97
+ # A type to collect key val pairs of environ information. Mainly used in env_info.py
98
+ KeyValEnvInfos: TypeAlias = list[tuple[str, str]]
99
+
100
+ # mainly used in extern
101
+
102
+
103
+ # mainly used in eval
104
+ MetricResults: TypeAlias = dict[str, Union[int, float]]
@@ -23,16 +23,16 @@ import inspect
23
23
  import os
24
24
  from collections.abc import MutableMapping
25
25
  from datetime import datetime
26
- from typing import Any, Callable, Dict, List, Sequence, Set, Union
26
+ from typing import Any, Callable, Sequence, Union
27
27
 
28
28
  import numpy as np
29
29
 
30
- from .detection_types import Pathlike
30
+ from .types import PathLikeOrStr
31
31
 
32
32
 
33
33
  def delete_keys_from_dict(
34
- dictionary: Union[Dict[Any, Any], MutableMapping], keys: Union[str, List[str], Set[str]] # type: ignore
35
- ) -> Dict[Any, Any]:
34
+ dictionary: Union[dict[Any, Any], MutableMapping], keys: Union[str, list[str], set[str]] # type: ignore
35
+ ) -> dict[Any, Any]:
36
36
  """
37
37
  Removing key/value pairs from dictionary. Works for nested dicts as well.
38
38
 
@@ -62,7 +62,7 @@ def delete_keys_from_dict(
62
62
  return modified_dict
63
63
 
64
64
 
65
- def split_string(input_string: str) -> List[str]:
65
+ def split_string(input_string: str) -> list[str]:
66
66
  """
67
67
  Takes a string, splits between commas and returns a list with split components as list elements
68
68
 
@@ -71,7 +71,7 @@ def split_string(input_string: str) -> List[str]:
71
71
  return input_string.split(",")
72
72
 
73
73
 
74
- def string_to_dict(input_string: str) -> Dict[str, str]:
74
+ def string_to_dict(input_string: str) -> dict[str, str]:
75
75
  """
76
76
  Takes a string of a form `key1=val1,key2=val2` and returns the corresponding dict
77
77
  """
@@ -144,7 +144,7 @@ def get_rng(obj: Any = None) -> np.random.RandomState:
144
144
  return np.random.RandomState(seed)
145
145
 
146
146
 
147
- def is_file_extension(file_name: Pathlike, extension: Union[str, Sequence[str]]) -> bool:
147
+ def is_file_extension(file_name: PathLikeOrStr, extension: Union[str, Sequence[str]]) -> bool:
148
148
  """
149
149
  Check if a given file name has a given extension
150
150
 
@@ -25,22 +25,21 @@ and
25
25
  <https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/colormap.py>
26
26
  """
27
27
 
28
- import ast
29
28
  import base64
30
29
  import os
31
30
  import sys
32
31
  from io import BytesIO
33
- from typing import Any, Dict, List, Optional, Sequence, Tuple, no_type_check
32
+ from typing import Any, Optional, Sequence, no_type_check
34
33
 
35
34
  import numpy as np
36
35
  import numpy.typing as npt
37
36
  from lazy_imports import try_import
38
37
  from numpy import float32, uint8
39
38
 
40
- from .detection_types import ImageType
41
- from .env_info import auto_select_viz_library
39
+ from .env_info import ENV_VARS_TRUE, auto_select_viz_library
42
40
  from .error import DependencyError
43
41
  from .file_utils import get_opencv_requirement, get_pillow_requirement
42
+ from .types import BGR, B64Str, PathLikeOrStr, PixelValues
44
43
 
45
44
  with try_import() as cv2_import_guard:
46
45
  import cv2
@@ -184,7 +183,7 @@ _COLORS = (
184
183
  )
185
184
 
186
185
 
187
- def random_color(rgb: bool = True, maximum: int = 255) -> Tuple[int, int, int]:
186
+ def random_color(rgb: bool = True, maximum: int = 255) -> tuple[int, int, int]:
188
187
  """
189
188
  :param rgb: Whether to return RGB colors or BGR colors.
190
189
  :param maximum: either 255 or 1
@@ -199,14 +198,14 @@ def random_color(rgb: bool = True, maximum: int = 255) -> Tuple[int, int, int]:
199
198
 
200
199
 
201
200
  def draw_boxes(
202
- np_image: ImageType,
201
+ np_image: PixelValues,
203
202
  boxes: npt.NDArray[float32],
204
- category_names_list: Optional[List[Optional[str]]] = None,
205
- color: Optional[Tuple[int, int, int]] = None,
203
+ category_names_list: Optional[list[Optional[str]]] = None,
204
+ color: Optional[BGR] = None,
206
205
  font_scale: float = 1.0,
207
206
  rectangle_thickness: int = 4,
208
207
  box_color_by_category: bool = True,
209
- ) -> ImageType:
208
+ ) -> PixelValues:
210
209
  """
211
210
  Draw bounding boxes with category names into image.
212
211
 
@@ -247,7 +246,7 @@ def draw_boxes(
247
246
  np_image = np_image.copy()
248
247
 
249
248
  if np_image.ndim == 2 or (np_image.ndim == 3 and np_image.shape[2] == 1):
250
- np_image = cv2.cvtColor(np_image, cv2.COLOR_GRAY2BGR) # type: ignore
249
+ np_image = cv2.cvtColor(np_image, cv2.COLOR_GRAY2BGR).astype(np.uint8)
251
250
  for i in sorted_inds:
252
251
  box = boxes[i, :]
253
252
  choose_color = category_to_color.get(category_names_list[i]) if category_to_color is not None else color
@@ -280,7 +279,7 @@ def draw_boxes(
280
279
 
281
280
 
282
281
  @no_type_check
283
- def interactive_imshow(img: ImageType) -> None:
282
+ def interactive_imshow(img: PixelValues) -> None:
284
283
  """
285
284
  Display an image in a pop-up window
286
285
 
@@ -329,7 +328,7 @@ class VizPackageHandler:
329
328
  def __init__(self) -> None:
330
329
  """Selecting the image processing library and fonts"""
331
330
  package = self._select_package()
332
- self.pkg_func_dict: Dict[str, str] = {}
331
+ self.pkg_func_dict: dict[str, str] = {}
333
332
  self.font = None
334
333
  self._set_vars(package)
335
334
 
@@ -340,8 +339,8 @@ class VizPackageHandler:
340
339
  Otherwise it will use Pillow as default package
341
340
  :return: either 'pillow' or 'cv2'
342
341
  """
343
- maybe_cv2 = "cv2" if ast.literal_eval(os.environ.get("USE_DD_OPENCV", "False")) else None
344
- maybe_pil = "pillow" if ast.literal_eval(os.environ.get("USE_DD_PILLOW", "True")) else None
342
+ maybe_cv2 = "cv2" if os.environ.get("USE_DD_OPENCV", "False") in ENV_VARS_TRUE else None
343
+ maybe_pil = "pillow" if os.environ.get("USE_DD_PILLOW", "True") in ENV_VARS_TRUE else None
345
344
 
346
345
  if not maybe_cv2 and not maybe_pil:
347
346
  raise EnvironmentError(
@@ -387,7 +386,7 @@ class VizPackageHandler:
387
386
  package = self._select_package()
388
387
  self._set_vars(package)
389
388
 
390
- def read_image(self, path: str) -> ImageType:
389
+ def read_image(self, path: PathLikeOrStr) -> PixelValues:
391
390
  """Reading an image from file and returning a np.array
392
391
 
393
392
  :param path: Use /path/to/dir/file_name.[suffix]
@@ -395,16 +394,16 @@ class VizPackageHandler:
395
394
  return getattr(self, self.pkg_func_dict["read_image"])(path)
396
395
 
397
396
  @staticmethod
398
- def _cv2_read_image(path: str) -> ImageType:
399
- return cv2.imread(path, cv2.IMREAD_COLOR) # type: ignore
397
+ def _cv2_read_image(path: PathLikeOrStr) -> PixelValues:
398
+ return cv2.imread(os.fspath(path), cv2.IMREAD_COLOR).astype(np.uint8)
400
399
 
401
400
  @staticmethod
402
- def _pillow_read_image(path: str) -> ImageType:
403
- with Image.open(path).convert("RGB") as image:
401
+ def _pillow_read_image(path: PathLikeOrStr) -> PixelValues:
402
+ with Image.open(os.fspath(path)).convert("RGB") as image:
404
403
  np_image = np.array(image)[:, :, ::-1]
405
404
  return np_image
406
405
 
407
- def write_image(self, path: str, image: ImageType) -> None:
406
+ def write_image(self, path: PathLikeOrStr, image: PixelValues) -> None:
408
407
  """Writing an image as np.array to a file.
409
408
 
410
409
  :param path: Use /path/to/dir/file_name.[suffix]
@@ -413,15 +412,15 @@ class VizPackageHandler:
413
412
  return getattr(self, self.pkg_func_dict["write_image"])(path, image)
414
413
 
415
414
  @staticmethod
416
- def _cv2_write_image(path: str, image: ImageType) -> None:
417
- cv2.imwrite(path, image)
415
+ def _cv2_write_image(path: PathLikeOrStr, image: PixelValues) -> None:
416
+ cv2.imwrite(os.fspath(path), image)
418
417
 
419
418
  @staticmethod
420
- def _pillow_write_image(path: str, image: ImageType) -> None:
419
+ def _pillow_write_image(path: PathLikeOrStr, image: PixelValues) -> None:
421
420
  pil_image = Image.fromarray(np.uint8(image[:, :, ::-1]))
422
- pil_image.save(path)
421
+ pil_image.save(os.fspath(path))
423
422
 
424
- def encode(self, np_image: ImageType) -> bytes:
423
+ def encode(self, np_image: PixelValues) -> bytes:
425
424
  """Converting an image as np.array into a b64 representation
426
425
 
427
426
  :param np_image: Image as np.array
@@ -429,19 +428,19 @@ class VizPackageHandler:
429
428
  return getattr(self, self.pkg_func_dict["encode"])(np_image)
430
429
 
431
430
  @staticmethod
432
- def _cv2_encode(np_image: ImageType) -> bytes:
431
+ def _cv2_encode(np_image: PixelValues) -> bytes:
433
432
  np_encode = cv2.imencode(".png", np_image)
434
433
  b_image = np_encode[1].tobytes()
435
434
  return b_image
436
435
 
437
436
  @staticmethod
438
- def _pillow_encode(np_image: ImageType) -> bytes:
437
+ def _pillow_encode(np_image: PixelValues) -> bytes:
439
438
  buffered = BytesIO()
440
439
  pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
441
440
  pil_image.save(buffered, format="PNG")
442
441
  return buffered.getvalue()
443
442
 
444
- def convert_np_to_b64(self, image: ImageType) -> str:
443
+ def convert_np_to_b64(self, image: PixelValues) -> str:
445
444
  """Converting an image given as np.array into a b64 encoded string
446
445
 
447
446
  :param image: Image as np.array
@@ -449,18 +448,18 @@ class VizPackageHandler:
449
448
  return getattr(self, self.pkg_func_dict["convert_np_to_b64"])(image)
450
449
 
451
450
  @staticmethod
452
- def _cv2_convert_np_to_b64(image: ImageType) -> str:
451
+ def _cv2_convert_np_to_b64(image: PixelValues) -> str:
453
452
  np_encode = cv2.imencode(".png", image)
454
453
  return base64.b64encode(np_encode[1]).decode("utf-8") # type: ignore
455
454
 
456
455
  @staticmethod
457
- def _pillow_convert_np_to_b64(np_image: ImageType) -> str:
456
+ def _pillow_convert_np_to_b64(np_image: PixelValues) -> str:
458
457
  buffered = BytesIO()
459
458
  pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
460
459
  pil_image.save(buffered, format="PNG")
461
460
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
462
461
 
463
- def convert_b64_to_np(self, image: str) -> ImageType:
462
+ def convert_b64_to_np(self, image: B64Str) -> PixelValues:
464
463
  """
465
464
  Converting an image as b64 encoded string into np.array
466
465
 
@@ -470,19 +469,19 @@ class VizPackageHandler:
470
469
  return getattr(self, self.pkg_func_dict["convert_b64_to_np"])(image)
471
470
 
472
471
  @staticmethod
473
- def _cv2_convert_b64_to_np(image: str) -> ImageType:
472
+ def _cv2_convert_b64_to_np(image: B64Str) -> PixelValues:
474
473
  np_array = np.fromstring(base64.b64decode(image), np.uint8) # type: ignore
475
474
  np_array = cv2.imdecode(np_array, cv2.IMREAD_COLOR).astype(np.float32)
476
475
  return np_array.astype(uint8)
477
476
 
478
477
  @staticmethod
479
- def _pillow_convert_b64_to_np(image: str) -> ImageType:
478
+ def _pillow_convert_b64_to_np(image: B64Str) -> PixelValues:
480
479
  array = base64.b64decode(image)
481
480
  im_file = BytesIO(array)
482
481
  pil_image = Image.open(im_file)
483
482
  return np.array(pil_image)[:, :, ::-1]
484
483
 
485
- def resize(self, image: ImageType, width: int, height: int, interpolation: str) -> ImageType:
484
+ def resize(self, image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
486
485
  """
487
486
  Resize a given image to new width, height. Specifying an interpolation method is required. Depending on the
488
487
  chosen image library use one of the following:
@@ -499,17 +498,19 @@ class VizPackageHandler:
499
498
  return getattr(self, self.pkg_func_dict["resize"])(image, width, height, interpolation)
500
499
 
501
500
  @staticmethod
502
- def _cv2_resize(image: ImageType, width: int, height: int, interpolation: str) -> ImageType:
501
+ def _cv2_resize(image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
503
502
  intpol_method_dict = {
504
503
  "INTER_NEAREST": cv2.INTER_NEAREST,
505
504
  "INTER_LINEAR": cv2.INTER_LINEAR,
506
505
  "INTER_AREA": cv2.INTER_AREA,
507
506
  "VIZ": cv2.INTER_LINEAR,
508
507
  }
509
- return cv2.resize(image, (width, height), interpolation=intpol_method_dict[interpolation]) # type: ignore
508
+ return cv2.resize(image, dsize=(width, height), interpolation=intpol_method_dict[interpolation]).astype(
509
+ np.uint8
510
+ )
510
511
 
511
512
  @staticmethod
512
- def _pillow_resize(image: ImageType, width: int, height: int, interpolation: str) -> ImageType:
513
+ def _pillow_resize(image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
513
514
  intpol_method_dict = {
514
515
  "NEAREST": Image.Resampling.NEAREST,
515
516
  "BOX": Image.Resampling.BOX,
@@ -523,7 +524,7 @@ class VizPackageHandler:
523
524
  )
524
525
  return np.array(pil_image_resized)[:, :, ::-1]
525
526
 
526
- def get_text_size(self, text: str, font_scale: float) -> Tuple[int, int]:
527
+ def get_text_size(self, text: str, font_scale: float) -> tuple[int, int]:
527
528
  """
528
529
  Return the text size for a given font scale
529
530
  :param text: text as string
@@ -532,19 +533,19 @@ class VizPackageHandler:
532
533
  """
533
534
  return getattr(self, self.pkg_func_dict["get_text_size"])(text, font_scale)
534
535
 
535
- def _cv2_get_text_size(self, text: str, font_scale: float) -> Tuple[int, int]:
536
+ def _cv2_get_text_size(self, text: str, font_scale: float) -> tuple[int, int]:
536
537
  ((width, height), _) = cv2.getTextSize(text, self.font, font_scale, 1) # type: ignore
537
538
  return width, height
538
539
 
539
- def _pillow_get_text_size(self, text: str, font_scale: float) -> Tuple[int, int]: # pylint: disable=W0613
540
+ def _pillow_get_text_size(self, text: str, font_scale: float) -> tuple[int, int]: # pylint: disable=W0613
540
541
  _, _, width, height = self.font.getbbox(text) # type: ignore
541
542
  return width, height
542
543
 
543
544
  def draw_rectangle(
544
- self, np_image: ImageType, box: Tuple[Any, Any, Any, Any], color: Tuple[int, int, int], thickness: int
545
- ) -> ImageType:
545
+ self, np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: tuple[int, int, int], thickness: int
546
+ ) -> PixelValues:
546
547
  """
547
- Drawing a rectangle into an image with a given color (b,g,r) and given thickness
548
+ Drawing a rectangle into an image with a given color (b,g,r) and given thickness.
548
549
 
549
550
  :param np_image: image
550
551
  :param box: box (x_min, y_min, x_max, y_max)
@@ -556,15 +557,15 @@ class VizPackageHandler:
556
557
 
557
558
  @staticmethod
558
559
  def _cv2_draw_rectangle(
559
- np_image: ImageType, box: Tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
560
- ) -> ImageType:
560
+ np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
561
+ ) -> PixelValues:
561
562
  cv2.rectangle(np_image, (box[0], box[1]), (box[2], box[3]), color=color, thickness=thickness)
562
563
  return np_image
563
564
 
564
565
  @staticmethod
565
566
  def _pillow_draw_rectangle(
566
- np_image: ImageType, box: Tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
567
- ) -> ImageType:
567
+ np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
568
+ ) -> PixelValues:
568
569
  pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
569
570
  draw = ImageDraw.Draw(pil_image)
570
571
  draw.rectangle(box, outline=color, width=thickness) # type: ignore
@@ -573,13 +574,13 @@ class VizPackageHandler:
573
574
 
574
575
  def draw_text(
575
576
  self,
576
- np_image: ImageType,
577
- pos: Tuple[Any, Any],
577
+ np_image: PixelValues,
578
+ pos: tuple[Any, Any],
578
579
  text: str,
579
- color: Tuple[int, int, int],
580
+ color: tuple[int, int, int],
580
581
  font_scale: float,
581
582
  rectangle_thickness: int = 1,
582
- ) -> ImageType:
583
+ ) -> PixelValues:
583
584
  """
584
585
  Drawing a text into a numpy image. The result will differ between PIL and CV2 (and will not look that good when
585
586
  using PIL).
@@ -598,13 +599,13 @@ class VizPackageHandler:
598
599
 
599
600
  def _cv2_draw_text(
600
601
  self,
601
- np_image: ImageType,
602
- pos: Tuple[Any, Any],
602
+ np_image: PixelValues,
603
+ pos: tuple[Any, Any],
603
604
  text: str,
604
- color: Tuple[int, int, int],
605
+ color: tuple[int, int, int],
605
606
  font_scale: float,
606
607
  rectangle_thickness: int,
607
- ) -> ImageType:
608
+ ) -> PixelValues:
608
609
  """
609
610
  Draw text on an image.
610
611
 
@@ -642,13 +643,13 @@ class VizPackageHandler:
642
643
 
643
644
  @staticmethod
644
645
  def _pillow_draw_text(
645
- np_image: ImageType,
646
- pos: Tuple[Any, Any],
646
+ np_image: PixelValues,
647
+ pos: tuple[Any, Any],
647
648
  text: str,
648
- color: Tuple[int, int, int], # pylint: disable=W0613
649
+ color: tuple[int, int, int], # pylint: disable=W0613
649
650
  font_scale: float, # pylint: disable=W0613
650
651
  rectangle_thickness: int, # pylint: disable=W0613
651
- ) -> ImageType:
652
+ ) -> PixelValues:
652
653
  """Draw a text in an image using PIL."""
653
654
  # using PIL default font size that does not scale to larger image sizes.
654
655
  # Compare with https://github.com/python-pillow/Pillow/issues/6622
@@ -657,11 +658,11 @@ class VizPackageHandler:
657
658
  draw.text(pos, text, fill=(0, 0, 0), anchor="lb")
658
659
  return np.array(pil_image)[:, :, ::-1]
659
660
 
660
- def interactive_imshow(self, np_image: ImageType) -> None:
661
+ def interactive_imshow(self, np_image: PixelValues) -> None:
661
662
  """Displaying an image in a separate window"""
662
663
  return getattr(self, self.pkg_func_dict["interactive_imshow"])(np_image)
663
664
 
664
- def _cv2_interactive_imshow(self, np_image: ImageType) -> None:
665
+ def _cv2_interactive_imshow(self, np_image: PixelValues) -> None:
665
666
  """
666
667
  Display an image in a pop-up window
667
668
 
@@ -682,24 +683,24 @@ class VizPackageHandler:
682
683
  elif key == "s":
683
684
  cv2.imwrite("out.png", np_image)
684
685
  elif key in ["+", "="]:
685
- np_image = cv2.resize(np_image, None, fx=1.3, fy=1.3, interpolation=cv2.INTER_CUBIC) # type: ignore
686
+ np_image = cv2.resize(np_image, None, fx=1.3, fy=1.3, interpolation=cv2.INTER_CUBIC).astype(np.uint8)
686
687
  self._cv2_interactive_imshow(np_image)
687
688
  elif key == "-":
688
- np_image = cv2.resize(np_image, None, fx=0.7, fy=0.7, interpolation=cv2.INTER_CUBIC) # type: ignore
689
+ np_image = cv2.resize(np_image, None, fx=0.7, fy=0.7, interpolation=cv2.INTER_CUBIC).astype(np.uint8)
689
690
  self._cv2_interactive_imshow(np_image)
690
691
 
691
692
  @staticmethod
692
- def _pillow_interactive_imshow(np_image: ImageType) -> None:
693
+ def _pillow_interactive_imshow(np_image: PixelValues) -> None:
693
694
  name = "q, x: quit / s: save"
694
695
  pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
695
696
  pil_image.show(name)
696
697
 
697
- def rotate_image(self, np_image: ImageType, angle: int) -> ImageType:
698
+ def rotate_image(self, np_image: PixelValues, angle: float) -> PixelValues:
698
699
  """Rotating an image by some angle"""
699
700
  return getattr(self, self.pkg_func_dict["rotate_image"])(np_image, angle)
700
701
 
701
702
  @staticmethod
702
- def _cv2_rotate_image(np_image: ImageType, angle: float) -> ImageType:
703
+ def _cv2_rotate_image(np_image: PixelValues, angle: float) -> PixelValues:
703
704
  # copy & paste from https://stackoverflow.com/questions/43892506
704
705
  # /opencv-python-rotate-image-without-cropping-sides
705
706
 
@@ -719,16 +720,16 @@ class VizPackageHandler:
719
720
  rotation_mat[0, 2] += bound_w / 2 - image_center[0]
720
721
  rotation_mat[1, 2] += bound_h / 2 - image_center[1]
721
722
 
722
- np_image = cv2.warpAffine( # type: ignore
723
+ np_image = cv2.warpAffine(
723
724
  src=np_image,
724
725
  M=rotation_mat,
725
726
  dsize=(bound_w, bound_h),
726
- )
727
+ ).astype(np.uint8)
727
728
 
728
729
  return np_image
729
730
 
730
731
  @staticmethod
731
- def _pillow_rotate_image(np_image: ImageType, angle: int) -> ImageType:
732
+ def _pillow_rotate_image(np_image: PixelValues, angle: float) -> PixelValues:
732
733
  pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
733
734
  pil_image_rotated = pil_image.rotate(angle, expand=True)
734
735
  return np.array(pil_image_rotated)[:, :, ::-1]