deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (131)
  1. deepdoctection/__init__.py +16 -29
  2. deepdoctection/analyzer/dd.py +70 -59
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/dataflow/common.py +9 -5
  5. deepdoctection/dataflow/custom.py +5 -5
  6. deepdoctection/dataflow/custom_serialize.py +75 -18
  7. deepdoctection/dataflow/parallel_map.py +3 -3
  8. deepdoctection/dataflow/serialize.py +4 -4
  9. deepdoctection/dataflow/stats.py +3 -3
  10. deepdoctection/datapoint/annotation.py +41 -56
  11. deepdoctection/datapoint/box.py +9 -8
  12. deepdoctection/datapoint/convert.py +6 -6
  13. deepdoctection/datapoint/image.py +56 -44
  14. deepdoctection/datapoint/view.py +245 -150
  15. deepdoctection/datasets/__init__.py +1 -4
  16. deepdoctection/datasets/adapter.py +35 -26
  17. deepdoctection/datasets/base.py +14 -12
  18. deepdoctection/datasets/dataflow_builder.py +3 -3
  19. deepdoctection/datasets/info.py +24 -26
  20. deepdoctection/datasets/instances/doclaynet.py +51 -51
  21. deepdoctection/datasets/instances/fintabnet.py +46 -46
  22. deepdoctection/datasets/instances/funsd.py +25 -24
  23. deepdoctection/datasets/instances/iiitar13k.py +13 -10
  24. deepdoctection/datasets/instances/layouttest.py +4 -3
  25. deepdoctection/datasets/instances/publaynet.py +5 -5
  26. deepdoctection/datasets/instances/pubtables1m.py +24 -21
  27. deepdoctection/datasets/instances/pubtabnet.py +32 -30
  28. deepdoctection/datasets/instances/rvlcdip.py +30 -30
  29. deepdoctection/datasets/instances/xfund.py +26 -26
  30. deepdoctection/datasets/save.py +6 -6
  31. deepdoctection/eval/__init__.py +1 -4
  32. deepdoctection/eval/accmetric.py +32 -33
  33. deepdoctection/eval/base.py +8 -9
  34. deepdoctection/eval/cocometric.py +15 -13
  35. deepdoctection/eval/eval.py +41 -37
  36. deepdoctection/eval/tedsmetric.py +30 -23
  37. deepdoctection/eval/tp_eval_callback.py +16 -19
  38. deepdoctection/extern/__init__.py +2 -7
  39. deepdoctection/extern/base.py +339 -134
  40. deepdoctection/extern/d2detect.py +85 -113
  41. deepdoctection/extern/deskew.py +14 -11
  42. deepdoctection/extern/doctrocr.py +141 -130
  43. deepdoctection/extern/fastlang.py +27 -18
  44. deepdoctection/extern/hfdetr.py +71 -62
  45. deepdoctection/extern/hflayoutlm.py +504 -211
  46. deepdoctection/extern/hflm.py +230 -0
  47. deepdoctection/extern/model.py +488 -302
  48. deepdoctection/extern/pdftext.py +23 -19
  49. deepdoctection/extern/pt/__init__.py +1 -3
  50. deepdoctection/extern/pt/nms.py +6 -2
  51. deepdoctection/extern/pt/ptutils.py +29 -19
  52. deepdoctection/extern/tessocr.py +39 -38
  53. deepdoctection/extern/texocr.py +18 -18
  54. deepdoctection/extern/tp/tfutils.py +57 -9
  55. deepdoctection/extern/tp/tpcompat.py +21 -14
  56. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  57. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  58. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
  60. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  61. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
  62. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  63. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
  64. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  65. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
  66. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
  67. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
  68. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  69. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  70. deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
  71. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  72. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  73. deepdoctection/extern/tpdetect.py +45 -53
  74. deepdoctection/mapper/__init__.py +3 -8
  75. deepdoctection/mapper/cats.py +27 -29
  76. deepdoctection/mapper/cocostruct.py +10 -10
  77. deepdoctection/mapper/d2struct.py +27 -26
  78. deepdoctection/mapper/hfstruct.py +13 -8
  79. deepdoctection/mapper/laylmstruct.py +178 -37
  80. deepdoctection/mapper/maputils.py +12 -11
  81. deepdoctection/mapper/match.py +2 -2
  82. deepdoctection/mapper/misc.py +11 -9
  83. deepdoctection/mapper/pascalstruct.py +4 -4
  84. deepdoctection/mapper/prodigystruct.py +5 -5
  85. deepdoctection/mapper/pubstruct.py +84 -92
  86. deepdoctection/mapper/tpstruct.py +5 -5
  87. deepdoctection/mapper/xfundstruct.py +33 -33
  88. deepdoctection/pipe/__init__.py +1 -1
  89. deepdoctection/pipe/anngen.py +12 -14
  90. deepdoctection/pipe/base.py +52 -106
  91. deepdoctection/pipe/common.py +72 -59
  92. deepdoctection/pipe/concurrency.py +16 -11
  93. deepdoctection/pipe/doctectionpipe.py +24 -21
  94. deepdoctection/pipe/language.py +20 -25
  95. deepdoctection/pipe/layout.py +20 -16
  96. deepdoctection/pipe/lm.py +75 -105
  97. deepdoctection/pipe/order.py +194 -89
  98. deepdoctection/pipe/refine.py +111 -124
  99. deepdoctection/pipe/segment.py +156 -161
  100. deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
  101. deepdoctection/pipe/text.py +37 -36
  102. deepdoctection/pipe/transform.py +19 -16
  103. deepdoctection/train/__init__.py +6 -12
  104. deepdoctection/train/d2_frcnn_train.py +48 -41
  105. deepdoctection/train/hf_detr_train.py +41 -30
  106. deepdoctection/train/hf_layoutlm_train.py +153 -135
  107. deepdoctection/train/tp_frcnn_train.py +32 -31
  108. deepdoctection/utils/concurrency.py +1 -1
  109. deepdoctection/utils/context.py +13 -6
  110. deepdoctection/utils/develop.py +4 -4
  111. deepdoctection/utils/env_info.py +87 -125
  112. deepdoctection/utils/file_utils.py +6 -11
  113. deepdoctection/utils/fs.py +22 -18
  114. deepdoctection/utils/identifier.py +2 -2
  115. deepdoctection/utils/logger.py +16 -15
  116. deepdoctection/utils/metacfg.py +7 -7
  117. deepdoctection/utils/mocks.py +93 -0
  118. deepdoctection/utils/pdf_utils.py +11 -11
  119. deepdoctection/utils/settings.py +185 -181
  120. deepdoctection/utils/tqdm.py +1 -1
  121. deepdoctection/utils/transform.py +14 -9
  122. deepdoctection/utils/types.py +104 -0
  123. deepdoctection/utils/utils.py +7 -7
  124. deepdoctection/utils/viz.py +74 -72
  125. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
  126. deepdoctection-0.33.dist-info/RECORD +146 -0
  127. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
  128. deepdoctection/utils/detection_types.py +0 -68
  129. deepdoctection-0.31.dist-info/RECORD +0 -144
  130. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
  131. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,104 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File: types.py
3
+
4
+ # Copyright 2021 Dr. Janis Meyer. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ """
19
+ Typing sheet for the whole package
20
+ """
21
+
22
+ import os
23
+ import queue
24
+ from typing import TYPE_CHECKING, Any, Protocol, Type, TypeVar, Union
25
+
26
+ import numpy.typing as npt
27
+ import tqdm
28
+ from numpy import uint8
29
+ from typing_extensions import TypeAlias
30
+
31
+
32
+ # Type for a general dataclass
33
+ class IsDataclass(Protocol): # pylint: disable=R0903
34
+ """
35
+ type hint for general dataclass
36
+ """
37
+
38
+ __dataclass_fields__: dict[Any, Any]
39
+
40
+
41
+ # Numpy image type
42
+ PixelValues = npt.NDArray[uint8]
43
+ # b64 encoded image as string
44
+ B64Str: TypeAlias = str
45
+ # b64 encoded image in bytes
46
+ B64: TypeAlias = bytes
47
+
48
+ # Typing for curry decorator
49
+ DP = TypeVar("DP")
50
+ S = TypeVar("S")
51
+ T = TypeVar("T")
52
+
53
+ # Some type hints that must be distinguished when running mypy and linters
54
+ if TYPE_CHECKING:
55
+ QueueType = queue.Queue[Any] # pylint: disable=E1136
56
+ TqdmType = tqdm.tqdm[Any] # pylint: disable=E1136
57
+ BaseExceptionType = Type[BaseException]
58
+
59
+ else:
60
+ BaseExceptionType = bool
61
+ QueueType = queue.Queue
62
+ TqdmType = tqdm.tqdm
63
+
64
+
65
+ JsonDict = dict[str, Any]
66
+
67
+
68
+ # Some common deepdoctection dict-types
69
+ AnnotationDict: TypeAlias = dict[str, Any]
70
+ ImageDict: TypeAlias = dict[str, Any]
71
+
72
+ # We use these types for output types of the Page object
73
+ Text_: TypeAlias = dict[str, Any]
74
+ HTML: TypeAlias = str
75
+ csv: TypeAlias = list[list[str]]
76
+ Chunks: TypeAlias = list[tuple[str, str, int, str, str, str, str]]
77
+
78
+ # Some common dict-types used in common annotation schemes converted from a generic JSON object
79
+ CocoDatapointDict: TypeAlias = dict[str, Any]
80
+ PubtabnetDict: TypeAlias = dict[str, Any]
81
+ FunsdDict: TypeAlias = dict[str, Any]
82
+ Detectron2Dict: TypeAlias = dict[str, Any]
83
+
84
+
85
+ # A path to a file, directory etc. can be given as a string or Path object
86
+ PathLikeOrStr: TypeAlias = Union[str, os.PathLike]
87
+
88
+ # mainly used in utils
89
+ # Type for requirements. A requirement is a tuple of a string, a bool that is True if the requirement is
90
+ # available, and an error message string
91
+ PackageAvailable: TypeAlias = bool
92
+ ErrorMsg: TypeAlias = str
93
+ Requirement = tuple[str, PackageAvailable, ErrorMsg]
94
+
95
+ BGR: TypeAlias = tuple[int, int, int]
96
+
97
+ # A type to collect key val pairs of environ information. Mainly used in env_info.py
98
+ KeyValEnvInfos: TypeAlias = list[tuple[str, str]]
99
+
100
+ # mainly used in extern
101
+
102
+
103
+ # mainly used in eval
104
+ MetricResults: TypeAlias = dict[str, Union[int, float]]
@@ -23,16 +23,16 @@ import inspect
23
23
  import os
24
24
  from collections.abc import MutableMapping
25
25
  from datetime import datetime
26
- from typing import Any, Callable, Dict, List, Sequence, Set, Union
26
+ from typing import Any, Callable, Sequence, Union
27
27
 
28
28
  import numpy as np
29
29
 
30
- from .detection_types import Pathlike
30
+ from .types import PathLikeOrStr
31
31
 
32
32
 
33
33
  def delete_keys_from_dict(
34
- dictionary: Union[Dict[Any, Any], MutableMapping], keys: Union[str, List[str], Set[str]] # type: ignore
35
- ) -> Dict[Any, Any]:
34
+ dictionary: Union[dict[Any, Any], MutableMapping], keys: Union[str, list[str], set[str]] # type: ignore
35
+ ) -> dict[Any, Any]:
36
36
  """
37
37
  Removing key/value pairs from dictionary. Works for nested dicts as well.
38
38
 
@@ -62,7 +62,7 @@ def delete_keys_from_dict(
62
62
  return modified_dict
63
63
 
64
64
 
65
- def split_string(input_string: str) -> List[str]:
65
+ def split_string(input_string: str) -> list[str]:
66
66
  """
67
67
  Takes a string, splits between commas and returns a list with split components as list elements
68
68
 
@@ -71,7 +71,7 @@ def split_string(input_string: str) -> List[str]:
71
71
  return input_string.split(",")
72
72
 
73
73
 
74
- def string_to_dict(input_string: str) -> Dict[str, str]:
74
+ def string_to_dict(input_string: str) -> dict[str, str]:
75
75
  """
76
76
  Takes a string of a form `key1=val1,key2=val2` and returns the corresponding dict
77
77
  """
@@ -144,7 +144,7 @@ def get_rng(obj: Any = None) -> np.random.RandomState:
144
144
  return np.random.RandomState(seed)
145
145
 
146
146
 
147
- def is_file_extension(file_name: Pathlike, extension: Union[str, Sequence[str]]) -> bool:
147
+ def is_file_extension(file_name: PathLikeOrStr, extension: Union[str, Sequence[str]]) -> bool:
148
148
  """
149
149
  Check if a given file name has a given extension
150
150
 
@@ -25,26 +25,26 @@ and
25
25
  <https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/colormap.py>
26
26
  """
27
27
 
28
- import ast
29
28
  import base64
30
29
  import os
31
30
  import sys
32
31
  from io import BytesIO
33
- from typing import Any, Dict, List, Optional, Sequence, Tuple, no_type_check
32
+ from typing import Any, Optional, Sequence, no_type_check
34
33
 
35
34
  import numpy as np
36
35
  import numpy.typing as npt
36
+ from lazy_imports import try_import
37
37
  from numpy import float32, uint8
38
38
 
39
- from .detection_types import ImageType
40
- from .env_info import auto_select_viz_library
39
+ from .env_info import ENV_VARS_TRUE, auto_select_viz_library
41
40
  from .error import DependencyError
42
- from .file_utils import get_opencv_requirement, get_pillow_requirement, opencv_available, pillow_available
41
+ from .file_utils import get_opencv_requirement, get_pillow_requirement
42
+ from .types import BGR, B64Str, PathLikeOrStr, PixelValues
43
43
 
44
- if opencv_available():
44
+ with try_import() as cv2_import_guard:
45
45
  import cv2
46
46
 
47
- if pillow_available():
47
+ with try_import() as pil_import_guard:
48
48
  from PIL import Image, ImageDraw
49
49
 
50
50
 
@@ -183,7 +183,7 @@ _COLORS = (
183
183
  )
184
184
 
185
185
 
186
- def random_color(rgb: bool = True, maximum: int = 255) -> Tuple[int, int, int]:
186
+ def random_color(rgb: bool = True, maximum: int = 255) -> tuple[int, int, int]:
187
187
  """
188
188
  :param rgb: Whether to return RGB colors or BGR colors.
189
189
  :param maximum: either 255 or 1
@@ -198,14 +198,14 @@ def random_color(rgb: bool = True, maximum: int = 255) -> Tuple[int, int, int]:
198
198
 
199
199
 
200
200
  def draw_boxes(
201
- np_image: ImageType,
201
+ np_image: PixelValues,
202
202
  boxes: npt.NDArray[float32],
203
- category_names_list: Optional[List[Optional[str]]] = None,
204
- color: Optional[Tuple[int, int, int]] = None,
203
+ category_names_list: Optional[list[Optional[str]]] = None,
204
+ color: Optional[BGR] = None,
205
205
  font_scale: float = 1.0,
206
206
  rectangle_thickness: int = 4,
207
207
  box_color_by_category: bool = True,
208
- ) -> ImageType:
208
+ ) -> PixelValues:
209
209
  """
210
210
  Draw bounding boxes with category names into image.
211
211
 
@@ -246,7 +246,7 @@ def draw_boxes(
246
246
  np_image = np_image.copy()
247
247
 
248
248
  if np_image.ndim == 2 or (np_image.ndim == 3 and np_image.shape[2] == 1):
249
- np_image = cv2.cvtColor(np_image, cv2.COLOR_GRAY2BGR) # type: ignore
249
+ np_image = cv2.cvtColor(np_image, cv2.COLOR_GRAY2BGR).astype(np.uint8)
250
250
  for i in sorted_inds:
251
251
  box = boxes[i, :]
252
252
  choose_color = category_to_color.get(category_names_list[i]) if category_to_color is not None else color
@@ -279,7 +279,7 @@ def draw_boxes(
279
279
 
280
280
 
281
281
  @no_type_check
282
- def interactive_imshow(img: ImageType) -> None:
282
+ def interactive_imshow(img: PixelValues) -> None:
283
283
  """
284
284
  Display an image in a pop-up window
285
285
 
@@ -328,7 +328,7 @@ class VizPackageHandler:
328
328
  def __init__(self) -> None:
329
329
  """Selecting the image processing library and fonts"""
330
330
  package = self._select_package()
331
- self.pkg_func_dict: Dict[str, str] = {}
331
+ self.pkg_func_dict: dict[str, str] = {}
332
332
  self.font = None
333
333
  self._set_vars(package)
334
334
 
@@ -339,8 +339,8 @@ class VizPackageHandler:
339
339
  Otherwise it will use Pillow as default package
340
340
  :return: either 'pillow' or 'cv2'
341
341
  """
342
- maybe_cv2 = "cv2" if ast.literal_eval(os.environ.get("USE_DD_OPENCV", "False")) else None
343
- maybe_pil = "pillow" if ast.literal_eval(os.environ.get("USE_DD_PILLOW", "True")) else None
342
+ maybe_cv2 = "cv2" if os.environ.get("USE_DD_OPENCV", "False") in ENV_VARS_TRUE else None
343
+ maybe_pil = "pillow" if os.environ.get("USE_DD_PILLOW", "True") in ENV_VARS_TRUE else None
344
344
 
345
345
  if not maybe_cv2 and not maybe_pil:
346
346
  raise EnvironmentError(
@@ -386,7 +386,7 @@ class VizPackageHandler:
386
386
  package = self._select_package()
387
387
  self._set_vars(package)
388
388
 
389
- def read_image(self, path: str) -> ImageType:
389
+ def read_image(self, path: PathLikeOrStr) -> PixelValues:
390
390
  """Reading an image from file and returning a np.array
391
391
 
392
392
  :param path: Use /path/to/dir/file_name.[suffix]
@@ -394,16 +394,16 @@ class VizPackageHandler:
394
394
  return getattr(self, self.pkg_func_dict["read_image"])(path)
395
395
 
396
396
  @staticmethod
397
- def _cv2_read_image(path: str) -> ImageType:
398
- return cv2.imread(path, cv2.IMREAD_COLOR) # type: ignore
397
+ def _cv2_read_image(path: PathLikeOrStr) -> PixelValues:
398
+ return cv2.imread(os.fspath(path), cv2.IMREAD_COLOR).astype(np.uint8)
399
399
 
400
400
  @staticmethod
401
- def _pillow_read_image(path: str) -> ImageType:
402
- with Image.open(path).convert("RGB") as image:
401
+ def _pillow_read_image(path: PathLikeOrStr) -> PixelValues:
402
+ with Image.open(os.fspath(path)).convert("RGB") as image:
403
403
  np_image = np.array(image)[:, :, ::-1]
404
404
  return np_image
405
405
 
406
- def write_image(self, path: str, image: ImageType) -> None:
406
+ def write_image(self, path: PathLikeOrStr, image: PixelValues) -> None:
407
407
  """Writing an image as np.array to a file.
408
408
 
409
409
  :param path: Use /path/to/dir/file_name.[suffix]
@@ -412,15 +412,15 @@ class VizPackageHandler:
412
412
  return getattr(self, self.pkg_func_dict["write_image"])(path, image)
413
413
 
414
414
  @staticmethod
415
- def _cv2_write_image(path: str, image: ImageType) -> None:
416
- cv2.imwrite(path, image)
415
+ def _cv2_write_image(path: PathLikeOrStr, image: PixelValues) -> None:
416
+ cv2.imwrite(os.fspath(path), image)
417
417
 
418
418
  @staticmethod
419
- def _pillow_write_image(path: str, image: ImageType) -> None:
419
+ def _pillow_write_image(path: PathLikeOrStr, image: PixelValues) -> None:
420
420
  pil_image = Image.fromarray(np.uint8(image[:, :, ::-1]))
421
- pil_image.save(path)
421
+ pil_image.save(os.fspath(path))
422
422
 
423
- def encode(self, np_image: ImageType) -> bytes:
423
+ def encode(self, np_image: PixelValues) -> bytes:
424
424
  """Converting an image as np.array into a b64 representation
425
425
 
426
426
  :param np_image: Image as np.array
@@ -428,19 +428,19 @@ class VizPackageHandler:
428
428
  return getattr(self, self.pkg_func_dict["encode"])(np_image)
429
429
 
430
430
  @staticmethod
431
- def _cv2_encode(np_image: ImageType) -> bytes:
431
+ def _cv2_encode(np_image: PixelValues) -> bytes:
432
432
  np_encode = cv2.imencode(".png", np_image)
433
433
  b_image = np_encode[1].tobytes()
434
434
  return b_image
435
435
 
436
436
  @staticmethod
437
- def _pillow_encode(np_image: ImageType) -> bytes:
437
+ def _pillow_encode(np_image: PixelValues) -> bytes:
438
438
  buffered = BytesIO()
439
439
  pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
440
440
  pil_image.save(buffered, format="PNG")
441
441
  return buffered.getvalue()
442
442
 
443
- def convert_np_to_b64(self, image: ImageType) -> str:
443
+ def convert_np_to_b64(self, image: PixelValues) -> str:
444
444
  """Converting an image given as np.array into a b64 encoded string
445
445
 
446
446
  :param image: Image as np.array
@@ -448,18 +448,18 @@ class VizPackageHandler:
448
448
  return getattr(self, self.pkg_func_dict["convert_np_to_b64"])(image)
449
449
 
450
450
  @staticmethod
451
- def _cv2_convert_np_to_b64(image: ImageType) -> str:
451
+ def _cv2_convert_np_to_b64(image: PixelValues) -> str:
452
452
  np_encode = cv2.imencode(".png", image)
453
453
  return base64.b64encode(np_encode[1]).decode("utf-8") # type: ignore
454
454
 
455
455
  @staticmethod
456
- def _pillow_convert_np_to_b64(np_image: ImageType) -> str:
456
+ def _pillow_convert_np_to_b64(np_image: PixelValues) -> str:
457
457
  buffered = BytesIO()
458
458
  pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
459
459
  pil_image.save(buffered, format="PNG")
460
460
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
461
461
 
462
- def convert_b64_to_np(self, image: str) -> ImageType:
462
+ def convert_b64_to_np(self, image: B64Str) -> PixelValues:
463
463
  """
464
464
  Converting an image as b64 encoded string into np.array
465
465
 
@@ -469,19 +469,19 @@ class VizPackageHandler:
469
469
  return getattr(self, self.pkg_func_dict["convert_b64_to_np"])(image)
470
470
 
471
471
  @staticmethod
472
- def _cv2_convert_b64_to_np(image: str) -> ImageType:
472
+ def _cv2_convert_b64_to_np(image: B64Str) -> PixelValues:
473
473
  np_array = np.fromstring(base64.b64decode(image), np.uint8) # type: ignore
474
474
  np_array = cv2.imdecode(np_array, cv2.IMREAD_COLOR).astype(np.float32)
475
475
  return np_array.astype(uint8)
476
476
 
477
477
  @staticmethod
478
- def _pillow_convert_b64_to_np(image: str) -> ImageType:
478
+ def _pillow_convert_b64_to_np(image: B64Str) -> PixelValues:
479
479
  array = base64.b64decode(image)
480
480
  im_file = BytesIO(array)
481
481
  pil_image = Image.open(im_file)
482
482
  return np.array(pil_image)[:, :, ::-1]
483
483
 
484
- def resize(self, image: ImageType, width: int, height: int, interpolation: str) -> ImageType:
484
+ def resize(self, image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
485
485
  """
486
486
  Resize a given image to new width, height. Specifying an interpolation method is required. Depending on the
487
487
  chosen image library use one of the following:
@@ -498,17 +498,19 @@ class VizPackageHandler:
498
498
  return getattr(self, self.pkg_func_dict["resize"])(image, width, height, interpolation)
499
499
 
500
500
  @staticmethod
501
- def _cv2_resize(image: ImageType, width: int, height: int, interpolation: str) -> ImageType:
501
+ def _cv2_resize(image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
502
502
  intpol_method_dict = {
503
503
  "INTER_NEAREST": cv2.INTER_NEAREST,
504
504
  "INTER_LINEAR": cv2.INTER_LINEAR,
505
505
  "INTER_AREA": cv2.INTER_AREA,
506
506
  "VIZ": cv2.INTER_LINEAR,
507
507
  }
508
- return cv2.resize(image, (width, height), interpolation=intpol_method_dict[interpolation]) # type: ignore
508
+ return cv2.resize(image, dsize=(width, height), interpolation=intpol_method_dict[interpolation]).astype(
509
+ np.uint8
510
+ )
509
511
 
510
512
  @staticmethod
511
- def _pillow_resize(image: ImageType, width: int, height: int, interpolation: str) -> ImageType:
513
+ def _pillow_resize(image: PixelValues, width: int, height: int, interpolation: str) -> PixelValues:
512
514
  intpol_method_dict = {
513
515
  "NEAREST": Image.Resampling.NEAREST,
514
516
  "BOX": Image.Resampling.BOX,
@@ -522,7 +524,7 @@ class VizPackageHandler:
522
524
  )
523
525
  return np.array(pil_image_resized)[:, :, ::-1]
524
526
 
525
- def get_text_size(self, text: str, font_scale: float) -> Tuple[int, int]:
527
+ def get_text_size(self, text: str, font_scale: float) -> tuple[int, int]:
526
528
  """
527
529
  Return the text size for a given font scale
528
530
  :param text: text as string
@@ -531,19 +533,19 @@ class VizPackageHandler:
531
533
  """
532
534
  return getattr(self, self.pkg_func_dict["get_text_size"])(text, font_scale)
533
535
 
534
- def _cv2_get_text_size(self, text: str, font_scale: float) -> Tuple[int, int]:
536
+ def _cv2_get_text_size(self, text: str, font_scale: float) -> tuple[int, int]:
535
537
  ((width, height), _) = cv2.getTextSize(text, self.font, font_scale, 1) # type: ignore
536
538
  return width, height
537
539
 
538
- def _pillow_get_text_size(self, text: str, font_scale: float) -> Tuple[int, int]: # pylint: disable=W0613
540
+ def _pillow_get_text_size(self, text: str, font_scale: float) -> tuple[int, int]: # pylint: disable=W0613
539
541
  _, _, width, height = self.font.getbbox(text) # type: ignore
540
542
  return width, height
541
543
 
542
544
  def draw_rectangle(
543
- self, np_image: ImageType, box: Tuple[Any, Any, Any, Any], color: Tuple[int, int, int], thickness: int
544
- ) -> ImageType:
545
+ self, np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: tuple[int, int, int], thickness: int
546
+ ) -> PixelValues:
545
547
  """
546
- Drawing a rectangle into an image with a given color (b,g,r) and given thickness
548
+ Drawing a rectangle into an image with a given color (b,g,r) and given thickness.
547
549
 
548
550
  :param np_image: image
549
551
  :param box: box (x_min, y_min, x_max, y_max)
@@ -555,15 +557,15 @@ class VizPackageHandler:
555
557
 
556
558
  @staticmethod
557
559
  def _cv2_draw_rectangle(
558
- np_image: ImageType, box: Tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
559
- ) -> ImageType:
560
+ np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
561
+ ) -> PixelValues:
560
562
  cv2.rectangle(np_image, (box[0], box[1]), (box[2], box[3]), color=color, thickness=thickness)
561
563
  return np_image
562
564
 
563
565
  @staticmethod
564
566
  def _pillow_draw_rectangle(
565
- np_image: ImageType, box: Tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
566
- ) -> ImageType:
567
+ np_image: PixelValues, box: tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
568
+ ) -> PixelValues:
567
569
  pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
568
570
  draw = ImageDraw.Draw(pil_image)
569
571
  draw.rectangle(box, outline=color, width=thickness) # type: ignore
@@ -572,13 +574,13 @@ class VizPackageHandler:
572
574
 
573
575
  def draw_text(
574
576
  self,
575
- np_image: ImageType,
576
- pos: Tuple[Any, Any],
577
+ np_image: PixelValues,
578
+ pos: tuple[Any, Any],
577
579
  text: str,
578
- color: Tuple[int, int, int],
580
+ color: tuple[int, int, int],
579
581
  font_scale: float,
580
582
  rectangle_thickness: int = 1,
581
- ) -> ImageType:
583
+ ) -> PixelValues:
582
584
  """
583
585
  Drawing a text into a numpy image. The result will differ between PIL and CV2 (and will not look that good when
584
586
  using PIL).
@@ -597,13 +599,13 @@ class VizPackageHandler:
597
599
 
598
600
  def _cv2_draw_text(
599
601
  self,
600
- np_image: ImageType,
601
- pos: Tuple[Any, Any],
602
+ np_image: PixelValues,
603
+ pos: tuple[Any, Any],
602
604
  text: str,
603
- color: Tuple[int, int, int],
605
+ color: tuple[int, int, int],
604
606
  font_scale: float,
605
607
  rectangle_thickness: int,
606
- ) -> ImageType:
608
+ ) -> PixelValues:
607
609
  """
608
610
  Draw text on an image.
609
611
 
@@ -641,13 +643,13 @@ class VizPackageHandler:
641
643
 
642
644
  @staticmethod
643
645
  def _pillow_draw_text(
644
- np_image: ImageType,
645
- pos: Tuple[Any, Any],
646
+ np_image: PixelValues,
647
+ pos: tuple[Any, Any],
646
648
  text: str,
647
- color: Tuple[int, int, int], # pylint: disable=W0613
649
+ color: tuple[int, int, int], # pylint: disable=W0613
648
650
  font_scale: float, # pylint: disable=W0613
649
651
  rectangle_thickness: int, # pylint: disable=W0613
650
- ) -> ImageType:
652
+ ) -> PixelValues:
651
653
  """Draw a text in an image using PIL."""
652
654
  # using PIL default font size that does not scale to larger image sizes.
653
655
  # Compare with https://github.com/python-pillow/Pillow/issues/6622
@@ -656,11 +658,11 @@ class VizPackageHandler:
656
658
  draw.text(pos, text, fill=(0, 0, 0), anchor="lb")
657
659
  return np.array(pil_image)[:, :, ::-1]
658
660
 
659
- def interactive_imshow(self, np_image: ImageType) -> None:
661
+ def interactive_imshow(self, np_image: PixelValues) -> None:
660
662
  """Displaying an image in a separate window"""
661
663
  return getattr(self, self.pkg_func_dict["interactive_imshow"])(np_image)
662
664
 
663
- def _cv2_interactive_imshow(self, np_image: ImageType) -> None:
665
+ def _cv2_interactive_imshow(self, np_image: PixelValues) -> None:
664
666
  """
665
667
  Display an image in a pop-up window
666
668
 
@@ -681,24 +683,24 @@ class VizPackageHandler:
681
683
  elif key == "s":
682
684
  cv2.imwrite("out.png", np_image)
683
685
  elif key in ["+", "="]:
684
- np_image = cv2.resize(np_image, None, fx=1.3, fy=1.3, interpolation=cv2.INTER_CUBIC) # type: ignore
686
+ np_image = cv2.resize(np_image, None, fx=1.3, fy=1.3, interpolation=cv2.INTER_CUBIC).astype(np.uint8)
685
687
  self._cv2_interactive_imshow(np_image)
686
688
  elif key == "-":
687
- np_image = cv2.resize(np_image, None, fx=0.7, fy=0.7, interpolation=cv2.INTER_CUBIC) # type: ignore
689
+ np_image = cv2.resize(np_image, None, fx=0.7, fy=0.7, interpolation=cv2.INTER_CUBIC).astype(np.uint8)
688
690
  self._cv2_interactive_imshow(np_image)
689
691
 
690
692
  @staticmethod
691
- def _pillow_interactive_imshow(np_image: ImageType) -> None:
693
+ def _pillow_interactive_imshow(np_image: PixelValues) -> None:
692
694
  name = "q, x: quit / s: save"
693
695
  pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
694
696
  pil_image.show(name)
695
697
 
696
- def rotate_image(self, np_image: ImageType, angle: int) -> ImageType:
698
+ def rotate_image(self, np_image: PixelValues, angle: float) -> PixelValues:
697
699
  """Rotating an image by some angle"""
698
700
  return getattr(self, self.pkg_func_dict["rotate_image"])(np_image, angle)
699
701
 
700
702
  @staticmethod
701
- def _cv2_rotate_image(np_image: ImageType, angle: float) -> ImageType:
703
+ def _cv2_rotate_image(np_image: PixelValues, angle: float) -> PixelValues:
702
704
  # copy & paste from https://stackoverflow.com/questions/43892506
703
705
  # /opencv-python-rotate-image-without-cropping-sides
704
706
 
@@ -718,16 +720,16 @@ class VizPackageHandler:
718
720
  rotation_mat[0, 2] += bound_w / 2 - image_center[0]
719
721
  rotation_mat[1, 2] += bound_h / 2 - image_center[1]
720
722
 
721
- np_image = cv2.warpAffine( # type: ignore
723
+ np_image = cv2.warpAffine(
722
724
  src=np_image,
723
725
  M=rotation_mat,
724
726
  dsize=(bound_w, bound_h),
725
- )
727
+ ).astype(np.uint8)
726
728
 
727
729
  return np_image
728
730
 
729
731
  @staticmethod
730
- def _pillow_rotate_image(np_image: ImageType, angle: int) -> ImageType:
732
+ def _pillow_rotate_image(np_image: PixelValues, angle: float) -> PixelValues:
731
733
  pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
732
734
  pil_image_rotated = pil_image.rotate(angle, expand=True)
733
735
  return np.array(pil_image_rotated)[:, :, ::-1]