deepdoctection 0.26__py3-none-any.whl → 0.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +7 -1
- deepdoctection/analyzer/dd.py +15 -3
- deepdoctection/configs/conf_dd_one.yaml +4 -0
- deepdoctection/datapoint/convert.py +5 -10
- deepdoctection/datapoint/image.py +2 -2
- deepdoctection/datapoint/view.py +38 -18
- deepdoctection/datasets/save.py +3 -3
- deepdoctection/extern/d2detect.py +1 -2
- deepdoctection/extern/doctrocr.py +14 -9
- deepdoctection/extern/tp/tpfrcnn/common.py +2 -3
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +6 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +3 -3
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -2
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +5 -3
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +3 -1
- deepdoctection/extern/tp/tpfrcnn/predict.py +1 -0
- deepdoctection/mapper/laylmstruct.py +2 -3
- deepdoctection/utils/context.py +2 -2
- deepdoctection/utils/file_utils.py +63 -26
- deepdoctection/utils/fs.py +6 -6
- deepdoctection/utils/pdf_utils.py +2 -2
- deepdoctection/utils/settings.py +8 -1
- deepdoctection/utils/transform.py +9 -9
- deepdoctection/utils/viz.py +405 -86
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/METADATA +93 -94
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/RECORD +31 -31
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/WHEEL +1 -1
- tests/analyzer/test_dd.py +6 -57
- tests/conftest.py +2 -0
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/LICENSE +0 -0
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/top_level.txt +0 -0
deepdoctection/utils/viz.py
CHANGED
|
@@ -24,17 +24,28 @@ and
|
|
|
24
24
|
|
|
25
25
|
<https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/colormap.py>
|
|
26
26
|
"""
|
|
27
|
+
|
|
28
|
+
import base64
|
|
29
|
+
import os
|
|
27
30
|
import sys
|
|
28
|
-
from
|
|
31
|
+
from io import BytesIO
|
|
32
|
+
from typing import Any, Dict, List, Optional, Sequence, Tuple, no_type_check
|
|
29
33
|
|
|
30
|
-
import cv2
|
|
31
34
|
import numpy as np
|
|
32
35
|
import numpy.typing as npt
|
|
33
|
-
from numpy import float32
|
|
36
|
+
from numpy import float32, uint8
|
|
34
37
|
|
|
35
38
|
from .detection_types import ImageType
|
|
39
|
+
from .file_utils import get_opencv_requirement, get_pillow_requirement, opencv_available, pillow_available
|
|
40
|
+
|
|
41
|
+
if opencv_available():
|
|
42
|
+
import cv2
|
|
43
|
+
|
|
44
|
+
if pillow_available():
|
|
45
|
+
from PIL import Image, ImageDraw
|
|
36
46
|
|
|
37
|
-
|
|
47
|
+
|
|
48
|
+
__all__ = ["draw_boxes", "interactive_imshow", "viz_handler"]
|
|
38
49
|
|
|
39
50
|
_COLORS = (
|
|
40
51
|
np.array(
|
|
@@ -183,39 +194,6 @@ def random_color(rgb: bool = True, maximum: int = 255) -> Tuple[int, int, int]:
|
|
|
183
194
|
return tuple(int(x) for x in ret) # type: ignore
|
|
184
195
|
|
|
185
196
|
|
|
186
|
-
def draw_text(
|
|
187
|
-
np_image: ImageType, pos: Tuple[int, int], text: str, color: Tuple[int, int, int], font_scale: float = 1.0
|
|
188
|
-
) -> ImageType:
|
|
189
|
-
"""
|
|
190
|
-
Draw text on an image.
|
|
191
|
-
|
|
192
|
-
:param np_image: image as np.ndarray
|
|
193
|
-
:param pos: x, y; the position of the text
|
|
194
|
-
:param text: text string to draw
|
|
195
|
-
:param color: a 3-tuple BGR color in [0, 255]
|
|
196
|
-
:param font_scale: float
|
|
197
|
-
:return: numpy array
|
|
198
|
-
"""
|
|
199
|
-
|
|
200
|
-
np_image = np_image.astype(np.uint8)
|
|
201
|
-
x_0, y_0 = int(pos[0]), int(pos[1])
|
|
202
|
-
# Compute text size.
|
|
203
|
-
font = cv2.FONT_HERSHEY_SIMPLEX
|
|
204
|
-
((text_w, text_h), _) = cv2.getTextSize(text, font, font_scale, 1)
|
|
205
|
-
# Place text background.
|
|
206
|
-
if x_0 + text_w > np_image.shape[1]:
|
|
207
|
-
x_0 = np_image.shape[1] - text_w
|
|
208
|
-
if y_0 - int(1.15 * text_h) < 0:
|
|
209
|
-
y_0 = int(1.15 * text_h)
|
|
210
|
-
back_top_left = x_0, y_0 - int(1.3 * text_h)
|
|
211
|
-
back_bottom_right = x_0 + text_w, y_0
|
|
212
|
-
cv2.rectangle(np_image, back_top_left, back_bottom_right, color, -1)
|
|
213
|
-
# Show text.
|
|
214
|
-
text_bottomleft = x_0, y_0 - int(0.25 * text_h)
|
|
215
|
-
cv2.putText(np_image, text, text_bottomleft, font, font_scale, (0, 0, 0), thickness=1, lineType=cv2.LINE_AA)
|
|
216
|
-
return np_image
|
|
217
|
-
|
|
218
|
-
|
|
219
197
|
def draw_boxes(
|
|
220
198
|
np_image: ImageType,
|
|
221
199
|
boxes: npt.NDArray[float32],
|
|
@@ -245,12 +223,13 @@ def draw_boxes(
|
|
|
245
223
|
category_names = set(category_names_list) # type: ignore
|
|
246
224
|
category_to_color = {category: random_color() for category in category_names}
|
|
247
225
|
|
|
248
|
-
boxes = np.
|
|
226
|
+
boxes = np.array(boxes, dtype="int32")
|
|
249
227
|
if category_names_list is not None:
|
|
250
228
|
assert len(category_names_list) == len(boxes), f"{len(category_names_list)} != {len(boxes)}"
|
|
251
229
|
areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
|
|
252
230
|
sorted_inds = np.argsort(-areas) # draw large ones first
|
|
253
231
|
assert areas.min() > 0, areas.min()
|
|
232
|
+
|
|
254
233
|
# allow equal, because we are not very strict about rounding error here
|
|
255
234
|
assert (
|
|
256
235
|
boxes[:, 0].min() >= 0
|
|
@@ -270,71 +249,411 @@ def draw_boxes(
|
|
|
270
249
|
if choose_color is None:
|
|
271
250
|
choose_color = random_color()
|
|
272
251
|
if category_names_list[i] is not None:
|
|
273
|
-
np_image = draw_text(
|
|
252
|
+
np_image = viz_handler.draw_text(
|
|
274
253
|
np_image, (box[0], box[1]), category_names_list[i], color=choose_color, font_scale=font_scale
|
|
275
254
|
)
|
|
276
|
-
|
|
277
|
-
np_image, (box[0], box[1]
|
|
255
|
+
np_image = viz_handler.draw_rectangle(
|
|
256
|
+
np_image, (box[0], box[1], box[2], box[3]), choose_color, rectangle_thickness
|
|
278
257
|
)
|
|
279
258
|
|
|
280
259
|
# draw a (very ugly) color palette
|
|
281
260
|
y_0 = np_image.shape[0]
|
|
282
261
|
for category, col in category_to_color.items():
|
|
283
262
|
if category is not None:
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
((_, text_h), _) = cv2.getTextSize(category, font, font_scale, 2)
|
|
263
|
+
viz_handler.draw_text(np_image, (np_image.shape[1], y_0), category, color=col, font_scale=font_scale)
|
|
264
|
+
_, text_h = viz_handler.get_text_size(category, 2)
|
|
287
265
|
y_0 = y_0 - int(10 * text_h)
|
|
288
266
|
|
|
289
267
|
return np_image
|
|
290
268
|
|
|
291
269
|
|
|
292
270
|
@no_type_check
|
|
293
|
-
def interactive_imshow(
|
|
294
|
-
img: ImageType,
|
|
295
|
-
lclick_cb: Optional[Callable[[npt.NDArray[float32], int, int], None]] = None,
|
|
296
|
-
rclick_cb: Optional[Callable[[npt.NDArray[float32], int, int], None]] = None,
|
|
297
|
-
**kwargs: str,
|
|
298
|
-
) -> None:
|
|
271
|
+
def interactive_imshow(img: ImageType) -> None:
|
|
299
272
|
"""
|
|
300
273
|
Display an image in a pop-up window
|
|
301
274
|
|
|
302
275
|
:param img: An image (expect BGR) to show.
|
|
303
|
-
:param lclick_cb: a callback ``func(img, x, y)`` for left/right click event.
|
|
304
|
-
:param rclick_cb: a callback ``func(img, x, y)`` for left/right click event.
|
|
305
|
-
:param kwargs: can be {key_cb_a: callback_img, key_cb_b: callback_img}, to specify a callback ``func(img)`` for
|
|
306
|
-
keypress. Some existing keypress event handler:
|
|
307
|
-
|
|
308
|
-
* q: destroy the current window
|
|
309
|
-
* x: execute ``sys.exit()``
|
|
310
|
-
* s: save image to "out.png"
|
|
311
276
|
"""
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
277
|
+
viz_handler.interactive_imshow(img)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class VizPackageHandler:
|
|
281
|
+
"""
|
|
282
|
+
A handler for the image processing libraries PIL or OpenCV. Explicit use of the libraries is not intended.
|
|
283
|
+
If the environment variable USE_OPENCV=True is set, only the CV2 functions will be used via the handler.
|
|
284
|
+
The default library is PIL. Compared to OpenCV, PIL is somewhat slower (this applies to reading and writing
|
|
285
|
+
image files), which can lead to a bottleneck during training, especially if the loading is not parallelized
|
|
286
|
+
"""
|
|
287
|
+
|
|
288
|
+
PACKAGE_FUNCS = {
|
|
289
|
+
"cv2": {
|
|
290
|
+
"read_image": "_cv2_read_image",
|
|
291
|
+
"write_image": "_cv2_write_image",
|
|
292
|
+
"convert_np_to_b64": "_cv2_convert_np_to_b64",
|
|
293
|
+
"convert_b64_to_np": "_cv2_convert_b64_to_np",
|
|
294
|
+
"resize": "_cv2_resize",
|
|
295
|
+
"get_text_size": "_cv2_get_text_size",
|
|
296
|
+
"draw_rectangle": "_cv2_draw_rectangle",
|
|
297
|
+
"draw_text": "_cv2_draw_text",
|
|
298
|
+
"interactive_imshow": "_cv2_interactive_imshow",
|
|
299
|
+
"encode": "_cv2_encode",
|
|
300
|
+
},
|
|
301
|
+
"pillow": {
|
|
302
|
+
"read_image": "_pillow_read_image",
|
|
303
|
+
"write_image": "_pillow_write_image",
|
|
304
|
+
"convert_np_to_b64": "_pillow_convert_np_to_b64",
|
|
305
|
+
"convert_b64_to_np": "_pillow_convert_b64_to_np",
|
|
306
|
+
"resize": "_pillow_resize",
|
|
307
|
+
"get_text_size": "_pillow_get_text_size",
|
|
308
|
+
"draw_rectangle": "_pillow_draw_rectangle",
|
|
309
|
+
"draw_text": "_pillow_draw_text",
|
|
310
|
+
"interactive_imshow": "_pillow_interactive_imshow",
|
|
311
|
+
"encode": "_pillow_encode",
|
|
312
|
+
},
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
def __init__(self) -> None:
|
|
316
|
+
"""Selecting the image processing library and fonts"""
|
|
317
|
+
package = self._select_package()
|
|
318
|
+
self.pkg_func_dict: Dict[str, str] = {}
|
|
319
|
+
self.font = None
|
|
320
|
+
self._set_vars(package)
|
|
321
|
+
|
|
322
|
+
@staticmethod
|
|
323
|
+
def _select_package() -> str:
|
|
324
|
+
"""
|
|
325
|
+
USE_OPENCV has priority and will enforce the use of OpenCV.
|
|
326
|
+
Otherwise it will use Pillow as default package, if it is installed.
|
|
327
|
+
If Pillow is not installed it will try to load OpenCV again
|
|
328
|
+
:return: either 'pillow' or 'cv2'
|
|
329
|
+
"""
|
|
330
|
+
if os.environ.get("USE_OPENCV") is not None:
|
|
331
|
+
requirements = get_opencv_requirement()
|
|
332
|
+
if not requirements[1]:
|
|
333
|
+
raise ImportError(requirements[2])
|
|
334
|
+
return "cv2"
|
|
335
|
+
requirements = get_pillow_requirement()
|
|
336
|
+
if os.environ["USE_PILLOW"]:
|
|
337
|
+
if not requirements[1]:
|
|
338
|
+
raise ImportError(requirements[2])
|
|
339
|
+
return "pillow"
|
|
340
|
+
requirements = get_opencv_requirement()
|
|
341
|
+
if not requirements[1]:
|
|
342
|
+
raise ImportError(requirements[2])
|
|
343
|
+
return "cv2"
|
|
344
|
+
|
|
345
|
+
def _set_vars(self, package: str) -> None:
|
|
346
|
+
self.pkg_func_dict = self.PACKAGE_FUNCS[package]
|
|
347
|
+
if package == "pillow":
|
|
348
|
+
image = Image.fromarray(np.uint8(np.ones((1, 1, 3))))
|
|
349
|
+
self.font = ImageDraw.ImageDraw(image).getfont()
|
|
350
|
+
else:
|
|
351
|
+
self.font = cv2.FONT_HERSHEY_SIMPLEX # type: ignore
|
|
352
|
+
|
|
353
|
+
def refresh(self) -> None:
|
|
354
|
+
"""
|
|
355
|
+
Refresh the viz_handler setting. Useful if you change the env variable on run time and want to take account of
|
|
356
|
+
the changes.
|
|
357
|
+
|
|
358
|
+
**Example**
|
|
359
|
+
|
|
360
|
+
os.environ["USE_OPENCV"]="True"
|
|
361
|
+
viz_handler.refresh() # this will reset the original config and now use OpenCV
|
|
362
|
+
|
|
363
|
+
:return:
|
|
364
|
+
"""
|
|
365
|
+
package = self._select_package()
|
|
366
|
+
self._set_vars(package)
|
|
367
|
+
|
|
368
|
+
def read_image(self, path: str) -> ImageType:
|
|
369
|
+
"""Reading an image from file and returning a np.array
|
|
370
|
+
|
|
371
|
+
:param path: Use /path/to/dir/file_name.[suffix]
|
|
372
|
+
"""
|
|
373
|
+
return getattr(self, self.pkg_func_dict["read_image"])(path)
|
|
374
|
+
|
|
375
|
+
@staticmethod
|
|
376
|
+
def _cv2_read_image(path: str) -> ImageType:
|
|
377
|
+
return cv2.imread(path, cv2.IMREAD_COLOR)
|
|
378
|
+
|
|
379
|
+
@staticmethod
|
|
380
|
+
def _pillow_read_image(path: str) -> ImageType:
|
|
381
|
+
with Image.open(path) as image:
|
|
382
|
+
np_image = np.array(image)[:, :, ::-1]
|
|
383
|
+
return np_image
|
|
384
|
+
|
|
385
|
+
def write_image(self, path: str, image: ImageType) -> None:
|
|
386
|
+
"""Writing an image as np.array to a file.
|
|
387
|
+
|
|
388
|
+
:param path: Use /path/to/dir/file_name.[suffix]
|
|
389
|
+
:param image: pixel values as np.array
|
|
390
|
+
"""
|
|
391
|
+
return getattr(self, self.pkg_func_dict["write_image"])(path, image)
|
|
392
|
+
|
|
393
|
+
@staticmethod
|
|
394
|
+
def _cv2_write_image(path: str, image: ImageType) -> None:
|
|
395
|
+
cv2.imwrite(path, image)
|
|
396
|
+
|
|
397
|
+
@staticmethod
|
|
398
|
+
def _pillow_write_image(path: str, image: ImageType) -> None:
|
|
399
|
+
pil_image = Image.fromarray(np.uint8(image[:, :, ::-1]))
|
|
400
|
+
pil_image.save(path)
|
|
401
|
+
|
|
402
|
+
def encode(self, np_image: ImageType) -> bytes:
|
|
403
|
+
"""Converting an image as np.array into PNG-encoded bytes
|
|
404
|
+
|
|
405
|
+
:param np_image: Image as np.array
|
|
406
|
+
"""
|
|
407
|
+
return getattr(self, self.pkg_func_dict["encode"])(np_image)
|
|
408
|
+
|
|
409
|
+
@staticmethod
|
|
410
|
+
def _cv2_encode(np_image: ImageType) -> bytes:
|
|
411
|
+
np_encode = cv2.imencode(".png", np_image)
|
|
412
|
+
b_image = np_encode[1].tobytes()
|
|
413
|
+
return b_image
|
|
414
|
+
|
|
415
|
+
@staticmethod
|
|
416
|
+
def _pillow_encode(np_image: ImageType) -> bytes:
|
|
417
|
+
buffered = BytesIO()
|
|
418
|
+
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
419
|
+
pil_image.save(buffered, format="PNG")
|
|
420
|
+
return buffered.getvalue()
|
|
421
|
+
|
|
422
|
+
def convert_np_to_b64(self, image: ImageType) -> str:
|
|
423
|
+
"""Converting an image given as np.array into a b64 encoded string
|
|
424
|
+
|
|
425
|
+
:param image: Image as np.array
|
|
426
|
+
"""
|
|
427
|
+
return getattr(self, self.pkg_func_dict["convert_np_to_b64"])(image)
|
|
428
|
+
|
|
429
|
+
@staticmethod
|
|
430
|
+
def _cv2_convert_np_to_b64(image: ImageType) -> str:
|
|
431
|
+
np_encode = cv2.imencode(".png", image)
|
|
432
|
+
return base64.b64encode(np_encode[1]).decode("utf-8") # type: ignore
|
|
433
|
+
|
|
434
|
+
@staticmethod
|
|
435
|
+
def _pillow_convert_np_to_b64(np_image: ImageType) -> str:
|
|
436
|
+
buffered = BytesIO()
|
|
437
|
+
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
438
|
+
pil_image.save(buffered, format="PNG")
|
|
439
|
+
return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
440
|
+
|
|
441
|
+
def convert_b64_to_np(self, image: str) -> ImageType:
|
|
442
|
+
"""
|
|
443
|
+
Converting an image as b64 encoded string into np.array
|
|
444
|
+
|
|
445
|
+
:param image: b64 encoded string
|
|
446
|
+
:return: np.array
|
|
447
|
+
"""
|
|
448
|
+
return getattr(self, self.pkg_func_dict["convert_b64_to_np"])(image)
|
|
449
|
+
|
|
450
|
+
@staticmethod
|
|
451
|
+
def _cv2_convert_b64_to_np(image: str) -> ImageType:
|
|
452
|
+
np_array = np.fromstring(base64.b64decode(image), np.uint8) # type: ignore
|
|
453
|
+
np_array = cv2.imdecode(np_array, cv2.IMREAD_COLOR).astype(np.float32)
|
|
454
|
+
return np_array.astype(uint8)
|
|
455
|
+
|
|
456
|
+
@staticmethod
|
|
457
|
+
def _pillow_convert_b64_to_np(image: str) -> ImageType:
|
|
458
|
+
array = base64.b64decode(image)
|
|
459
|
+
im_file = BytesIO(array)
|
|
460
|
+
pil_image = Image.open(im_file)
|
|
461
|
+
return np.array(pil_image)[:, :, ::-1]
|
|
462
|
+
|
|
463
|
+
def resize(self, image: ImageType, width: int, height: int, interpolation: str) -> ImageType:
|
|
464
|
+
"""
|
|
465
|
+
Resize a given image to new width, height. Specifying an interpolation method is required. Depending on the
|
|
466
|
+
chosen image library use one of the following:
|
|
467
|
+
|
|
468
|
+
PIL: NEAREST, BOX, BILINEAR, BICUBIC, VIZ (available for CV2 as well)
|
|
469
|
+
CV2: INTER_NEAREST, INTER_LINEAR, INTER_AREA, VIZ
|
|
470
|
+
|
|
471
|
+
:param image: image as np.array
|
|
472
|
+
:param width: the new image width
|
|
473
|
+
:param height: the new image height
|
|
474
|
+
:param interpolation: interpolation method as string.
|
|
475
|
+
:return: resized image as np.array
|
|
476
|
+
"""
|
|
477
|
+
return getattr(self, self.pkg_func_dict["resize"])(image, width, height, interpolation)
|
|
478
|
+
|
|
479
|
+
@staticmethod
|
|
480
|
+
def _cv2_resize(image: ImageType, width: int, height: int, interpolation: str) -> ImageType:
|
|
481
|
+
intpol_method_dict = {
|
|
482
|
+
"INTER_NEAREST": cv2.INTER_NEAREST,
|
|
483
|
+
"INTER_LINEAR": cv2.INTER_LINEAR,
|
|
484
|
+
"INTER_AREA": cv2.INTER_AREA,
|
|
485
|
+
"VIZ": cv2.INTER_LINEAR,
|
|
486
|
+
}
|
|
487
|
+
return cv2.resize(image, (width, height), interpolation=intpol_method_dict[interpolation])
|
|
488
|
+
|
|
489
|
+
@staticmethod
|
|
490
|
+
def _pillow_resize(image: ImageType, width: int, height: int, interpolation: str) -> ImageType:
|
|
491
|
+
intpol_method_dict = {
|
|
492
|
+
"NEAREST": Image.Resampling.NEAREST,
|
|
493
|
+
"BOX": Image.Resampling.BOX,
|
|
494
|
+
"BILINEAR": Image.Resampling.BILINEAR,
|
|
495
|
+
"BICUBIC": Image.Resampling.BICUBIC,
|
|
496
|
+
"VIZ": Image.Resampling.BILINEAR,
|
|
497
|
+
}
|
|
498
|
+
pil_image = Image.fromarray(np.uint8(image[:, :, ::-1]))
|
|
499
|
+
pil_image_resized = pil_image.resize(
|
|
500
|
+
size=(width, height), resample=intpol_method_dict[interpolation], box=None, reducing_gap=None
|
|
501
|
+
)
|
|
502
|
+
return np.array(pil_image_resized)[:, :, ::-1]
|
|
503
|
+
|
|
504
|
+
def get_text_size(self, text: str, font_scale: float) -> Tuple[int, int]:
|
|
505
|
+
"""
|
|
506
|
+
Return the text size for a given font scale
|
|
507
|
+
:param text: text as string
|
|
508
|
+
:param font_scale: scale
|
|
509
|
+
:return: A tuple with width and height of the text
|
|
510
|
+
"""
|
|
511
|
+
return getattr(self, self.pkg_func_dict["get_text_size"])(text, font_scale)
|
|
512
|
+
|
|
513
|
+
def _cv2_get_text_size(self, text: str, font_scale: float) -> Tuple[int, int]:
|
|
514
|
+
((width, height), _) = cv2.getTextSize(text, self.font, font_scale, 1) # type: ignore
|
|
515
|
+
return width, height
|
|
516
|
+
|
|
517
|
+
def _pillow_get_text_size(self, text: str, font_scale: float) -> Tuple[int, int]: # pylint: disable=W0613
|
|
518
|
+
_, _, width, height = self.font.getbbox(text) # type: ignore
|
|
519
|
+
return width, height
|
|
520
|
+
|
|
521
|
+
def draw_rectangle(
|
|
522
|
+
self, np_image: ImageType, box: Tuple[Any, Any, Any, Any], color: Tuple[int, int, int], thickness: int
|
|
523
|
+
) -> ImageType:
|
|
524
|
+
"""
|
|
525
|
+
Drawing a rectangle into an image with a given color (b,g,r) and given thickness
|
|
526
|
+
|
|
527
|
+
:param np_image: image
|
|
528
|
+
:param box: box (x_min, y_min, x_max, y_max)
|
|
529
|
+
:param color: (b,g,r) between 0 and 255
|
|
530
|
+
:param thickness: pixel width of the rectangle lines
|
|
531
|
+
:return: image with rectangle
|
|
532
|
+
"""
|
|
533
|
+
return getattr(self, self.pkg_func_dict["draw_rectangle"])(np_image, box, color, thickness)
|
|
534
|
+
|
|
535
|
+
@staticmethod
|
|
536
|
+
def _cv2_draw_rectangle(
|
|
537
|
+
np_image: ImageType, box: Tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
|
|
538
|
+
) -> ImageType:
|
|
539
|
+
cv2.rectangle(np_image, (box[0], box[1]), (box[2], box[3]), color=color, thickness=thickness)
|
|
540
|
+
return np_image
|
|
541
|
+
|
|
542
|
+
@staticmethod
|
|
543
|
+
def _pillow_draw_rectangle(
|
|
544
|
+
np_image: ImageType, box: Tuple[Any, Any, Any, Any], color: Sequence[int], thickness: int
|
|
545
|
+
) -> ImageType:
|
|
546
|
+
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
547
|
+
draw = ImageDraw.Draw(pil_image)
|
|
548
|
+
draw.rectangle(box, outline=color, width=thickness) # type: ignore
|
|
549
|
+
np_image = np.array(pil_image)[:, :, ::-1]
|
|
550
|
+
return np_image
|
|
551
|
+
|
|
552
|
+
def draw_text(
|
|
553
|
+
self, np_image: ImageType, pos: Tuple[Any, Any], text: str, color: Tuple[int, int, int], font_scale: float
|
|
554
|
+
) -> ImageType:
|
|
555
|
+
"""
|
|
556
|
+
Drawing a text into a numpy image. The result will differ between PIL and CV2 (and will not look that good when
|
|
557
|
+
using PIL).
|
|
558
|
+
|
|
559
|
+
:param np_image: image
|
|
560
|
+
:param pos: x_min, y_min position of the starting point of the text
|
|
561
|
+
:param text: text string
|
|
562
|
+
:param color: (b,g,r) between 0 and 255
|
|
563
|
+
:param font_scale: scale of font. This will only be used within an OpenCV framework
|
|
564
|
+
:return: image with text
|
|
565
|
+
"""
|
|
566
|
+
return getattr(self, self.pkg_func_dict["draw_text"])(np_image, pos, text, color, font_scale)
|
|
567
|
+
|
|
568
|
+
def _cv2_draw_text(
|
|
569
|
+
self, np_image: ImageType, pos: Tuple[Any, Any], text: str, color: Tuple[int, int, int], font_scale: float
|
|
570
|
+
) -> ImageType:
|
|
571
|
+
"""
|
|
572
|
+
Draw text on an image.
|
|
573
|
+
|
|
574
|
+
:param np_image: image as np.ndarray
|
|
575
|
+
:param pos: x, y; the position of the text
|
|
576
|
+
:param text: text string to draw
|
|
577
|
+
:param color: a 3-tuple BGR color in [0, 255]
|
|
578
|
+
:param font_scale: float
|
|
579
|
+
:return: numpy array
|
|
580
|
+
"""
|
|
581
|
+
|
|
582
|
+
np_image = np_image.astype(np.uint8)
|
|
583
|
+
x_0, y_0 = int(pos[0]), int(pos[1])
|
|
584
|
+
# Compute text size.
|
|
585
|
+
font = cv2.FONT_HERSHEY_SIMPLEX
|
|
586
|
+
text_w, text_h = viz_handler.get_text_size(text, font_scale)
|
|
587
|
+
# Place text background.
|
|
588
|
+
if x_0 + text_w > np_image.shape[1]:
|
|
589
|
+
x_0 = np_image.shape[1] - text_w
|
|
590
|
+
if y_0 - int(1.15 * text_h) < 0:
|
|
591
|
+
y_0 = int(1.15 * text_h)
|
|
592
|
+
back_top_left = x_0, y_0 - int(1.3 * text_h)
|
|
593
|
+
back_bottom_right = x_0 + text_w, y_0
|
|
594
|
+
np_image = self.draw_rectangle(
|
|
595
|
+
np_image, (back_top_left[0], back_top_left[1], back_bottom_right[0], back_bottom_right[1]), color, 1
|
|
596
|
+
)
|
|
597
|
+
# Show text.
|
|
598
|
+
text_bottomleft = x_0, y_0 - int(0.25 * text_h)
|
|
599
|
+
cv2.putText(np_image, text, text_bottomleft, font, font_scale, (0, 0, 0), thickness=1, lineType=cv2.LINE_AA)
|
|
600
|
+
return np_image
|
|
601
|
+
|
|
602
|
+
@staticmethod
|
|
603
|
+
def _pillow_draw_text(
|
|
604
|
+
np_image: ImageType,
|
|
605
|
+
pos: Tuple[Any, Any],
|
|
606
|
+
text: str,
|
|
607
|
+
color: Tuple[int, int, int], # pylint: disable=W0613
|
|
608
|
+
font_scale: float, # pylint: disable=W0613
|
|
609
|
+
) -> ImageType:
|
|
610
|
+
"""Draw a text in an image using PIL."""
|
|
611
|
+
# using PIL default font size that does not scale to larger image sizes.
|
|
612
|
+
# Compare with https://github.com/python-pillow/Pillow/issues/6622
|
|
613
|
+
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
614
|
+
draw = ImageDraw.Draw(pil_image)
|
|
615
|
+
draw.text(pos, text, fill=(0, 0, 0), anchor="lb")
|
|
616
|
+
return np.array(pil_image)[:, :, ::-1]
|
|
617
|
+
|
|
618
|
+
def interactive_imshow(self, np_image: ImageType) -> None:
|
|
619
|
+
"""Displaying an image in a separate window"""
|
|
620
|
+
return getattr(self, self.pkg_func_dict["interactive_imshow"])(np_image)
|
|
621
|
+
|
|
622
|
+
def _cv2_interactive_imshow(self, np_image: ImageType) -> None:
|
|
623
|
+
"""
|
|
624
|
+
Display an image in a pop-up window
|
|
625
|
+
|
|
626
|
+
:param np_image: An image (expect BGR) to show.
|
|
627
|
+
"""
|
|
628
|
+
name = "q, x: quit / s: save"
|
|
629
|
+
cv2.imshow(name, np_image)
|
|
630
|
+
|
|
324
631
|
key = cv2.waitKey(-1)
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
632
|
+
while key >= 128:
|
|
633
|
+
key = cv2.waitKey(-1)
|
|
634
|
+
key = chr(key & 0xFF) # type: ignore
|
|
635
|
+
|
|
636
|
+
if key == "q":
|
|
637
|
+
cv2.destroyWindow(name)
|
|
638
|
+
elif key == "x":
|
|
639
|
+
sys.exit()
|
|
640
|
+
elif key == "s":
|
|
641
|
+
cv2.imwrite("out.png", np_image)
|
|
642
|
+
elif key in ["+", "="]:
|
|
643
|
+
np_image = cv2.resize(np_image, None, fx=1.3, fy=1.3, interpolation=cv2.INTER_CUBIC)
|
|
644
|
+
self._cv2_interactive_imshow(np_image)
|
|
645
|
+
elif key == "-":
|
|
646
|
+
np_image = cv2.resize(np_image, None, fx=0.7, fy=0.7, interpolation=cv2.INTER_CUBIC)
|
|
647
|
+
self._cv2_interactive_imshow(np_image)
|
|
648
|
+
|
|
649
|
+
@staticmethod
|
|
650
|
+
def _pillow_interactive_imshow(np_image: ImageType) -> None:
|
|
651
|
+
name = "q, x: quit / s: save"
|
|
652
|
+
pil_image = Image.fromarray(np.uint8(np_image[:, :, ::-1]))
|
|
653
|
+
pil_image.show(name)
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
# default image package
|
|
657
|
+
os.environ["USE_PILLOW"] = "True"
|
|
658
|
+
|
|
659
|
+
viz_handler = VizPackageHandler()
|