docling-ibm-models 1.3.1__py3-none-any.whl → 1.3.2__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- docling_ibm_models/tableformer/common.py +0 -94
- docling_ibm_models/tableformer/data_management/functional.py +1 -478
- docling_ibm_models/tableformer/data_management/tf_predictor.py +5 -71
- docling_ibm_models/tableformer/data_management/transforms.py +0 -305
- docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +1 -1
- {docling_ibm_models-1.3.1.dist-info → docling_ibm_models-1.3.2.dist-info}/METADATA +1 -1
- {docling_ibm_models-1.3.1.dist-info → docling_ibm_models-1.3.2.dist-info}/RECORD +9 -13
- docling_ibm_models/tableformer/data_management/data_transformer.py +0 -504
- docling_ibm_models/tableformer/data_management/tf_dataset.py +0 -1233
- docling_ibm_models/tableformer/test_dataset_cache.py +0 -37
- docling_ibm_models/tableformer/test_prepare_image.py +0 -99
- {docling_ibm_models-1.3.1.dist-info → docling_ibm_models-1.3.2.dist-info}/LICENSE +0 -0
- {docling_ibm_models-1.3.1.dist-info → docling_ibm_models-1.3.2.dist-info}/WHEEL +0 -0
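Taken together, 1.3.2 is a prune of unused training code: large blocks are removed from `common.py`, `functional.py`, `transforms.py`, and `tf_predictor.py`, while `tf_dataset.py`, `data_transformer.py`, and the two `test_*` scripts are deleted from the wheel outright (hence the RECORD diff losing four entries). A minimal sketch for verifying the prune in an installed environment, using only the module paths listed above:

```python
import importlib

# Modules whose files are deleted from the wheel in 1.3.2 (per the RECORD diff).
for mod in (
    "docling_ibm_models.tableformer.data_management.tf_dataset",
    "docling_ibm_models.tableformer.data_management.data_transformer",
):
    try:
        importlib.import_module(mod)
        print(f"{mod}: still importable (pre-1.3.2 install)")
    except ImportError:
        print(f"{mod}: gone (1.3.2)")
```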
@@ -48,32 +48,6 @@ def validate_config(config):
     return True


-def parse_arguments():
-    r"""
-    Parse the input arguments
-    A ValueError exception will be thrown in case the config file is invalid
-    """
-    parser = argparse.ArgumentParser(description="Train the TableModel")
-    parser.add_argument(
-        "-c", "--config", required=True, default=None, help="configuration file (JSON)"
-    )
-    args = parser.parse_args()
-    config_filename = args.config
-
-    assert os.path.isfile(config_filename), "FAILURE: Config file not found."
-    return read_config(config_filename)
-
-
-def read_config(config_filename):
-    with open(config_filename, "r") as fd:
-        config = json.load(fd)
-
-    # Validate the config file
-    validate_config(config)
-
-    return config
-
-
 def safe_get_parameter(input_dict, index_path, default=None, required=False):
     r"""
     Safe get parameter from a nested dictionary.
@@ -130,71 +104,3 @@ def get_prepared_data_filename(prepared_data_part, dataset_name):
     if "<POSTFIX>" in template:
         template = template.replace("<POSTFIX>", dataset_name)
     return template
-
-
-def create_dataset_and_model(config, purpose, fixed_padding=False):
-    r"""
-    Gets a model from configuration
-
-    Parameters
-    ---------
-    config : Dictionary
-        The configuration of the model
-    purpose : string
-        One of "train", "eval", "predict"
-    fixed_padding : bool
-        Parameter passed to the constructor of the DataLoader
-
-    Returns
-    -------
-    In case a Model cannot be initialized return None, None, None. Otherwise:
-
-    device : selected device
-    dataset : Instance of the DataLoader
-    model : Instance of the model
-    """
-    from docling_ibm_models.tableformer.data_management.tf_dataset import TFDataset
-
-    model_type = config["model"]["type"]
-    model = None
-
-    # Get env vars:
-    use_cpu_only = os.environ.get("USE_CPU_ONLY", False)
-    use_cuda_only = not use_cpu_only
-
-    # Use the cpu for the evaluation
-    device = "cpu"  # Default, run on CPU
-    num_gpus = torch.cuda.device_count()  # Check if GPU is available
-    if use_cuda_only:
-        device = "cuda:0" if num_gpus > 0 else "cpu"  # Run on first available GPU
-    else:
-        device = "cpu"
-
-    # Create the DataLoader
-    # loader = DataLoader(config, purpose, fixed_padding=fixed_padding)
-    dataset = TFDataset(config, purpose, fixed_padding=fixed_padding)
-    dataset.set_device(device)
-    dataset_val = None
-    if config["train"]["validation"] and purpose == "train":
-        dataset_val = TFDataset(config, "val", fixed_padding=fixed_padding)
-        dataset_val.set_device(device)
-    if model_type == "TableModel04_rs":
-        from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import (  # noqa: F401
-            TableModel04_rs,
-        )
-    # Find the model class and create an instance of it
-    for candidate in BaseModel.__subclasses__():
-        if candidate.__name__ == model_type:
-            init_data = dataset.get_init_data()
-            model = candidate(config, init_data, purpose, device)
-
-    if model is None:
-        logger.warn("Not found model: " + str(model_type))
-        return None, None, None
-
-    logger.info("Found model: " + str(model_type))
-
-    if purpose == s.PREDICT_PURPOSE:
-        return device, dataset, model
-    else:
-        return device, dataset, dataset_val, model
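`create_dataset_and_model` goes with this hunk, and with it the only import of `TFDataset`, which is what allows `tf_dataset.py` to be deleted from the wheel. Note the asymmetric return: a 3-tuple for the predict purpose, a 4-tuple otherwise, so call sites differed by purpose. Callers that only relied on its device selection can inline that part; a sketch that also preserves a caveat from the original (`os.environ.get` returns a string, so `USE_CPU_ONLY=0` still counted as truthy):

```python
import os

import torch

# Device selection as the removed helper performed it: CPU by default,
# first CUDA device if one exists and USE_CPU_ONLY is unset or empty.
use_cpu_only = bool(os.environ.get("USE_CPU_ONLY", False))  # any non-empty string is truthy
device = "cuda:0" if (not use_cpu_only and torch.cuda.device_count() > 0) else "cpu"
```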
@@ -35,61 +35,6 @@ def _is_numpy_image(img):
     return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


-def to_tensor(pic):
-    """Converts a numpy.ndarray (H x W x C) in the range
-    [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0].
-    Args:
-        pic (np.ndarray, torch.Tensor): Image to be converted to tensor, (H x W x C[RGB]).
-    Returns:
-        Tensor: Converted image.
-    """
-
-    if _is_numpy_image(pic):
-        if len(pic.shape) == 2:
-            pic = cv2.cvtColor(pic, cv2.COLOR_GRAY2RGB)
-        img = torch.from_numpy(pic.transpose((2, 0, 1)))
-        # backward compatibility
-        if isinstance(img, torch.ByteTensor) or img.max() > 1:
-            return img.float().div(255)
-        else:
-            return img
-    elif _is_tensor_image(pic):
-        return pic
-
-    else:
-        try:
-            return to_tensor(np.array(pic))
-        except Exception:
-            raise TypeError("pic should be ndarray. Got {}".format(type(pic)))
-
-
-def to_cv_image(pic, mode=None):
-    """Convert a tensor to an ndarray.
-    Args:
-        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
-        mode (str): color space and pixel depth of input data (optional)
-            for example: cv2.COLOR_RGB2BGR.
-    Returns:
-        np.array: Image converted to PIL Image.
-    """
-    if not (_is_numpy_image(pic) or _is_tensor_image(pic)):
-        raise TypeError("pic should be Tensor or ndarray. Got {}.".format(type(pic)))
-
-    npimg = pic
-    if isinstance(pic, torch.FloatTensor):
-        pic = pic.mul(255).byte()
-    if torch.is_tensor(pic):
-        npimg = np.squeeze(np.transpose(pic.numpy(), (1, 2, 0)))
-
-    if not isinstance(npimg, np.ndarray):
-        raise TypeError("Input pic must be a torch.Tensor or NumPy ndarray")
-    if mode is None:
-        return npimg
-
-    else:
-        return cv2.cvtColor(npimg, mode)
-
-
 def normalize(tensor, mean, std):
     """Normalize a tensor image with mean and standard deviation.
     See ``Normalize`` for more details.
@@ -128,6 +73,7 @@ def resize(img, size, interpolation="BILINEAR"):
     if not (isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)):
         raise TypeError("Got inappropriate size arg: {}".format(size))

+    # TODO(Nikos): Try to remove the opencv dependency
     if isinstance(size, int):
         h, w, c = img.shape
         if (w <= h and w == size) or (h <= w and h == size):
@@ -149,426 +95,3 @@ def resize(img, size, interpolation="BILINEAR"):
     return cv2.resize(
         img, dsize=(int(ow), int(oh)), interpolation=INTER_MODE[interpolation]
     )
-
-
-def to_rgb_bgr(pic):
-    """Converts a color image stored in BGR sequence to RGB (BGR to RGB)
-    or stored in RGB sequence to BGR (RGB to BGR).
-    Args:
-        pic (np.ndarray, torch.Tensor): Image to be converted, (H x W x 3).
-    Returns:
-        Tensor: Converted image.
-    """
-
-    if _is_numpy_image(pic) or _is_tensor_image(pic):
-        img = pic[:, :, [2, 1, 0]]
-        return img
-    else:
-        try:
-            return to_rgb_bgr(np.array(pic))
-        except Exception:
-            raise TypeError("pic should be numpy.ndarray or torch.Tensor.")
-
-
-def pad(img, padding, fill=(0, 0, 0), padding_mode="constant"):
-    """Pad the given CV Image on all sides with speficified padding mode and fill value.
-    Args:
-        img (np.ndarray): Image to be padded.
-        padding (int or tuple): Padding on each border. If a single int is provided this
-            is used to pad all borders. If tuple of length 2 is provided this is the padding
-            on left/right and top/bottom respectively. If a tuple of length 4 is provided
-            this is the padding for the left, top, right and bottom borders
-            respectively.
-        fill (int, tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
-            length 3, it is used to fill R, G, B channels respectively.
-            This value is only used when the padding_mode is constant
-        padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric.
-            Default is constant.
-            constant: pads with a constant value, this value is specified with fill
-            edge: pads with the last value on the edge of the image
-            reflect: pads with reflection of image (without repeating the last value on the edge)
-                padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
-                will result in [3, 2, 1, 2, 3, 4, 3, 2]
-            symmetric: pads with reflection of image (repeating the last value on the edge)
-                padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
-                will result in [2, 1, 1, 2, 3, 4, 4, 3]
-    Returns:
-        CV Image: Padded image.
-    """
-    if not _is_numpy_image(img):
-        raise TypeError("img should be CV Image. Got {}".format(type(img)))
-
-    if not isinstance(padding, (numbers.Number, tuple)):
-        raise TypeError("Got inappropriate padding arg")
-    if not isinstance(fill, (numbers.Number, str, tuple)):
-        raise TypeError("Got inappropriate fill arg")
-    if not isinstance(padding_mode, str):
-        raise TypeError("Got inappropriate padding_mode arg")
-
-    if isinstance(padding, Sequence) and len(padding) not in [2, 4]:
-        raise ValueError("Padding must be an int or a 2, or 4 element tuple")
-
-    assert padding_mode in [
-        "constant",
-        "edge",
-        "reflect",
-        "symmetric",
-    ], "Padding mode should be either constant, edge, reflect or symmetric"
-
-    if isinstance(padding, int):
-        pad_left = pad_right = pad_top = pad_bottom = padding
-    if isinstance(padding, Sequence) and len(padding) == 2:
-        pad_left = pad_right = padding[0]
-        pad_top = pad_bottom = padding[1]
-    if isinstance(padding, Sequence) and len(padding) == 4:
-        pad_left, pad_top, pad_right, pad_bottom = padding
-
-    if isinstance(fill, numbers.Number):
-        fill = (fill,) * (2 * len(img.shape) - 3)
-
-    if padding_mode == "constant":
-        assert (len(fill) == 3 and len(img.shape) == 3) or (
-            len(fill) == 1 and len(img.shape) == 2
-        ), "channel of image is {} but length of fill is {}".format(
-            img.shape[-1], len(fill)
-        )
-
-    img = cv2.copyMakeBorder(
-        src=img,
-        top=pad_top,
-        bottom=pad_bottom,
-        left=pad_left,
-        right=pad_right,
-        borderType=PAD_MOD[padding_mode],
-        value=fill,
-    )
-    return img
-
-
-def crop(img, x, y, h, w):
-    """Crop the given CV Image.
-    Args:
-        img (np.ndarray): Image to be cropped.
-        x: Upper pixel coordinate.
-        y: Left pixel coordinate.
-        h: Height of the cropped image.
-        w: Width of the cropped image.
-    Returns:
-        CV Image: Cropped image.
-    """
-    assert _is_numpy_image(img), "img should be CV Image. Got {}".format(type(img))
-    assert h > 0 and w > 0, "h={} and w={} should greater than 0".format(h, w)
-
-    x1, y1, x2, y2 = round(x), round(y), round(x + h), round(y + w)
-
-    # try:
-    #     check_point1 = img[x1, y1, ...]
-    #     check_point2 = img[x2-1, y2-1, ...]
-    # except IndexError:
-    #     img = cv2.copyMakeBorder(img, - min(0, x1), max(x2 - img.shape[0], 0),
-    #                              -min(0, y1), max(y2 - img.shape[1], 0),
-    #                              cv2.BORDER_CONSTANT, value=[0, 0, 0])
-    #     y2 += -min(0, y1)
-    #     y1 += -min(0, y1)
-    #     x2 += -min(0, x1)
-    #     x1 += -min(0, x1)
-    #
-    # finally:
-    #     return img[x1:x2, y1:y2, ...].copy()
-    return img[x1:x2, y1:y2, ...].copy()
-
-
-def center_crop(img, output_size):
-    if isinstance(output_size, numbers.Number):
-        output_size = (int(output_size), int(output_size))
-    h, w, _ = img.shape
-    th, tw = output_size
-    i = int(round((h - th) * 0.5))
-    j = int(round((w - tw) * 0.5))
-    return crop(img, i, j, th, tw)
-
-
-def resized_crop(img, i, j, h, w, size, interpolation="BILINEAR"):
-    """Crop the given CV Image and resize it to desired size. Notably used in RandomResizedCrop.
-    Args:
-        img (np.ndarray): Image to be cropped.
-        i: Upper pixel coordinate.
-        j: Left pixel coordinate.
-        h: Height of the cropped image.
-        w: Width of the cropped image.
-        size (sequence or int): Desired output size. Same semantics as ``scale``.
-        interpolation (str, optional): Desired interpolation. Default is
-            ``BILINEAR``.
-    Returns:
-        np.ndarray: Cropped image.
-    """
-    assert _is_numpy_image(img), "img should be CV Image"
-    img = crop(img, i, j, h, w)
-    img = resize(img, size, interpolation)
-    return img
-
-
-def hflip(img):
-    """Horizontally flip the given PIL Image.
-    Args:
-        img (np.ndarray): Image to be flipped.
-    Returns:
-        np.ndarray: Horizontall flipped image.
-    """
-    if not _is_numpy_image(img):
-        raise TypeError("img should be CV Image. Got {}".format(type(img)))
-
-    return cv2.flip(img, 1)
-
-
-def vflip(img):
-    """Vertically flip the given PIL Image.
-    Args:
-        img (CV Image): Image to be flipped.
-    Returns:
-        PIL Image: Vertically flipped image.
-    """
-    if not _is_numpy_image(img):
-        raise TypeError("img should be PIL Image. Got {}".format(type(img)))
-
-    return cv2.flip(img, 0)
-
-
-def five_crop(img, size):
-    """Crop the given CV Image into four corners and the central crop.
-    .. Note::
-        This transform returns a tuple of images and there may be a
-        mismatch in the number of inputs and targets your ``Dataset`` returns.
-    Args:
-        size (sequence or int): Desired output size of the crop. If size is an
-            int instead of sequence like (h, w), a square crop (size, size) is
-            made.
-    Returns:
-        tuple: tuple (tl, tr, bl, br, center) corresponding top left,
-            top right, bottom left, bottom right and center crop.
-    """
-    if isinstance(size, numbers.Number):
-        size = (int(size), int(size))
-    else:
-        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
-
-    h, w, _ = img.shape
-    crop_h, crop_w = size
-    if crop_w > w or crop_h > h:
-        raise ValueError(
-            "Requested crop size {} is bigger than input size {}".format(size, (h, w))
-        )
-    tl = crop(img, 0, 0, crop_h, crop_w)
-    tr = crop(img, 0, w - crop_w, crop_h, crop_w)
-    bl = crop(img, h - crop_h, 0, crop_h, crop_w)
-    br = crop(img, h - crop_h, w - crop_w, crop_h, crop_w)
-    center = center_crop(img, (crop_h, crop_w))
-    return (tl, tr, bl, br, center)
-
-
-def ten_crop(img, size, vertical_flip=False):
-    """Crop the given CV Image into four corners and the central crop plus the
-    flipped version of these (horizontal flipping is used by default).
-    .. Note::
-        This transform returns a tuple of images and there may be a
-        mismatch in the number of inputs and targets your ``Dataset`` returns.
-    Args:
-        size (sequence or int): Desired output size of the crop. If size is an
-            int instead of sequence like (h, w), a square crop (size, size) is
-            made.
-        vertical_flip (bool): Use vertical flipping instead of horizontal
-    Returns:
-        tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip,
-            br_flip, center_flip) corresponding top left, top right,
-            bottom left, bottom right and center crop and same for the
-            flipped image.
-    """
-    if isinstance(size, numbers.Number):
-        size = (int(size), int(size))
-    else:
-        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
-
-    first_five = five_crop(img, size)
-
-    if vertical_flip:
-        img = vflip(img)
-    else:
-        img = hflip(img)
-
-    second_five = five_crop(img, size)
-    return first_five + second_five
-
-
-def adjust_brightness(img, brightness_factor):
-    """Adjust brightness of an Image.
-    Args:
-        img (np.ndarray): CV Image to be adjusted.
-        brightness_factor (float): How much to adjust the brightness. Can be
-            any non negative number. 0 gives a black image, 1 gives the
-            original image while 2 increases the brightness by a factor of 2.
-    Returns:
-        np.ndarray: Brightness adjusted image.
-    """
-    if not _is_numpy_image(img):
-        raise TypeError("img should be CV Image. Got {}".format(type(img)))
-
-    im = img.astype(np.float32) * brightness_factor
-    im = im.clip(min=0, max=255)
-    return im.astype(img.dtype)
-
-
-def adjust_contrast(img, contrast_factor):
-    """Adjust contrast of an Image.
-    Args:
-        img (np.ndarray): CV Image to be adjusted.
-        contrast_factor (float): How much to adjust the contrast. Can be any
-            non negative number. 0 gives a solid gray image, 1 gives the
-            original image while 2 increases the contrast by a factor of 2.
-    Returns:
-        np.ndarray: Contrast adjusted image.
-    """
-    if not _is_numpy_image(img):
-        raise TypeError("img should be CV Image. Got {}".format(type(img)))
-    im = img.astype(np.float32)
-    mean = round(cv2.cvtColor(im, cv2.COLOR_RGB2GRAY).mean())
-    im = (1 - contrast_factor) * mean + contrast_factor * im
-    im = im.clip(min=0, max=255)
-    return im.astype(img.dtype)
-
-
-def adjust_saturation(img, saturation_factor):
-    """Adjust color saturation of an image.
-    Args:
-        img (np.ndarray): CV Image to be adjusted.
-        saturation_factor (float): How much to adjust the saturation. 0 will
-            give a gray image, 1 will give the original image while
-            2 will enhance the saturation by a factor of 2.
-    Returns:
-        np.ndarray: Saturation adjusted image.
-    """
-    if not _is_numpy_image(img):
-        raise TypeError("img should be PIL Image. Got {}".format(type(img)))
-
-    im = img.astype(np.float32)
-    degenerate = cv2.cvtColor(cv2.cvtColor(im, cv2.COLOR_RGB2GRAY), cv2.COLOR_GRAY2RGB)
-    im = (1 - saturation_factor) * degenerate + saturation_factor * im
-    im = im.clip(min=0, max=255)
-    return im.astype(img.dtype)
-
-
-def adjust_hue(img, hue_factor):
-    """Adjust hue of an image.
-    The image hue is adjusted by converting the image to HSV and
-    cyclically shifting the intensities in the hue channel (H).
-    The image is then converted back to original image mode.
-    `hue_factor` is the amount of shift in H channel and must be in the
-    interval `[-0.5, 0.5]`.
-    See https://en.wikipedia.org/wiki/Hue for more details on Hue.
-    Args:
-        img (np.ndarray): CV Image to be adjusted.
-        hue_factor (float): How much to shift the hue channel. Should be in
-            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
-            HSV space in positive and negative direction respectively.
-            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
-            with complementary colors while 0 gives the original image.
-    Returns:
-        np.ndarray: Hue adjusted image.
-    """
-    if not (-0.5 <= hue_factor <= 0.5):
-        raise ValueError("hue_factor is not in [-0.5, 0.5].")
-
-    if not _is_numpy_image(img):
-        raise TypeError("img should be CV Image. Got {}".format(type(img)))
-
-    im = img.astype(np.uint8)
-    hsv = cv2.cvtColor(im, cv2.COLOR_RGB2HSV_FULL)
-    hsv[..., 0] += np.uint8(hue_factor * 255)
-
-    im = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB_FULL)
-    return im.astype(img.dtype)
-
-
-def adjust_gamma(img, gamma, gain=1):
-    """Perform gamma correction on an image.
-    Also known as Power Law Transform. Intensities in RGB mode are adjusted
-    based on the following equation:
-        I_out = 255 * gain * ((I_in / 255) ** gamma)
-    See https://en.wikipedia.org/wiki/Gamma_correction for more details.
-    Args:
-        img (np.ndarray): CV Image to be adjusted.
-        gamma (float): Non negative real number. gamma larger than 1 make the
-            shadows darker, while gamma smaller than 1 make dark regions
-            lighter.
-        gain (float): The constant multiplier.
-    """
-    if not _is_numpy_image(img):
-        raise TypeError("img should be CV Image. Got {}".format(type(img)))
-
-    if gamma < 0:
-        raise ValueError("Gamma should be a non-negative real number")
-
-    im = img.astype(np.float32)
-    im = 255.0 * gain * np.power(im / 255.0, gamma)
-    im = im.clip(min=0.0, max=255.0)
-    return im.astype(img.dtype)
-
-
-def to_grayscale(img, num_output_channels=1):
-    """Convert image to grayscale version of image.
-    Args:
-        img (np.ndarray): Image to be converted to grayscale.
-    Returns:
-        CV Image: Grayscale version of the image.
-            if num_output_channels == 1 : returned image is single channel
-            if num_output_channels == 3 : returned image is 3 channel with r == g == b
-    """
-    if not _is_numpy_image(img):
-        raise TypeError("img should be CV Image. Got {}".format(type(img)))
-
-    if num_output_channels == 1:
-        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
-    elif num_output_channels == 3:
-        img = cv2.cvtColor(cv2.cvtColor(img, cv2.COLOR_RGB2GRAY), cv2.COLOR_GRAY2RGB)
-    else:
-        raise ValueError("num_output_channels should be either 1 or 3")
-
-    return img
-
-
-def gaussian_noise(img: np.ndarray, mean, std):
-    imgtype = img.dtype
-    gauss = np.random.normal(mean, std, img.shape).astype(np.float32)
-    noisy = np.clip((1 + gauss) * img.astype(np.float32), 0, 255)
-    return noisy.astype(imgtype)
-
-
-def poisson_noise(img):
-    imgtype = img.dtype
-    img = img.astype(np.float32) / 255.0
-    vals = len(np.unique(img))
-    vals = 2 ** np.ceil(np.log2(vals))
-    noisy = 255 * np.clip(
-        np.random.poisson(img.astype(np.float32) * vals) / float(vals), 0, 1
-    )
-    return noisy.astype(imgtype)
-
-
-def salt_and_pepper(img, prob=0.01):
-    """Adds "Salt & Pepper" noise to an image.
-    prob: probability (threshold) that controls level of noise
-    """
-    imgtype = img.dtype
-    rnd = np.random.rand(img.shape[0], img.shape[1])
-    noisy = img.copy()
-    noisy[rnd < prob / 2] = 0.0
-    noisy[rnd > 1 - prob / 2] = 255.0
-    return noisy.astype(imgtype)
-
-
-def cv_transform(img):
-    img = salt_and_pepper(img)
-    return to_tensor(img)
-
-
-def pil_transform(img):
-    return functional.to_tensor(img)