mttf 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mt/keras/__init__.py +8 -0
- mt/keras_src/__init__.py +16 -0
- mt/keras_src/applications_src/__init__.py +33 -0
- mt/keras_src/applications_src/classifier.py +497 -0
- mt/keras_src/applications_src/mobilenet_v3_split.py +544 -0
- mt/keras_src/applications_src/mobilevit.py +292 -0
- mt/keras_src/base.py +28 -0
- mt/keras_src/constraints_src/__init__.py +14 -0
- mt/keras_src/constraints_src/center_around.py +19 -0
- mt/keras_src/layers_src/__init__.py +43 -0
- mt/keras_src/layers_src/counter.py +27 -0
- mt/keras_src/layers_src/floor.py +24 -0
- mt/keras_src/layers_src/identical.py +15 -0
- mt/keras_src/layers_src/image_sizing.py +1605 -0
- mt/keras_src/layers_src/normed_conv2d.py +239 -0
- mt/keras_src/layers_src/simple_mha.py +472 -0
- mt/keras_src/layers_src/soft_bend.py +36 -0
- mt/keras_src/layers_src/transformer_encoder.py +246 -0
- mt/keras_src/layers_src/utils.py +88 -0
- mt/keras_src/layers_src/var_regularizer.py +38 -0
- mt/tf/__init__.py +10 -0
- mt/tf/init.py +25 -0
- mt/tf/keras_applications/__init__.py +5 -0
- mt/tf/keras_layers/__init__.py +5 -0
- mt/tf/mttf_version.py +5 -0
- mt/tf/utils.py +44 -0
- mt/tf/version.py +5 -0
- mt/tfc/__init__.py +291 -0
- mt/tfg/__init__.py +8 -0
- mt/tfp/__init__.py +11 -0
- mt/tfp/real_nvp.py +116 -0
- mttf-1.3.6.data/scripts/dmt_build_package_and_upload_to_nexus.sh +25 -0
- mttf-1.3.6.data/scripts/dmt_pipi.sh +7 -0
- mttf-1.3.6.data/scripts/dmt_twineu.sh +2 -0
- mttf-1.3.6.data/scripts/pipi.sh +7 -0
- mttf-1.3.6.data/scripts/user_build_package_and_upload_to_nexus.sh +25 -0
- mttf-1.3.6.data/scripts/user_pipi.sh +8 -0
- mttf-1.3.6.data/scripts/user_twineu.sh +3 -0
- mttf-1.3.6.data/scripts/wml_build_package_and_upload_to_nexus.sh +25 -0
- mttf-1.3.6.data/scripts/wml_nexus.py +50 -0
- mttf-1.3.6.data/scripts/wml_pipi.sh +7 -0
- mttf-1.3.6.data/scripts/wml_twineu.sh +2 -0
- mttf-1.3.6.dist-info/METADATA +18 -0
- mttf-1.3.6.dist-info/RECORD +47 -0
- mttf-1.3.6.dist-info/WHEEL +5 -0
- mttf-1.3.6.dist-info/licenses/LICENSE +21 -0
- mttf-1.3.6.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1605 @@
|
|
|
1
|
+
"""Module involves upsizing and downsizing images in each axis individually using convolutions of residuals."""
|
|
2
|
+
|
|
3
|
+
import tensorflow as tf
|
|
4
|
+
from mt import tp, np
|
|
5
|
+
from .. import layers, initializers, regularizers, constraints
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def mirror_all_weights(l_weights: list) -> list:
    """Duplicate ('mirror') layer weights to fit a layer with twice the channels.

    Each weight array is expanded so that the doubled layer initially behaves
    like two independent copies of the original layer:

    - 1-D arrays (biases, normalization scales) are tiled twice along their
      only axis.
    - 4-D arrays (Conv2D kernels of shape ``(kh, kw, c_in, c_out)``) are
      embedded into a block-diagonal kernel of shape
      ``(kh, kw, 2*c_in, 2*c_out)`` whose off-diagonal blocks are zero, so each
      input half feeds only its own output half.

    Parameters
    ----------
    l_weights : list
        list of numpy arrays, each of rank 1 or 4 (e.g. as returned by
        ``keras.layers.Layer.get_weights``).

    Returns
    -------
    list
        list of mirrored numpy arrays, in the same order as the input.

    Raises
    ------
    NotImplementedError
        if an array has a rank other than 1 or 4.
    """

    l_newWeights = []
    for arr in l_weights:
        if arr.ndim == 1:
            new_arr = np.tile(arr, 2)
        elif arr.ndim == 4:
            zero_arr = np.zeros_like(arr, dtype=arr.dtype)
            # Build a per-element 2x2 block pattern [[arr, 0], [0, arr]], then
            # interleave the block axes into the channel axes so the result is
            # a block-diagonal kernel with doubled input and output channels.
            x = np.stack([arr, zero_arr, zero_arr, arr], axis=-1)
            x = x.reshape(arr.shape + (2, 2))
            x = np.transpose(x, [0, 1, 4, 2, 5, 3])
            new_arr = x.reshape(
                (arr.shape[0], arr.shape[1], arr.shape[2] << 1, arr.shape[3] << 1)
            )
        else:
            raise NotImplementedError("SOTA exceeded!")
        l_newWeights.append(new_arr)

    return l_newWeights
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DUCLayer(layers.Layer):
    """Base layer for all DUC layer implementations.

    Stores the convolution hyper-parameters shared by every DUC layer and
    handles their (de)serialization in :meth:`get_config`.

    Parameters
    ----------
    kernel_size : int or tuple or list
        An integer or tuple/list of 2 integers, specifying the height and width of the 2D
        convolution window. Can be a single integer to specify the same value for all spatial
        dimensions.
    kernel_initializer : object
        Initializer for the convolutional kernels.
    bias_initializer : object
        Initializer for the convolutional biases.
    kernel_regularizer : object
        Regularizer for the convolutional kernels.
    bias_regularizer : object
        Regularizer for the convolutional biases.
    kernel_constraint: object
        Constraint function applied to the convolutional layer kernels.
    bias_constraint: object
        Constraint function applied to the convolutional layer biases.
    """

    def __init__(
        self,
        kernel_size: tp.Union[int, tuple, list] = 3,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        super().__init__(**kwargs)

        # Resolve string/dict specs into concrete Keras objects up-front so
        # subclasses can hand them straight to their Conv2D sublayers.
        self._kernel_size = kernel_size
        self._kernel_initializer = initializers.get(kernel_initializer)
        self._bias_initializer = initializers.get(bias_initializer)
        self._kernel_regularizer = regularizers.get(kernel_regularizer)
        self._bias_regularizer = regularizers.get(bias_regularizer)
        self._kernel_constraint = constraints.get(kernel_constraint)
        self._bias_constraint = constraints.get(bias_constraint)

    def get_config(self):
        merged = dict(super().get_config())
        # Own hyper-parameters override any identically-named base keys.
        merged.update(
            {
                "kernel_size": self._kernel_size,
                "kernel_initializer": initializers.serialize(self._kernel_initializer),
                "bias_initializer": initializers.serialize(self._bias_initializer),
                "kernel_regularizer": regularizers.serialize(self._kernel_regularizer),
                "bias_regularizer": regularizers.serialize(self._bias_regularizer),
                "kernel_constraint": constraints.serialize(self._kernel_constraint),
                "bias_constraint": constraints.serialize(self._bias_constraint),
            }
        )
        return merged

    get_config.__doc__ = layers.Layer.get_config.__doc__

    def get_mirrored_weights(self):
        """Return this layer's weights expanded via :func:`mirror_all_weights`."""
        return mirror_all_weights(self.get_weights())
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class Upsize2D(DUCLayer):
    """Upsizing along the x-axis and the y-axis using convolutions of residuals.

    Upsizing means doubling the width and the height and halving the number of channels.

    Input at each grid cell is a pair of `(avg, res)` images at resolution `(H,W,C)`. The pair is
    transformed to `4*expansion_factor` hidden images and then 4 residual images
    `(res1, res2, res3, res4)`. Then, `avg` is added to the 4 residual images, forming at each cell
    a 2x2 block of images `(avg+res1, avg+res2, avg+res3, avg+res4)`. Finally, the new blocks
    across the whole tensor form a new grid, doubling the height and width. Note that each
    `avg+resK` image serves as a pair of average and residual images in the higher resolution.

    Parameters
    ----------
    input_dim : int
        the dimensionality of each input pixel. Must be even.
    expansion_factor : int
        the coefficient defining the number of hidden images per cell needed.
    kernel_size : int or tuple or list
        An integer or tuple/list of 2 integers, specifying the height and width of the 2D
        convolution window. Can be a single integer to specify the same value for all spatial
        dimensions.
    kernel_initializer : object
        Initializer for the convolutional kernels.
    bias_initializer : object
        Initializer for the convolutional biases.
    kernel_regularizer : object
        Regularizer for the convolutional kernels.
    bias_regularizer : object
        Regularizer for the convolutional biases.
    kernel_constraint: object
        Constraint function applied to the convolutional layer kernels.
    bias_constraint: object
        Constraint function applied to the convolutional layer biases.

    Raises
    ------
    ValueError
        if ``input_dim`` is odd.
    """

    def __init__(
        self,
        input_dim: int,
        expansion_factor: int = 2,
        kernel_size: tp.Union[int, tuple, list] = 3,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        super(Upsize2D, self).__init__(
            kernel_size=kernel_size,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs
        )

        # Output has input_dim // 2 channels, so the input must split evenly.
        if input_dim & 1 != 0:
            raise ValueError(
                "Input dimensionality must be even. Got {}.".format(input_dim)
            )

        self._input_dim = input_dim
        self._expansion_factor = expansion_factor

        # The expansion stage only exists when expansion_factor > 1; the
        # projection stage is always built because call() applies it
        # unconditionally.
        if self._expansion_factor > 1:
            self.prenorm1_layer = layers.LayerNormalization(name="prenorm1")
            self.expansion_layer = layers.Conv2D(
                self._input_dim * 2 * expansion_factor,
                self._kernel_size,
                padding="same",
                activation="swish",
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="expand",
            )
        self.prenorm2_layer = layers.LayerNormalization(name="prenorm2")
        # Projects down to input_dim * 2 channels = 4 residual images of
        # input_dim // 2 channels each (one per cell of the output 2x2 block).
        self.projection_layer = layers.Conv2D(
            self._input_dim * 2,
            self._kernel_size,
            padding="same",
            activation="tanh",  # (-1., 1.)
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
            name="project",
        )

    def call(self, x, training: bool = False):
        # First half of the channels is the running-average image `avg`,
        # kept aside so it can be re-added to each residual below.
        x_avg = x[:, :, :, : self._input_dim // 2]

        if self._expansion_factor > 1:  # expand
            x = self.prenorm1_layer(x, training=training)
            x = self.expansion_layer(x, training=training)

        # project
        x = self.prenorm2_layer(x, training=training)
        x = self.projection_layer(x, training=training)

        # reshape: (B, H, W, 2*input_dim) -> (B, H, W, 2, 2, input_dim//2),
        # exposing the 2x2 block of residual images at each cell
        input_shape = tf.shape(x)
        x = tf.reshape(
            x,
            [
                input_shape[0],
                input_shape[1],
                input_shape[2],
                2,
                2,
                self._input_dim // 2,
            ],
        )

        # add average (broadcast over the 2x2 block axes)
        x += x_avg[:, :, :, tf.newaxis, tf.newaxis, :]

        # make a new grid: interleave the block axes with the spatial axes,
        # doubling height and width
        x = tf.transpose(x, perm=[0, 1, 3, 2, 4, 5])
        x = tf.reshape(
            x,
            [
                input_shape[0],
                input_shape[1] * 2,
                input_shape[2] * 2,
                self._input_dim // 2,
            ],
        )

        return x

    call.__doc__ = DUCLayer.call.__doc__

    def compute_output_shape(self, input_shape):
        # Expect a 4D (B, H, W, C) shape whose channel count matches input_dim.
        if len(input_shape) != 4:
            raise ValueError(
                "Expected input shape to be (B, H, W, C). Got: {}.".format(input_shape)
            )

        if input_shape[3] != self._input_dim:
            raise ValueError(
                "The input dim must be {}. Got {}.".format(
                    self._input_dim, input_shape[3]
                )
            )

        # Doubled height and width, halved channels.
        output_shape = (
            input_shape[0],
            input_shape[1] * 2,
            input_shape[2] * 2,
            self._input_dim // 2,
        )
        return output_shape

    compute_output_shape.__doc__ = DUCLayer.compute_output_shape.__doc__

    def get_config(self):
        config = {
            "input_dim": self._input_dim,
            "expansion_factor": self._expansion_factor,
        }
        base_config = super(Upsize2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    get_config.__doc__ = DUCLayer.get_config.__doc__
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
class Downsize2D(DUCLayer):
    """Downsizing along the x-axis and the y-axis using convolutions of residuals.

    Downsizing means halving the width and the height and doubling the number of channels.

    This layer is supposed to be nearly an inverse of the Upsize2D layer.

    Parameters
    ----------
    input_dim : int
        the dimensionality (number of channels) of each input pixel
    expansion_factor : int
        the coefficient defining the number of hidden images per cell needed.
    kernel_size : int or tuple or list
        An integer or tuple/list of 2 integers, specifying the height and width of the 2D
        convolution window. Can be a single integer to specify the same value for all spatial
        dimensions.
    kernel_initializer : object
        Initializer for the convolutional kernels.
    bias_initializer : object
        Initializer for the convolutional biases.
    kernel_regularizer : object
        Regularizer for the convolutional kernels.
    bias_regularizer : object
        Regularizer for the convolutional biases.
    kernel_constraint: object
        Constraint function applied to the convolutional layer kernels.
    bias_constraint: object
        Constraint function applied to the convolutional layer biases.
    """

    def __init__(
        self,
        input_dim: int,
        expansion_factor: int = 2,
        kernel_size: tp.Union[int, tuple, list] = 3,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        super(Downsize2D, self).__init__(
            kernel_size=kernel_size,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs
        )

        self._input_dim = input_dim
        self._expansion_factor = expansion_factor

        # The expansion stage only exists when expansion_factor > 1; the
        # projection stage is always built because call() applies it
        # unconditionally.
        if self._expansion_factor > 1:
            self.prenorm1_layer = layers.LayerNormalization(name="prenorm1")
            self.expansion_layer = layers.Conv2D(
                self._input_dim * 4 * self._expansion_factor,
                self._kernel_size,
                padding="same",
                activation="swish",
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="expand",
            )
        self.prenorm2_layer = layers.LayerNormalization(name="prenorm2")
        # Projects down to input_dim channels; together with the input_dim
        # mean channels concatenated in call(), the output has 2 * input_dim
        # channels, matching compute_output_shape().
        self.projection_layer = layers.Conv2D(
            self._input_dim,
            self._kernel_size,
            padding="same",
            activation="sigmoid",  # (0., 1.)
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
            name="project",
        )

    def call(self, x, training: bool = False):
        # reshape: (B, H, W, C) -> (B, H/2, 2, W/2, 2, C), exposing each
        # 2x2 spatial block as two extra axes
        input_shape = tf.shape(x)
        x = tf.reshape(
            x,
            [
                input_shape[0],
                input_shape[1] // 2,
                2,
                input_shape[2] // 2,
                2,
                input_shape[3],
            ],
        )

        # extract average over each 2x2 block
        x_avg = tf.reduce_mean(x, axis=[2, 4], keepdims=True)
        x -= x_avg  # residuals
        x_avg = x_avg[:, :, 0, :, 0, :]  # means

        # make a new grid: fold the block axes into the channel axis,
        # halving height and width and quadrupling channels
        x = tf.transpose(x, perm=[0, 1, 3, 2, 4, 5])
        x = tf.reshape(
            x,
            [
                input_shape[0],
                input_shape[1] // 2,
                input_shape[2] // 2,
                input_shape[3] * 4,
            ],
        )

        # prepend the means so the convolutions see both means and residuals
        x = tf.concat([x_avg, x], axis=3)

        if self._expansion_factor > 1:  # expand
            x = self.prenorm1_layer(x, training=training)
            x = self.expansion_layer(x, training=training)

        # project
        x = self.prenorm2_layer(x, training=training)
        x = self.projection_layer(x, training=training)

        # form output: (means, projected residual summary) channel-wise
        x = tf.concat([x_avg, x], axis=3)

        return x

    call.__doc__ = DUCLayer.call.__doc__

    def compute_output_shape(self, input_shape):
        # Expect a 4D (B, H, W, C) shape with even H and W and C == input_dim.
        if len(input_shape) != 4:
            raise ValueError(
                "Expected input shape to be (B, H, W, C). Got: {}.".format(input_shape)
            )

        if input_shape[1] % 2 != 0:
            raise ValueError("The height must be even. Got {}.".format(input_shape[1]))

        if input_shape[2] % 2 != 0:
            raise ValueError("The width must be even. Got {}.".format(input_shape[2]))

        if input_shape[3] != self._input_dim:
            raise ValueError(
                "The input dim must be {}. Got {}.".format(
                    self._input_dim, input_shape[3]
                )
            )

        # Halved height and width, doubled channels.
        output_shape = (
            input_shape[0],
            input_shape[1] // 2,
            input_shape[2] // 2,
            self._input_dim * 2,
        )

        return output_shape

    compute_output_shape.__doc__ = DUCLayer.compute_output_shape.__doc__

    def get_config(self):
        config = {
            "input_dim": self._input_dim,
            "expansion_factor": self._expansion_factor,
        }
        base_config = super(Downsize2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    get_config.__doc__ = DUCLayer.get_config.__doc__
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
# ----- v2 -----
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
class Downsize2D_V2(DUCLayer):
    """Downsizing along the x-axis and the y-axis using convolutions of residuals.

    Downsizing means halving the width and the height and doubling the number of channels.

    This layer is supposed to be nearly an inverse of the Upsize2D layer.

    Input dimensionality consists of image dimensionality and residual dimensionality.

    Parameters
    ----------
    img_dim : int
        the image dimensionality
    res_dim : int
        the residual dimensionality
    expansion_factor : int
        the coefficient defining the number of hidden images per cell needed.
    kernel_size : int or tuple or list
        An integer or tuple/list of 2 integers, specifying the height and width of the 2D
        convolution window. Can be a single integer to specify the same value for all spatial
        dimensions.
    kernel_initializer : object
        Initializer for the convolutional kernels.
    bias_initializer : object
        Initializer for the convolutional biases.
    kernel_regularizer : object
        Regularizer for the convolutional kernels.
    bias_regularizer : object
        Regularizer for the convolutional biases.
    kernel_constraint: object
        Constraint function applied to the convolutional layer kernels.
    bias_constraint: object
        Constraint function applied to the convolutional layer biases.
    projection_uses_bias : bool
        whether or not the projection convolution layer uses a bias vector
    """

    def __init__(
        self,
        img_dim: int,
        res_dim: int,
        expansion_factor: int = 2,
        kernel_size: tp.Union[int, tuple, list] = 1,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        projection_uses_bias: bool = True,
        **kwargs
    ):
        super(Downsize2D_V2, self).__init__(
            kernel_size=kernel_size,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs
        )

        self._img_dim = img_dim
        self._res_dim = res_dim
        self._expansion_factor = expansion_factor
        self._projection_uses_bias = projection_uses_bias

        # The expansion stage only exists when expansion_factor > 1; the
        # projection stage is always built because call() applies it
        # unconditionally.
        if self._expansion_factor > 1:
            self.prenorm1_layer = layers.LayerNormalization(name="prenorm1")
            self.expansion_layer = layers.Conv2D(
                (self._img_dim + self._res_dim) * 4 * self._expansion_factor,
                self._kernel_size,
                padding="same",
                activation="swish",
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="expand",
            )
        self.prenorm2_layer = layers.LayerNormalization(name="prenorm2")
        # 1x1 projection to img_dim + 2*res_dim channels; together with the
        # img_dim mean channels concatenated in call(), the output has
        # 2 * (img_dim + res_dim) channels, matching compute_output_shape().
        self.projection_layer = layers.Conv2D(
            self._img_dim + self._res_dim * 2,
            1,
            padding="same",
            activation="sigmoid",  # (0., 1.)
            use_bias=self._projection_uses_bias,
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
            name="project",
        )

    def call(self, x, training: bool = False):
        # reshape: (B, 2H', 2W', I+R) -> (B, H', 2, W', 2, I+R), exposing each
        # 2x2 spatial block as two extra axes
        I = self._img_dim
        R = self._res_dim
        input_shape = tf.shape(x)
        B = input_shape[0]
        H = input_shape[1] // 2
        W = input_shape[2] // 2
        x = tf.reshape(x, [B, H, 2, W, 2, I + R])

        # extract average over the image dimensions only (the residual
        # channels are left unchanged by subtracting zeros)
        x_avg = tf.reduce_mean(x[:, :, :, :, :, :I], axis=[2, 4], keepdims=True)
        # NOTE(review): tf.zeros defaults to float32 — assumes x is float32;
        # verify under mixed precision.
        zeros = tf.zeros([B, H, 1, W, 1, R])
        x -= tf.concat([x_avg, zeros], axis=5)  # residuals
        x_avg = x_avg[:, :, 0, :, 0, :]  # means

        # make a new grid: fold the block axes into the channel axis,
        # halving height and width and quadrupling channels
        x = tf.transpose(x, perm=[0, 1, 3, 2, 4, 5])
        x = tf.reshape(x, [B, H, W, (I + R) * 4])

        # prepend the means so the convolutions see both means and residuals
        x = tf.concat([x_avg, x], axis=3)

        if self._expansion_factor > 1:  # expand
            x = self.prenorm1_layer(x, training=training)
            x = self.expansion_layer(x, training=training)

        # project
        x = self.prenorm2_layer(x, training=training)
        x = self.projection_layer(x, training=training)

        # form output: (means, projected residual summary) channel-wise
        x = tf.concat([x_avg, x], axis=3)

        return x

    call.__doc__ = DUCLayer.call.__doc__

    def compute_output_shape(self, input_shape):
        # Expect a 4D (B, H, W, C) shape with even H and W and
        # C == img_dim + res_dim.
        if len(input_shape) != 4:
            raise ValueError(
                "Expected input shape to be (B, H, W, C). Got: {}.".format(input_shape)
            )

        if input_shape[1] % 2 != 0:
            raise ValueError("The height must be even. Got {}.".format(input_shape[1]))

        if input_shape[2] % 2 != 0:
            raise ValueError("The width must be even. Got {}.".format(input_shape[2]))

        if input_shape[3] != self._img_dim + self._res_dim:
            raise ValueError(
                "The input dim must be {}. Got {}.".format(
                    self._img_dim + self._res_dim, input_shape[3]
                )
            )

        # Halved height and width, doubled channels.
        output_shape = (
            input_shape[0],
            input_shape[1] // 2,
            input_shape[2] // 2,
            (self._img_dim + self._res_dim) * 2,
        )

        return output_shape

    compute_output_shape.__doc__ = DUCLayer.compute_output_shape.__doc__

    def get_config(self):
        config = {
            "img_dim": self._img_dim,
            "res_dim": self._res_dim,
            "expansion_factor": self._expansion_factor,
            "projection_uses_bias": self._projection_uses_bias,
        }
        base_config = super(Downsize2D_V2, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    get_config.__doc__ = DUCLayer.get_config.__doc__
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
class Upsize2D_V2(DUCLayer):
|
|
631
|
+
"""Upsizing along the x-axis and the y-axis using convolutions of residuals.
|
|
632
|
+
|
|
633
|
+
Upsizing means doubling the width and the height and halving the number of channels.
|
|
634
|
+
|
|
635
|
+
Input at each grid cell is a pair of `(avg, res)` images at resolution `(H,W,C)`. The pair is
|
|
636
|
+
transformed to `4*expansion_factor` hidden images and then 4 residual images
|
|
637
|
+
`(res1, res2, res3, res4)`. Then, `avg` is added to the 4 residual images, forming at each cell
|
|
638
|
+
a 2x2 block of images `(avg+res1, avg+res2, avg+res3, avg+res4)`. Finally, the new blocks
|
|
639
|
+
across the whole tensor form a new grid, doubling the height and width. Note that each
|
|
640
|
+
`avg+resK` image serves as a pair of average and residual images in the higher resolution.
|
|
641
|
+
|
|
642
|
+
Input dimensionality consists of image dimensionality and residual dimensionality. It must be
|
|
643
|
+
even.
|
|
644
|
+
|
|
645
|
+
Parameters
|
|
646
|
+
----------
|
|
647
|
+
img_dim : int
|
|
648
|
+
the image dimensionality.
|
|
649
|
+
res_dim : int
|
|
650
|
+
the residual dimensionality.
|
|
651
|
+
expansion_factor : int
|
|
652
|
+
the coefficient defining the number of hidden images per cell needed.
|
|
653
|
+
kernel_size : int or tuple or list
|
|
654
|
+
An integer or tuple/list of 2 integers, specifying the height and width of the 2D
|
|
655
|
+
convolution window. Can be a single integer to specify the same value for all spatial
|
|
656
|
+
dimensions.
|
|
657
|
+
kernel_initializer : object
|
|
658
|
+
Initializer for the convolutional kernels.
|
|
659
|
+
bias_initializer : object
|
|
660
|
+
Initializer for the convolutional biases.
|
|
661
|
+
kernel_regularizer : object
|
|
662
|
+
Regularizer for the convolutional kernels.
|
|
663
|
+
bias_regularizer : object
|
|
664
|
+
Regularizer for the convolutional biases.
|
|
665
|
+
kernel_constraint: object
|
|
666
|
+
Contraint function applied to the convolutional layer kernels.
|
|
667
|
+
bias_constraint: object
|
|
668
|
+
Contraint function applied to the convolutional layer biases.
|
|
669
|
+
"""
|
|
670
|
+
|
|
671
|
+
def __init__(
|
|
672
|
+
self,
|
|
673
|
+
img_dim: int,
|
|
674
|
+
res_dim: int,
|
|
675
|
+
expansion_factor: int = 2,
|
|
676
|
+
kernel_size: tp.Union[int, tuple, list] = 3,
|
|
677
|
+
kernel_initializer="glorot_uniform",
|
|
678
|
+
bias_initializer="zeros",
|
|
679
|
+
kernel_regularizer=None,
|
|
680
|
+
bias_regularizer=None,
|
|
681
|
+
kernel_constraint=None,
|
|
682
|
+
bias_constraint=None,
|
|
683
|
+
**kwargs
|
|
684
|
+
):
|
|
685
|
+
super(Upsize2D_V2, self).__init__(
|
|
686
|
+
kernel_size=kernel_size,
|
|
687
|
+
kernel_initializer=kernel_initializer,
|
|
688
|
+
bias_initializer=bias_initializer,
|
|
689
|
+
kernel_regularizer=kernel_regularizer,
|
|
690
|
+
bias_regularizer=bias_regularizer,
|
|
691
|
+
kernel_constraint=kernel_constraint,
|
|
692
|
+
bias_constraint=bias_constraint,
|
|
693
|
+
**kwargs
|
|
694
|
+
)
|
|
695
|
+
|
|
696
|
+
input_dim = img_dim + res_dim
|
|
697
|
+
if input_dim & 1 != 0:
|
|
698
|
+
raise ValueError(
|
|
699
|
+
"Image dimensionality must be even. Got {}.".format(input_dim)
|
|
700
|
+
)
|
|
701
|
+
|
|
702
|
+
self._img_dim = img_dim
|
|
703
|
+
self._res_dim = res_dim
|
|
704
|
+
self._expansion_factor = expansion_factor
|
|
705
|
+
|
|
706
|
+
if self._expansion_factor > 1:
|
|
707
|
+
self.prenorm1_layer = layers.LayerNormalization(name="prenorm1")
|
|
708
|
+
self.expansion_layer = layers.Conv2D(
|
|
709
|
+
(self._img_dim + self._res_dim) * 2 * expansion_factor,
|
|
710
|
+
self._kernel_size,
|
|
711
|
+
padding="same",
|
|
712
|
+
activation="swish",
|
|
713
|
+
kernel_initializer=self._kernel_initializer,
|
|
714
|
+
bias_initializer=self._bias_initializer,
|
|
715
|
+
kernel_regularizer=self._kernel_regularizer,
|
|
716
|
+
bias_regularizer=self._bias_regularizer,
|
|
717
|
+
kernel_constraint=self._kernel_constraint,
|
|
718
|
+
bias_constraint=self._bias_constraint,
|
|
719
|
+
name="expand",
|
|
720
|
+
)
|
|
721
|
+
self.prenorm2_layer = layers.LayerNormalization(name="prenorm2")
|
|
722
|
+
self.projection_layer = layers.Conv2D(
|
|
723
|
+
(self._img_dim + self._res_dim) * 2,
|
|
724
|
+
self._kernel_size if self._expansion_factor <= 1 else 1,
|
|
725
|
+
padding="same",
|
|
726
|
+
activation="tanh", # (-1., 1.)
|
|
727
|
+
kernel_initializer=self._kernel_initializer,
|
|
728
|
+
bias_initializer=self._bias_initializer,
|
|
729
|
+
kernel_regularizer=self._kernel_regularizer,
|
|
730
|
+
bias_regularizer=self._bias_regularizer,
|
|
731
|
+
kernel_constraint=self._kernel_constraint,
|
|
732
|
+
bias_constraint=self._bias_constraint,
|
|
733
|
+
name="project",
|
|
734
|
+
)
|
|
735
|
+
|
|
736
|
+
    def call(self, x, training: bool = False):
        # Channel bookkeeping: the input has img_dim + res_dim channels; each
        # output sub-pixel carries I + R channels, where
        # I + R == (img_dim + res_dim) // 2 (see compute_output_shape).
        I = self._img_dim
        R = (self._res_dim - self._img_dim) // 2
        input_shape = tf.shape(x)
        B = input_shape[0]
        H = input_shape[1]
        W = input_shape[2]

        # Keep the image part of the input; it is added back to every output
        # sub-pixel below, so the convolutions only need to predict deltas.
        x_avg = x[:, :, :, :I]

        if self._expansion_factor > 1:  # expand
            x = self.prenorm1_layer(x, training=training)
            x = self.expansion_layer(x, training=training)

        # project to (img_dim + res_dim) * 2 == 4 * (I + R) channels, with a
        # tanh activation so each delta lies in (-1., 1.)
        x = self.prenorm2_layer(x, training=training)
        x = self.projection_layer(x, training=training)

        # reshape: split the channels into a 2x2 grid of (I + R)-channel sub-pixels
        x = tf.reshape(x, [B, H, W, 2, 2, I + R])

        # add average: pad the kept image part with zeros for the residual
        # channels so the broadcast lines up with the I + R sub-pixel channels
        zeros = tf.zeros([B, H, W, R])
        x_avg = tf.concat([x_avg, zeros], axis=3)  # expanded average
        x += x_avg[:, :, :, tf.newaxis, tf.newaxis, :]

        # make a new grid: interleave the 2x2 sub-pixels into an image of
        # twice the height and twice the width
        x = tf.transpose(x, perm=[0, 1, 3, 2, 4, 5])
        x = tf.reshape(x, [B, H * 2, W * 2, I + R])

        return x

    call.__doc__ = DUCLayer.call.__doc__
|
|
769
|
+
|
|
770
|
+
def compute_output_shape(self, input_shape):
|
|
771
|
+
if len(input_shape) != 4:
|
|
772
|
+
raise ValueError(
|
|
773
|
+
"Expected input shape to be (B, H, W, C). Got: {}.".format(input_shape)
|
|
774
|
+
)
|
|
775
|
+
|
|
776
|
+
if input_shape[3] != (self._img_dim + self._res_dim):
|
|
777
|
+
raise ValueError(
|
|
778
|
+
"The input dim must be {}. Got {}.".format(
|
|
779
|
+
(self._img_dim + self._res_dim), input_shape[3]
|
|
780
|
+
)
|
|
781
|
+
)
|
|
782
|
+
|
|
783
|
+
output_shape = (
|
|
784
|
+
input_shape[0],
|
|
785
|
+
input_shape[1] * 2,
|
|
786
|
+
input_shape[2] * 2,
|
|
787
|
+
(self._img_dim + self._res_dim) // 2,
|
|
788
|
+
)
|
|
789
|
+
return output_shape
|
|
790
|
+
|
|
791
|
+
compute_output_shape.__doc__ = DUCLayer.compute_output_shape.__doc__
|
|
792
|
+
|
|
793
|
+
def get_config(self):
|
|
794
|
+
config = {
|
|
795
|
+
"img_dim": self._img_dim,
|
|
796
|
+
"res_dim": self._res_dim,
|
|
797
|
+
"expansion_factor": self._expansion_factor,
|
|
798
|
+
}
|
|
799
|
+
base_config = super(Upsize2D_V2, self).get_config()
|
|
800
|
+
return dict(list(base_config.items()) + list(config.items()))
|
|
801
|
+
|
|
802
|
+
get_config.__doc__ = DUCLayer.get_config.__doc__
|
|
803
|
+
|
|
804
|
+
|
|
805
|
+
# ----- v3 -----
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
class Downsize2D_V3(DUCLayer):
    """Downsizing along the x-axis and the y-axis using convolutions of residuals.

    Downsizing means halving the width and the height and doubling the number of channels.

    The layer first merges horizontally-adjacent pixel pairs into averages and
    residuals and compresses the residual part with a pointwise projection,
    then repeats the process for vertically-adjacent pairs, so each output
    pixel is the 2x2 block average concatenated with a learned encoding of the
    block's residuals.

    This layer is supposed to be nearly an inverse of the Upsize2D layer.

    Input dimensionality consists of image dimensionality and residual dimensionality.

    Parameters
    ----------
    img_dim : int
        the image dimensionality
    res_dim : int
        the residual dimensionality
    kernel_size : int or tuple or list
        An integer or tuple/list of 2 integers, specifying the height and width of the 2D
        convolution window. Can be a single integer to specify the same value for all spatial
        dimensions.
    kernel_initializer : object
        Initializer for the convolutional kernels.
    bias_initializer : object
        Initializer for the convolutional biases.
    kernel_regularizer : object
        Regularizer for the convolutional kernels.
    bias_regularizer : object
        Regularizer for the convolutional biases.
    kernel_constraint: object
        Contraint function applied to the convolutional layer kernels.
    bias_constraint: object
        Contraint function applied to the convolutional layer biases.
    """

    def __init__(
        self,
        img_dim: int,
        res_dim: int,
        kernel_size: tp.Union[int, tuple, list] = 1,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        super(Downsize2D_V3, self).__init__(
            kernel_size=kernel_size,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs
        )

        self._img_dim = img_dim
        self._res_dim = res_dim

        # Sub-layers are only needed when there are residual channels to
        # compress; with res_dim == 0 the forward pass is purely arithmetic.
        if res_dim > 0:
            if res_dim > img_dim:
                # Extra expansion stage, used by call() only when R > I.
                self.prenorm1_layer = layers.LayerNormalization(name="prenorm1")
                self.expand1_layer = layers.Conv2D(
                    img_dim * 2 + res_dim * 4,
                    self._kernel_size,
                    padding="same",
                    activation="swish",
                    kernel_initializer=self._kernel_initializer,
                    bias_initializer=self._bias_initializer,
                    kernel_regularizer=self._kernel_regularizer,
                    bias_regularizer=self._bias_regularizer,
                    kernel_constraint=self._kernel_constraint,
                    bias_constraint=self._bias_constraint,
                    name="expand1",
                )
            # RR: residual dimensionality after the horizontal merge.
            RR = (img_dim + res_dim * 3 + 1) // 2
            self.prenorm2_layer = layers.LayerNormalization(name="prenorm2")
            self.project1_layer = layers.Conv2D(
                RR,
                1,
                padding="same",
                activation="swish",
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="project1",
            )
            self.prenorm3_layer = layers.LayerNormalization(name="prenorm3")
            self.expand2_layer = layers.Conv2D(
                img_dim * 2 + RR * 4,
                self._kernel_size,
                padding="same",
                activation="swish",
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="expand2",
            )
            self.prenorm4_layer = layers.LayerNormalization(name="prenorm4")
            self.project2_layer = layers.Conv2D(
                img_dim + res_dim * 2,
                1,
                padding="same",
                activation="sigmoid",  # (0., 1.)
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="project2",
            )

    def call(self, x, training: bool = False):
        # shape
        I = self._img_dim
        R = self._res_dim
        input_shape = tf.shape(x)
        B = input_shape[0]
        H = input_shape[1] // 2
        W = input_shape[2] // 2

        # merge pairs of consecutive pixels in each row
        # target R = (I + 3R)/2 if R > 0 else I
        x = tf.reshape(x, [B, H * 2, W, 2, I + R])
        xl = x[:, :, :, 0, :I]
        xr = x[:, :, :, 1, :I]
        x_avg = (xl + xr) * 0.5  # shape = [B, H * 2, W, I]
        x_res = xl - xr  # shape = [B, H * 2, W, I]
        if R > 0:
            # stack: average, horizontal residual, and both pixels' residual channels
            x = tf.concat([x_avg, x_res, x[:, :, :, 0, I:], x[:, :, :, 1, I:]], axis=3)
            if R > I:
                x = self.prenorm1_layer(x, training=training)
                x = self.expand1_layer(
                    x, training=training
                )  # shape = [B, H * 2, W, I * 2 + R * 4]
            x = self.prenorm2_layer(x, training=training)
            x = self.project1_layer(x, training=training)  # shape = [B, H*2, W, RR]
            RR = (I + R * 3 + 1) // 2
        else:
            x = x_res
            RR = I
        x_avg = tf.reshape(x_avg, [B, H, 2, W, I])
        x = tf.reshape(x, [B, H, 2, W, RR])

        # merge pairs of consecutive pixels in each column
        xt = x_avg[:, :, 0, :, :]
        xb = x_avg[:, :, 1, :, :]
        x_avg = (xt + xb) * 0.5  # shape = [B, H, W, I]
        x_res = xt - xb  # shape = [B, H, W, I]
        x = tf.concat([x_avg, x_res, x[:, :, 0, :, :], x[:, :, 1, :, :]], axis=3)
        if R > 0:
            x = self.prenorm3_layer(x, training=training)
            x = self.expand2_layer(x, training=training)  # shape = [B, H, W, I*2+RR*4]
            x = self.prenorm4_layer(x, training=training)
            x = self.project2_layer(x, training=training)  # shape = [B, H, W, I + 2 * R]
            x = tf.concat([x_avg, x], axis=3)  # shape = [B, H, W, 2 * (I + R)]
        # NOTE(review): when res_dim == 0 the concatenation above leaves 4 * I
        # channels, which does not match the (img_dim + res_dim) * 2 channels
        # reported by compute_output_shape — confirm whether the res_dim == 0
        # path is actually exercised.

        # output
        return x

    call.__doc__ = DUCLayer.call.__doc__

    def compute_output_shape(self, input_shape):
        if len(input_shape) != 4:
            raise ValueError(
                "Expected input shape to be (B, H, W, C). Got: {}.".format(input_shape)
            )

        # Both spatial dims are halved, so they must be even.
        if input_shape[1] % 2 != 0:
            raise ValueError("The height must be even. Got {}.".format(input_shape[1]))

        if input_shape[2] % 2 != 0:
            raise ValueError("The width must be even. Got {}.".format(input_shape[2]))

        if input_shape[3] != self._img_dim + self._res_dim:
            raise ValueError(
                "The input dim must be {}. Got {}.".format(
                    self._img_dim + self._res_dim, input_shape[3]
                )
            )

        # Halve height and width, double the channel count.
        output_shape = (
            input_shape[0],
            input_shape[1] // 2,
            input_shape[2] // 2,
            (self._img_dim + self._res_dim) * 2,
        )

        return output_shape

    compute_output_shape.__doc__ = DUCLayer.compute_output_shape.__doc__

    def get_config(self):
        # Layer-specific construction kwargs merged over the base config.
        config = {
            "img_dim": self._img_dim,
            "res_dim": self._res_dim,
        }
        base_config = super(Downsize2D_V3, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    get_config.__doc__ = DUCLayer.get_config.__doc__
|
|
1019
|
+
|
|
1020
|
+
|
|
1021
|
+
Downsize2D_V4 = Downsize2D_V3  # deprecated backward-compat alias; to be removed in future
|
|
1022
|
+
|
|
1023
|
+
|
|
1024
|
+
# ----- v5 ------
|
|
1025
|
+
|
|
1026
|
+
|
|
1027
|
+
class DUCLayerV5(DUCLayer):
    """Base class for the V5 per-axis downsizing/upsizing layers.

    It holds the channel bookkeeping shared by the per-axis layers (e.g.
    :class:`DownsizeX2D`, :class:`UpsizeX2D`, :class:`DownsizeY2D`):

    - ``I`` : the image dimensionality,
    - ``R`` : the residual dimensionality,
    - ``RX`` : the residual dimensionality after downsizing along the x-axis,
      ``(I + 3 * R + 1) // 2``, or ``I`` when ``R == 0``,
    - ``RY`` : the residual dimensionality after downsizing along the y-axis,
      ``I + 2 * R``, or ``I`` when ``R == 0``.

    Parameters
    ----------
    img_dim : int
        the image dimensionality
    res_dim : int
        the residual dimensionality
    kernel_size : int or tuple or list
        An integer or tuple/list of 2 integers, specifying the height and width of the 2D
        convolution window. Can be a single integer to specify the same value for all spatial
        dimensions.
    kernel_initializer : object
        Initializer for the convolutional kernels.
    bias_initializer : object
        Initializer for the convolutional biases.
    kernel_regularizer : object
        Regularizer for the convolutional kernels.
    bias_regularizer : object
        Regularizer for the convolutional biases.
    kernel_constraint: object
        Contraint function applied to the convolutional layer kernels.
    bias_constraint: object
        Contraint function applied to the convolutional layer biases.
    """

    def __init__(
        self,
        img_dim: int,
        res_dim: int,
        kernel_size: tp.Union[int, tuple, list] = 1,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        super(DUCLayerV5, self).__init__(
            kernel_size=kernel_size,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs
        )

        # I: image dim, R: residual dim.
        self.I = img_dim
        self.R = res_dim
        if res_dim == 0:
            # Without residual channels there is nothing to compress, so the
            # per-axis residual dims degenerate to the image dim.
            self.RX = img_dim
            self.RY = img_dim
        else:
            # RX: residual dim after x-downsizing; RY: after y-downsizing.
            self.RX = (img_dim + res_dim * 3 + 1) // 2
            self.RY = img_dim + res_dim * 2

    def get_config(self):
        # Layer-specific construction kwargs merged over the base config.
        config = {
            "img_dim": self.I,
            "res_dim": self.R,
        }
        base_config = super(DUCLayerV5, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    get_config.__doc__ = DUCLayer.get_config.__doc__
|
|
1104
|
+
|
|
1105
|
+
|
|
1106
|
+
class DownsizeX2D(DUCLayerV5):
    """Downsizing along the x-axis using convolutions of residuals.

    Downsizing along the x-axis means halving the width. Each pair of
    horizontally-adjacent pixels is merged into its average and residual;
    when ``res_dim > 0`` the residual part is compressed to ``RX`` channels,
    so the output carries ``I + RX`` channels (``2 * I`` when
    ``res_dim == 0``).

    This layer is supposed to be nearly an inverse of the UpsizeX2D layer.

    Input dimensionality consists of image dimensionality and residual dimensionality.

    Parameters
    ----------
    img_dim : int
        the image dimensionality
    res_dim : int
        the residual dimensionality
    kernel_size : int or tuple or list
        An integer or tuple/list of 2 integers, specifying the height and width of the 2D
        convolution window. Can be a single integer to specify the same value for all spatial
        dimensions.
    kernel_initializer : object
        Initializer for the convolutional kernels.
    bias_initializer : object
        Initializer for the convolutional biases.
    kernel_regularizer : object
        Regularizer for the convolutional kernels.
    bias_regularizer : object
        Regularizer for the convolutional biases.
    kernel_constraint: object
        Contraint function applied to the convolutional layer kernels.
    bias_constraint: object
        Contraint function applied to the convolutional layer biases.
    """

    def __init__(
        self,
        img_dim: int,
        res_dim: int,
        kernel_size: tp.Union[int, tuple, list] = 1,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        super(DownsizeX2D, self).__init__(
            img_dim,
            res_dim,
            kernel_size=kernel_size,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs
        )

        if res_dim > 0:
            if res_dim > img_dim:
                # Extra expansion stage, used by call() only when R > I.
                self.prenorm1_layer = layers.LayerNormalization(name="prenorm1")
                self.expand1_layer = layers.Conv2D(
                    (self.I + self.R) * 4,
                    self._kernel_size,
                    padding="same",
                    activation="swish",
                    kernel_initializer=self._kernel_initializer,
                    bias_initializer=self._bias_initializer,
                    kernel_regularizer=self._kernel_regularizer,
                    bias_regularizer=self._bias_regularizer,
                    kernel_constraint=self._kernel_constraint,
                    bias_constraint=self._bias_constraint,
                    name="expand1",
                )
            self.prenorm2_layer = layers.LayerNormalization(name="prenorm2")
            self.project1_layer = layers.Conv2D(
                self.RX,
                1,
                padding="same",
                activation="sigmoid",  # (0., 1.)
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="project1",
            )

    def call(self, x, training: bool = False):
        # shape
        input_shape = tf.shape(x)
        B = input_shape[0]
        H = input_shape[1]
        W = input_shape[2] // 2

        # merge pairs of consecutive pixels in each row
        if self.R > 0:
            x = tf.reshape(x, [B, H, W, 2, self.I + self.R])
            xl = x[:, :, :, 0, :]
            xr = x[:, :, :, 1, :]
            x_avg = (xl + xr) * 0.5
            x_res = xl - xr
            x = tf.concat([x_avg, x_res], axis=3)
            if self.R > self.I:
                x = self.prenorm1_layer(x, training=training)
                x = self.expand1_layer(x, training=training)  # shape = [B,H,W,(I+R)*4]
            x = self.prenorm2_layer(x, training=training)
            x = self.project1_layer(x, training=training)  # shape = [B, H, W, RX]
            # Keep only the image part of the average and prepend it to the
            # compressed residual channels.
            x_avg = x_avg[:, :, :, : self.I]
            x = tf.concat([x_avg, x], axis=3)  # shape = [B, H, W, I + RX]
        else:
            # No residual channels: simply stack each pixel pair's channels.
            x = tf.reshape(x, [B, H, W, self.I * 2])

        # output
        return x

    call.__doc__ = DUCLayerV5.call.__doc__
|
|
1227
|
+
|
|
1228
|
+
|
|
1229
|
+
class UpsizeX2D(DUCLayerV5):
    """Upsizing along the x-axis using convolutions of residuals.

    Upsizing along the x-axis means doubling the width. From each input pixel
    the layer reconstructs an average and a residual and emits the pair
    ``avg + res`` / ``avg - res`` as two horizontally-adjacent output pixels.

    This layer is supposed to be nearly an inverse of the DownsizeX2D layer.

    Input dimensionality consists of image dimensionality and residual dimensionality.

    Parameters
    ----------
    img_dim : int
        the image dimensionality
    res_dim : int
        the residual dimensionality
    kernel_size : int or tuple or list
        An integer or tuple/list of 2 integers, specifying the height and width of the 2D
        convolution window. Can be a single integer to specify the same value for all spatial
        dimensions.
    kernel_initializer : object
        Initializer for the convolutional kernels.
    bias_initializer : object
        Initializer for the convolutional biases.
    kernel_regularizer : object
        Regularizer for the convolutional kernels.
    bias_regularizer : object
        Regularizer for the convolutional biases.
    kernel_constraint: object
        Contraint function applied to the convolutional layer kernels.
    bias_constraint: object
        Contraint function applied to the convolutional layer biases.
    """

    def __init__(
        self,
        img_dim: int,
        res_dim: int,
        kernel_size: tp.Union[int, tuple, list] = 3,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        super(UpsizeX2D, self).__init__(
            img_dim,
            res_dim,
            kernel_size=kernel_size,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs
        )

        if res_dim > 0:
            self.prenorm1_layer = layers.LayerNormalization(name="prenorm1")
            self.expand1_layer = layers.Conv2D(
                (self.I + self.R) * 4,
                self._kernel_size,
                padding="same",
                activation="swish",
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="expand1",
            )
            self.prenorm2_layer = layers.LayerNormalization(name="prenorm2")
            # project1: reconstructs the residual channels of the average.
            self.project1_layer = layers.Conv2D(
                self.R,
                1,
                padding="same",
                activation="sigmoid",  # (0., 1.)
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="project1",
            )
            # project2: predicts the per-pair residual (delta between pixels).
            self.project2_layer = layers.Conv2D(
                self.I + self.R,
                1,
                padding="same",
                activation="tanh",  # (-1., 1.)
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="project2",
            )

    def call(self, x, training: bool = False):
        # shape
        # assumes the input carries I + RX channels (the DownsizeX2D output)
        # when R > 0 — TODO confirm against callers
        input_shape = tf.shape(x)
        B = input_shape[0]
        H = input_shape[1]
        W = input_shape[2]

        # split pairs of consecutive pixels in each row
        if self.R > 0:
            x_avg = x[:, :, :, : self.I]
            x = self.prenorm1_layer(x, training=training)
            x = self.expand1_layer(x, training=training)  # shape = [B, H, W, (I+R)*4]
            x = self.prenorm2_layer(x, training=training)
            x1 = self.project1_layer(x, training=training)  # shape = [B, H, W, R]
            x = self.project2_layer(x, training=training)  # shape = [B, H, W, I + R]
            # Rebuild the full average (image part + reconstructed residuals),
            # then emit avg+delta and avg-delta as the left/right pixels.
            x_avg = tf.concat([x_avg, x1], axis=3)
            x = tf.concat([x_avg + x, x_avg - x], axis=3)
            x = tf.reshape(x, [B, H, W * 2, self.I + self.R])
        else:
            # No residual channels: unstack each pixel's channels into a pair.
            x = tf.reshape(x, [B, H, W * 2, self.I])

        # output
        return x

    call.__doc__ = DUCLayerV5.call.__doc__
|
|
1358
|
+
|
|
1359
|
+
|
|
1360
|
+
class DownsizeY2D(DUCLayerV5):
    """Downsizing along the y-axis using convolutions of residuals.

    Downsizing along the y-axis means halving the height. Each pair of
    vertically-adjacent pixels is merged into its average and residual; when
    ``res_dim > 0`` the residual part is compressed to ``RY`` channels, so
    the output carries ``I + RY`` channels. The input is expected to carry
    ``I + RX`` channels (the output of DownsizeX2D).

    This layer is supposed to be nearly an inverse of the UpsizeY2D layer.

    Input dimensionality consists of image dimensionality and residual dimensionality.

    Parameters
    ----------
    img_dim : int
        the image dimensionality
    res_dim : int
        the residual dimensionality
    kernel_size : int or tuple or list
        An integer or tuple/list of 2 integers, specifying the height and width of the 2D
        convolution window. Can be a single integer to specify the same value for all spatial
        dimensions.
    kernel_initializer : object
        Initializer for the convolutional kernels.
    bias_initializer : object
        Initializer for the convolutional biases.
    kernel_regularizer : object
        Regularizer for the convolutional kernels.
    bias_regularizer : object
        Regularizer for the convolutional biases.
    kernel_constraint: object
        Contraint function applied to the convolutional layer kernels.
    bias_constraint: object
        Contraint function applied to the convolutional layer biases.
    """

    def __init__(
        self,
        img_dim: int,
        res_dim: int,
        kernel_size: tp.Union[int, tuple, list] = 1,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        super(DownsizeY2D, self).__init__(
            img_dim,
            res_dim,
            kernel_size=kernel_size,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs
        )

        if self.R > 0:
            self.prenorm1_layer = layers.LayerNormalization(name="prenorm1")
            self.expand1_layer = layers.Conv2D(
                (self.I + self.RX) * 4,
                self._kernel_size,
                padding="same",
                activation="swish",
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="expand1",
            )
            self.prenorm2_layer = layers.LayerNormalization(name="prenorm2")
            self.project1_layer = layers.Conv2D(
                self.RY,
                1,
                padding="same",
                activation="sigmoid",  # (0., 1.)
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint,
                name="project1",
            )

    def call(self, x, training: bool = False):
        # shape
        input_shape = tf.shape(x)
        B = input_shape[0]
        H = input_shape[1] // 2
        W = input_shape[2]

        # merge pairs of consecutive pixels in each column
        x = tf.reshape(x, [B, H, 2, W, self.I + self.RX])
        xt = x[:, :, 0, :, :]
        xb = x[:, :, 1, :, :]
        x_avg = (xt + xb) * 0.5
        x_res = xt - xb
        x = tf.concat([x_avg, x_res], axis=3)
        if self.R > 0:
            x = self.prenorm1_layer(x, training=training)
            x = self.expand1_layer(x, training=training)  # shape = [B, H, W, I*2+RX*4]
            x = self.prenorm2_layer(x, training=training)
            x = self.project1_layer(x, training=training)  # shape = [B, H, W, RY]

        # output
        # NOTE(review): when res_dim == 0 the branch above is skipped, so x
        # still holds the uncompressed avg/res concatenation here and the
        # output channel count exceeds I + RY — confirm whether the
        # res_dim == 0 path is actually exercised.
        x_avg = x_avg[:, :, :, : self.I]
        x = tf.concat([x_avg, x], axis=3)  # shape = [B, H, W, I + RY]
        return x

    call.__doc__ = DUCLayerV5.call.__doc__
|
|
1477
|
+
|
|
1478
|
+
|
|
1479
|
+
class UpsizeY2D(DUCLayerV5):
    """Upsizing along the y-axis using convolutions of residuals.

    Upsizing here means doubling the height while keeping the width: for an
    input of shape ``[B, H, W, C]`` the output has shape
    ``[B, H*2, W, img_dim + res_dim]``.

    NOTE(review): the previous docstring described *downsizing* (halving the
    width and the height), which contradicts the code — ``call`` doubles H and
    leaves W unchanged. Presumably this layer is intended as a near-inverse of
    a corresponding downsize-along-y layer in this file; confirm against the
    sibling classes.

    Input dimensionality consists of image dimensionality and residual
    dimensionality: only the first ``img_dim`` input channels are taken as the
    image (average) component; the full input feeds the convolutions.

    Parameters
    ----------
    img_dim : int
        the image dimensionality
    res_dim : int
        the residual dimensionality
    kernel_size : int or tuple or list
        An integer or tuple/list of 2 integers, specifying the height and width of the 2D
        convolution window. Can be a single integer to specify the same value for all spatial
        dimensions.
    kernel_initializer : object
        Initializer for the convolutional kernels.
    bias_initializer : object
        Initializer for the convolutional biases.
    kernel_regularizer : object
        Regularizer for the convolutional kernels.
    bias_regularizer : object
        Regularizer for the convolutional biases.
    kernel_constraint: object
        Constraint function applied to the convolutional layer kernels.
    bias_constraint: object
        Constraint function applied to the convolutional layer biases.
    """

    def __init__(
        self,
        img_dim: int,
        res_dim: int,
        kernel_size: tp.Union[int, tuple, list] = 3,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        # The base class presumably stores img_dim/res_dim as self.I/self.RX
        # and the conv hyper-parameters as self._kernel_* / self._bias_* —
        # DUCLayerV5 is defined elsewhere in this file; confirm there.
        super(UpsizeY2D, self).__init__(
            img_dim,
            res_dim,
            kernel_size=kernel_size,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs
        )

        # Pre-normalisation followed by a spatial conv that expands the
        # channel count to (I + RX) * 4.
        self.prenorm1_layer = layers.LayerNormalization(name="prenorm1")
        self.expand1_layer = layers.Conv2D(
            (self.I + self.RX) * 4,
            self._kernel_size,
            padding="same",
            activation="swish",
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
            name="expand1",
        )
        # Second pre-norm, then two 1x1 projections: project1 emits the new
        # residual channels (sigmoid keeps them in (0, 1)), project2 emits the
        # per-pixel delta (tanh keeps it in (-1, 1)).
        self.prenorm2_layer = layers.LayerNormalization(name="prenorm2")
        self.project1_layer = layers.Conv2D(
            self.RX,
            1,
            padding="same",
            activation="sigmoid",  # (0., 1.)
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
            name="project1",
        )
        self.project2_layer = layers.Conv2D(
            self.I + self.RX,
            1,
            padding="same",
            activation="tanh",  # (-1., 1.)
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
            name="project2",
        )

    def call(self, x, training: bool = False):
        # Dynamic input shape (batch, height, width); channel count is static.
        input_shape = tf.shape(x)
        B = input_shape[0]
        H = input_shape[1]
        W = input_shape[2]

        # Keep the image (average) component: the first I input channels.
        x_avg = x[:, :, :, : self.I]
        x = self.prenorm1_layer(x, training=training)
        x = self.expand1_layer(x, training=training)  # shape = [B, H, W, (I + RX) * 4]
        x = self.prenorm2_layer(x, training=training)
        x1 = self.project1_layer(x, training=training)  # shape = [B, H, W, RX]
        x = self.project2_layer(x, training=training)  # shape = [B, H, W, I + RX]
        # Attach the predicted residual channels to the image component.
        x_avg = tf.concat([x_avg, x1], axis=3)  # shape = [B, H, W, I + RX]

        # output: form (avg + delta, avg - delta) pairs and interleave them
        # along the height axis, doubling H while keeping W.
        x = tf.concat([x_avg + x, x_avg - x], axis=3)
        x = tf.reshape(x, [B, H, W, 2, self.I + self.RX])
        x = tf.transpose(x, perm=[0, 1, 3, 2, 4])  # -> [B, H, 2, W, I + RX]
        x = tf.reshape(x, [B, H * 2, W, self.I + self.RX])
        return x

    call.__doc__ = DUCLayerV5.call.__doc__