mttf-1.3.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mt/keras/__init__.py +8 -0
- mt/keras_src/__init__.py +16 -0
- mt/keras_src/applications_src/__init__.py +33 -0
- mt/keras_src/applications_src/classifier.py +497 -0
- mt/keras_src/applications_src/mobilenet_v3_split.py +544 -0
- mt/keras_src/applications_src/mobilevit.py +292 -0
- mt/keras_src/base.py +28 -0
- mt/keras_src/constraints_src/__init__.py +14 -0
- mt/keras_src/constraints_src/center_around.py +19 -0
- mt/keras_src/layers_src/__init__.py +43 -0
- mt/keras_src/layers_src/counter.py +27 -0
- mt/keras_src/layers_src/floor.py +24 -0
- mt/keras_src/layers_src/identical.py +15 -0
- mt/keras_src/layers_src/image_sizing.py +1605 -0
- mt/keras_src/layers_src/normed_conv2d.py +239 -0
- mt/keras_src/layers_src/simple_mha.py +472 -0
- mt/keras_src/layers_src/soft_bend.py +36 -0
- mt/keras_src/layers_src/transformer_encoder.py +246 -0
- mt/keras_src/layers_src/utils.py +88 -0
- mt/keras_src/layers_src/var_regularizer.py +38 -0
- mt/tf/__init__.py +10 -0
- mt/tf/init.py +25 -0
- mt/tf/keras_applications/__init__.py +5 -0
- mt/tf/keras_layers/__init__.py +5 -0
- mt/tf/mttf_version.py +5 -0
- mt/tf/utils.py +44 -0
- mt/tf/version.py +5 -0
- mt/tfc/__init__.py +291 -0
- mt/tfg/__init__.py +8 -0
- mt/tfp/__init__.py +11 -0
- mt/tfp/real_nvp.py +116 -0
- mttf-1.3.6.data/scripts/dmt_build_package_and_upload_to_nexus.sh +25 -0
- mttf-1.3.6.data/scripts/dmt_pipi.sh +7 -0
- mttf-1.3.6.data/scripts/dmt_twineu.sh +2 -0
- mttf-1.3.6.data/scripts/pipi.sh +7 -0
- mttf-1.3.6.data/scripts/user_build_package_and_upload_to_nexus.sh +25 -0
- mttf-1.3.6.data/scripts/user_pipi.sh +8 -0
- mttf-1.3.6.data/scripts/user_twineu.sh +3 -0
- mttf-1.3.6.data/scripts/wml_build_package_and_upload_to_nexus.sh +25 -0
- mttf-1.3.6.data/scripts/wml_nexus.py +50 -0
- mttf-1.3.6.data/scripts/wml_pipi.sh +7 -0
- mttf-1.3.6.data/scripts/wml_twineu.sh +2 -0
- mttf-1.3.6.dist-info/METADATA +18 -0
- mttf-1.3.6.dist-info/RECORD +47 -0
- mttf-1.3.6.dist-info/WHEEL +5 -0
- mttf-1.3.6.dist-info/licenses/LICENSE +21 -0
- mttf-1.3.6.dist-info/top_level.txt +1 -0
mt/keras_src/layers_src/simple_mha.py
@@ -0,0 +1,472 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A simplified version of keras-based attention layer."""
# pylint: disable=g-classes-have-attributes

import math
import tensorflow as tf
from tensorflow.python.util.tf_export import keras_export

from mt import tp, tfc
from .. import layers, initializers, regularizers, constraints, activations


@keras_export("keras.layers.SimpleMHA2D")
class SimpleMHA2D(layers.Layer):
    """SimpleMHA2D layer.

    This is a simplified version of the Keras-based MultiHeadAttention layer.

    The layer takes as input a high-dim image tensor of shape [B, H, W, KV] where B is the
    batch size, H and W are the grid resolution, and KV is the (high) number of channels. It then
    2D-convolves the tensor into 2 tensors, `key` of shape [B, H, W, N*K] and `value` of shape
    [B, H, W, N*V] where N is the number of heads, K is the key dimensionality and V is the value
    dimensionality. In the absence of V, V is set to K. Next, it reshapes `key` as [B, H*W, N, K]
    and `value` as [B, H*W, N, V]. `key` is then dot-producted with an internal query tensor of
    shape [1, 1, N, K] with broadcasting, forming a tensor of shape [B, H*W, N]. This tensor is
    softmaxed along the axis containing H*W and reshaped as [B, H*W, N, 1], and then multiplied
    with `value` and sum-reduced along the axis containing H*W, forming an output attention tensor
    of shape [B, N, V].

    Parameters
    ----------
    num_heads : int
        Number of attention heads.
    key_dim : int
        Size of each attention head for query and key.
    value_dim : int, optional
        Size of each attention head for value.
    use_bias : bool
        Whether the convolutional layers use bias vectors/matrices.
    activation : object
        Activation for the `value` convolution.
    kernel_initializer : object
        Initializer for convolutional layer kernels.
    bias_initializer : object
        Initializer for convolutional layer biases.
    kernel_regularizer : object
        Regularizer for convolutional layer kernels.
    bias_regularizer : object
        Regularizer for convolutional layer biases.
    kernel_constraint : object
        Constraint function applied to the layer kernels.
    bias_constraint : object
        Constraint function applied to the layer biases.
    dropout : float
        Dropout probability.

    Examples
    --------

    >>> layer = SimpleMHA2D(num_heads=3, key_dim=40, value_dim=80)
    >>> input_tensor = layers.Input(shape=[8, 8, 160])
    >>> output_tensor = layer(input_tensor)
    >>> print(output_tensor.shape)
    (None, 3, 80)
    """

    def __init__(
        self,
        num_heads: int,
        key_dim: int,
        value_dim: tp.Optional[int] = None,
        use_bias=True,
        activation="tanh",
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        dropout: float = 0.2,
        **kwargs
    ):
        super(SimpleMHA2D, self).__init__(**kwargs)
        self._num_heads = num_heads
        self._key_dim = key_dim
        self._value_dim = value_dim if value_dim else key_dim
        self._use_bias = use_bias
        self._activation = activations.get(activation)
        self._kernel_initializer = initializers.get(kernel_initializer)
        self._bias_initializer = initializers.get(bias_initializer)
        self._kernel_regularizer = regularizers.get(kernel_regularizer)
        self._bias_regularizer = regularizers.get(bias_regularizer)
        self._kernel_constraint = constraints.get(kernel_constraint)
        self._bias_constraint = constraints.get(bias_constraint)
        self._dropout = dropout

        self.tensor_query = self.add_weight(
            name="query",
            shape=[1, 1, num_heads, key_dim],
            initializer="random_normal",
            trainable=True,
        )

        self.layer_key_proj = layers.Conv2D(
            self._num_heads * self._key_dim,  # filters
            1,  # kernel_size
            use_bias=self._use_bias,
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
        )

        self.layer_value_proj = layers.Conv2D(
            self._num_heads * self._value_dim,  # filters
            1,  # kernel_size
            use_bias=self._use_bias,
            activation=self._activation,
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
        )

        self.layer_softmax = layers.Softmax(axis=1)
        if self._dropout > 0:
            self.layer_dropout = layers.Dropout(rate=self._dropout)

    def call(self, key_value, training=None):
        """The call function.

        Parameters
        ----------
        key_value : tensorflow.Tensor
            input `Tensor` of shape `(B, H, W, KV)`.
        training : bool
            Whether the layer should behave in training mode or in inference mode.

        Returns
        -------
        attention_output : tensorflow.Tensor
            The result of the computation, of shape `(B, N, V)`, where `N` is the number of heads
            and `V` is the value dimensionality.
        """

        bs_shape = tf.shape(key_value)[0:1]
        hw_shape = tf.reduce_prod(tf.shape(key_value)[1:3], axis=0, keepdims=True)

        # N = `num_attention_heads`
        # K = `key_dim`
        # V = `value_dim`
        # H = `image_height`
        # W = `image_width`
        # `query` = [1, 1, N, K]

        # `key` = [B, H*W, N, K]
        key = self.layer_key_proj(key_value, training=training)
        key_shape = tf.concat(
            [bs_shape, hw_shape, [self._num_heads, self._key_dim]], axis=0
        )
        key = tf.reshape(key, key_shape)

        # `value` = [B, H*W, N, V]
        value = self.layer_value_proj(key_value, training=training)
        value_shape = tf.concat(
            [bs_shape, hw_shape, [self._num_heads, self._value_dim]], axis=0
        )
        value = tf.reshape(value, value_shape)

        # `dot_prod` = [B, H*W, N]
        dot_prod = tf.reduce_sum(self.tensor_query * key, axis=-1)

        # `softmax` = [B, H*W, N, 1]
        softmax = self.layer_softmax(dot_prod)
        if self._dropout > 0:
            softmax = self.layer_dropout(softmax, training=training)
        softmax = tf.expand_dims(softmax, axis=-1)

        # `attention_output` = [B, N, V]
        attention_output = tf.reduce_sum(softmax * value, axis=1)

        return attention_output

    def get_config(self):
        config = {
            "num_heads": self._num_heads,
            "key_dim": self._key_dim,
            "value_dim": self._value_dim,
            "use_bias": self._use_bias,
            "activation": activations.serialize(self._activation),
            "kernel_initializer": initializers.serialize(self._kernel_initializer),
            "bias_initializer": initializers.serialize(self._bias_initializer),
            "kernel_regularizer": regularizers.serialize(self._kernel_regularizer),
            "bias_regularizer": regularizers.serialize(self._bias_regularizer),
            "kernel_constraint": constraints.serialize(self._kernel_constraint),
            "bias_constraint": constraints.serialize(self._bias_constraint),
            "dropout": self._dropout,
        }
        base_config = super(SimpleMHA2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
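(For orientation only; the following sketch is not part of the wheel. It traces the shape flow described in the SimpleMHA2D docstring with plain TensorFlow ops, substituting random tensors for the layer's 1x1 convolution outputs and its trained query weight.)

import tensorflow as tf

B, H, W, N, K, V = 2, 8, 8, 3, 40, 80
key = tf.random.normal([B, H, W, N * K])             # stand-in for the key Conv2D output
value = tf.random.normal([B, H, W, N * V])           # stand-in for the value Conv2D output
key = tf.reshape(key, [B, H * W, N, K])              # [B, H*W, N, K]
value = tf.reshape(value, [B, H * W, N, V])          # [B, H*W, N, V]

query = tf.random.normal([1, 1, N, K])               # stand-in for the internal query weight
dot_prod = tf.reduce_sum(query * key, axis=-1)       # broadcast dot product -> [B, H*W, N]
weights = tf.nn.softmax(dot_prod, axis=1)            # softmax along the H*W axis
weights = tf.expand_dims(weights, axis=-1)           # [B, H*W, N, 1]
attention = tf.reduce_sum(weights * value, axis=1)   # sum-reduce over H*W -> [B, N, V]
print(attention.shape)                               # (2, 3, 80)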
@keras_export("keras.layers.MHAPool2D")
class MHAPool2D(layers.Layer):
    """Pooling in 2D using Keras-based self-attention.

    The layer takes as input a high-dim image tensor of shape [B, H, W, D] where B is the
    batch size, H and W are the grid resolution, and D is the (high) number of channels. First, it
    pools the tensor down to an unprojected query tensor of shape [B, H2, W2, D] using max or avg
    pooling. Second, it 2D-convolves the unprojected query tensor, the input tensor and the input
    tensor to the `query` tensor of shape [B, H2, W2, N*K], the `key` tensor of shape
    [B, H, W, N*K] and the `value` tensor of shape [B, H, W, N*V] where N is the number of heads,
    K is the key dimensionality and V is the value dimensionality. In the absence of V, V is set to
    K. Third, it divides `query` by sqrt(K). Fourth, it splits the `num_heads` dimension out of
    the last dimension from all 3 tensors. Fifth, in a single einsum op of `query` and `key`, it
    contracts K, makes an outer-product of [H2,W2] with [H,W], and runs through all B and N,
    outputting a `prod` tensor of shape [B, H2, W2, H, W, N]. Sixth, it merges H with W in both
    `prod` and `value`, resulting in shapes [B, H2, W2, H*W, N] and [B, H*W, N, V] respectively.
    Seventh, `prod` is softmaxed along the H*W axis. Eighth, in another einsum op of `prod` and
    `value`, it contracts H*W while running through all other indices, outputting an `attention`
    tensor of shape [B, H2, W2, N, V]. Finally, it merges N with V and returns the result.

    Parameters
    ----------
    num_heads : int
        Number of attention heads.
    key_dim : int
        Size of each attention head for query and key.
    value_dim : int, optional
        Size of each attention head for value.
    pooling : {'max', 'avg'}
        Type of 2D pooling.
    pool_size : int or tuple
        Integer or tuple of 2 integers, factors by which to downscale (vertical, horizontal).
        (2, 2) will halve the input in both spatial dimensions. If only one integer is specified,
        the same window length will be used for both dimensions.
    use_bias : bool
        Whether the convolutional layers use bias vectors/matrices.
    activation : object
        Activation for the `value` convolution.
    kernel_initializer : object
        Initializer for the convolutional layer kernels.
    bias_initializer : object
        Initializer for the convolutional layer biases.
    kernel_regularizer : object
        Regularizer for the convolutional layer kernels.
    bias_regularizer : object
        Regularizer for the convolutional layer biases.
    kernel_constraint : object
        Constraint function applied to the layer kernels.
    bias_constraint : object
        Constraint function applied to the layer biases.
    dropout : float
        Dropout probability.

    Examples
    --------

    >>> layer = MHAPool2D(num_heads=3, key_dim=40, value_dim=80)
    >>> input_tensor = layers.Input(shape=[8, 8, 160])
    >>> output_tensor = layer(input_tensor)
    >>> print(output_tensor.shape)
    (None, 4, 4, 240)
    """

    def __init__(
        self,
        num_heads: int,
        key_dim: int,
        value_dim: tp.Optional[int] = None,
        pooling: str = "max",
        pool_size=(2, 2),
        use_bias: bool = True,
        activation="swish",
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        dropout: float = 0.2,
        **kwargs
    ):
        super(MHAPool2D, self).__init__(**kwargs)
        self._num_heads = num_heads
        self._key_dim = key_dim
        self._value_dim = value_dim if value_dim else key_dim
        self._pooling = pooling
        self._pool_size = pool_size
        self._use_bias = use_bias
        self._activation = activations.get(activation)
        self._kernel_initializer = initializers.get(kernel_initializer)
        self._bias_initializer = initializers.get(bias_initializer)
        self._kernel_regularizer = regularizers.get(kernel_regularizer)
        self._bias_regularizer = regularizers.get(bias_regularizer)
        self._kernel_constraint = constraints.get(kernel_constraint)
        self._bias_constraint = constraints.get(bias_constraint)
        self._dropout = dropout

        if self._pooling == "max":
            self.layer_pool = layers.MaxPool2D()
        elif self._pooling == "avg":
            self.layer_pool = layers.AveragePooling2D()
        else:
            raise tfc.ModelSyntaxError(
                "Invalid pooling string: '{}'.".format(self._pooling)
            )

        self.layer_query_proj = layers.Conv2D(
            self._num_heads * self._key_dim,  # filters
            1,  # kernel_size
            use_bias=self._use_bias,
            activation=None,
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
            name="query_proj",
        )

        self.layer_key_proj = layers.Conv2D(
            self._num_heads * self._key_dim,  # filters
            1,  # kernel_size
            use_bias=self._use_bias,
            activation=None,
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
            name="key_proj",
        )

        self.layer_value_proj = layers.Conv2D(
            self._num_heads * self._value_dim,  # filters
            1,  # kernel_size
            use_bias=self._use_bias,
            activation=self._activation,
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
            name="value_proj",
        )

        self.layer_softmax = layers.Softmax(axis=3)
        if self._dropout > 0:
            self.layer_dropout = layers.Dropout(rate=self._dropout)

    def call(self, blob, training=None, return_attention_scores: bool = False):
        """The call function.

        Parameters
        ----------
        blob : tensorflow.Tensor
            input `Tensor` of shape `(B, H, W, D)`.
        training : bool
            Whether the layer should behave in training mode or in inference mode.
        return_attention_scores : bool
            Whether to return the attention scores as well.

        Returns
        -------
        attention_output : tensorflow.Tensor
            The result of the computation, of shape `(B, H2, W2, N*V)`, where `H2` and `W2`
            represent the downsampled resolution, `N` is the number of heads and `V` is the value
            dimensionality.
        attention_scores : tensorflow.Tensor
            Multi-headed attention weights, of shape `(B, H2, W2, H*W, N)`. Only available if
            `return_attention_scores` is True.
        """

        blob_shape = tf.shape(blob)
        bs_shape = blob_shape[0:1]  # [B]
        hw_shape = tf.reduce_prod(blob_shape[1:3], axis=0, keepdims=True)  # [H*W]

        # N = `num_attention_heads`
        # K = `key_dim`
        # V = `value_dim`
        # H = `image_height`
        # W = `image_width`

        # `query` = [B, H2, W2, D]
        query = self.layer_pool(blob)

        # `query` = [B, H2, W2, N, K]
        query = self.layer_query_proj(query, training=training)
        query_head_shape = tf.shape(query)[0:3]  # [B, H2, W2]
        query_shape = tf.concat(
            [query_head_shape, [self._num_heads, self._key_dim]], axis=0
        )
        query = tf.reshape(query, query_shape)

        # `key` = [B, H*W, N, K]
        key = self.layer_key_proj(blob, training=training)
        key_shape = tf.concat(
            [bs_shape, hw_shape, [self._num_heads, self._key_dim]], axis=0
        )
        key = tf.reshape(key, key_shape)

        # `value` = [B, H*W, N, V]
        value = self.layer_value_proj(blob, training=training)
        value_shape = tf.concat(
            [bs_shape, hw_shape, [self._num_heads, self._value_dim]], axis=0
        )
        value = tf.reshape(value, value_shape)

        # `prod` = [B, H2, W2, H*W, N]
        query *= 1.0 / math.sqrt(float(self._key_dim))
        prod = tf.einsum("bhwnk,bink->bhwin", query, key)

        # `attention_scores` = [B, H2, W2, H*W, N]
        attention_scores = self.layer_softmax(prod)
        if self._dropout > 0:
            dropout = self.layer_dropout(attention_scores, training=training)
        else:
            dropout = attention_scores

        # `attention_output` = [B, H2, W2, N, V]
        attention_output = tf.einsum("bhwin,binv->bhwnv", dropout, value)

        # `output`
        output_shape = tf.concat(
            [query_head_shape, [self._num_heads * self._value_dim]], axis=0
        )
        output = tf.reshape(attention_output, output_shape)

        if return_attention_scores:
            return output, attention_scores
        return output

    def get_config(self):
        config = {
            "num_heads": self._num_heads,
            "key_dim": self._key_dim,
            "value_dim": self._value_dim,
            "pooling": self._pooling,
            "pool_size": self._pool_size,
            "use_bias": self._use_bias,
            "activation": activations.serialize(self._activation),
            "kernel_initializer": initializers.serialize(self._kernel_initializer),
            "bias_initializer": initializers.serialize(self._bias_initializer),
            "kernel_regularizer": regularizers.serialize(self._kernel_regularizer),
            "bias_regularizer": regularizers.serialize(self._bias_regularizer),
            "kernel_constraint": constraints.serialize(self._kernel_constraint),
            "bias_constraint": constraints.serialize(self._bias_constraint),
            "dropout": self._dropout,
        }
        base_config = super(MHAPool2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
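(Again for orientation only, not part of the wheel: the two einsum contractions at the heart of MHAPool2D.call, traced with random stand-ins for the pooled-and-projected query and for the key/value projections.)

import math
import tensorflow as tf

B, H, W, H2, W2, N, K, V = 2, 8, 8, 4, 4, 3, 40, 80
query = tf.random.normal([B, H2, W2, N, K])            # pooled + projected query
key = tf.random.normal([B, H * W, N, K])               # key projection, H and W merged
value = tf.random.normal([B, H * W, N, V])             # value projection, H and W merged

query = query / math.sqrt(float(K))                    # scale by 1/sqrt(K)
prod = tf.einsum("bhwnk,bink->bhwin", query, key)      # [B, H2, W2, H*W, N]
scores = tf.nn.softmax(prod, axis=3)                   # softmax over the H*W axis
att = tf.einsum("bhwin,binv->bhwnv", scores, value)    # [B, H2, W2, N, V]
out = tf.reshape(att, [B, H2, W2, N * V])              # merge N with V
print(out.shape)                                       # (2, 4, 4, 240)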
mt/keras_src/layers_src/soft_bend.py
@@ -0,0 +1,36 @@
from .. import layers


class SoftBend(layers.Layer):
    """Soft bend activation layer.

    Function: `|x|^alpha * tanh(x)`, bending the linear activation a bit.

    If alpha is less than 1, it acts as a soft squash.
    If alpha is greater than 1, it acts as a soft explode.
    If alpha is 1, it acts as the linear activation function.

    Parameters
    ----------
    alpha : float
        the bending coefficient
    **kwds : dict
        keyword arguments passed as-is to the super class
    """

    def __init__(self, alpha: float = 0.5, **kwds):
        super(SoftBend, self).__init__(**kwds)

        self.alpha = alpha

    def call(self, x):
        from tensorflow.math import pow, abs, tanh

        return pow(abs(x), self.alpha) * tanh(x)

    call.__doc__ = layers.Layer.call.__doc__

    def compute_output_shape(self, input_shape):
        return input_shape

    compute_output_shape.__doc__ = layers.Layer.compute_output_shape.__doc__
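(Illustrative only, not part of the wheel: the soft-bend formula |x|^alpha * tanh(x) evaluated directly with TensorFlow ops, showing how alpha < 1 squashes and alpha > 1 amplifies large inputs.)

import tensorflow as tf

def soft_bend(x, alpha):
    # same formula as SoftBend.call: |x|^alpha * tanh(x)
    return tf.pow(tf.abs(x), alpha) * tf.tanh(x)

x = tf.constant([-4.0, -1.0, 0.0, 1.0, 4.0])
for alpha in (0.5, 1.0, 2.0):
    print(alpha, soft_bend(x, alpha).numpy())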