keras-hub-nightly 0.19.0.dev202502090345__py3-none-any.whl → 0.19.0.dev202502110348__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- keras_hub/api/layers/__init__.py +3 -0
- keras_hub/api/models/__init__.py +3 -0
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +679 -386
- keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +73 -0
- keras_hub/src/models/mobilenet/mobilenet_image_classifier_preprocessor.py +14 -0
- keras_hub/src/models/mobilenet/mobilenet_image_converter.py +8 -0
- keras_hub/src/models/mobilenet/mobilenet_presets.py +15 -0
- keras_hub/src/models/mobilenet/util.py +23 -0
- keras_hub/src/utils/preset_utils.py +33 -1
- keras_hub/src/utils/timm/convert_mobilenet.py +201 -0
- keras_hub/src/utils/timm/preset_loader.py +3 -0
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.19.0.dev202502090345.dist-info → keras_hub_nightly-0.19.0.dev202502110348.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.19.0.dev202502090345.dist-info → keras_hub_nightly-0.19.0.dev202502110348.dist-info}/RECORD +16 -11
- {keras_hub_nightly-0.19.0.dev202502090345.dist-info → keras_hub_nightly-0.19.0.dev202502110348.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.19.0.dev202502090345.dist-info → keras_hub_nightly-0.19.0.dev202502110348.dist-info}/top_level.txt +0 -0
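The hunks below all come from keras_hub/src/models/mobilenet/mobilenet_backbone.py (+679 -386): the file is rewritten from module-level helper functions into `keras.layers.Layer` subclasses, and the backbone now takes nested per-stack lists of block settings plus a `dtype` argument. For orientation only, here is a minimal hypothetical sketch of constructing the reworked backbone; every configuration value is an illustrative guess adapted from the docstring example in the diff, not a released preset, and the `keras_hub.models.MobileNetBackbone` import assumes the API export added in this release.

import keras
from keras_hub.models import MobileNetBackbone

# Illustrative two-stack configuration (assumed values, not a published preset).
backbone = MobileNetBackbone(
    stackwise_expansion=[[40, 56], [64, 144, 144]],
    stackwise_num_blocks=[2, 3],
    stackwise_num_filters=[[16, 16], [24, 24, 24]],
    stackwise_kernel_size=[[3, 3], [5, 5, 5]],
    stackwise_num_strides=[[2, 1], [2, 1, 1]],
    stackwise_se_ratio=[[None, None], [0.25, 0.25, 0.25]],
    stackwise_activation=[["relu", "relu"], ["hard_swish"] * 3],
    stackwise_padding=[[1, 1], [2, 2, 2]],  # pad = kernel_size // 2
    output_num_filters=288,
    depthwise_filters=8,
    last_layer_filter=288,
    squeeze_and_excite=0.5,
    image_shape=(224, 224, 3),
)
features = backbone(keras.random.uniform((1, 224, 224, 3)))
print(features.shape)  # (1, 14, 14, 288) with this configuration
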
@@ -1,11 +1,562 @@
 import keras
-from keras import ops
 
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.models.backbone import Backbone
+from keras_hub.src.models.mobilenet.util import adjust_channels
 
-BN_EPSILON = 1e-
-BN_MOMENTUM = 0.
+BN_EPSILON = 1e-5
+BN_MOMENTUM = 0.9
+
+
+class SqueezeAndExcite2D(keras.layers.Layer):
+    """
+    Description:
+        This layer applies a content-aware mechanism to adaptively assign
+        channel-wise weights. It uses global average pooling to compress
+        feature maps into single values, which are then processed by
+        two Conv1D layers: the first reduces the dimensionality, and
+        the second restores it.
+    Args:
+        filters: Number of input and output filters. The number of input and
+            output filters is same.
+        bottleneck_filters: (Optional) Number of bottleneck filters. Defaults
+            to `0.25 * filters`
+        squeeze_activation: (Optional) String, callable (or
+            keras.layers.Layer) or keras.activations.Activation instance
+            denoting activation to be applied after squeeze convolution.
+            Defaults to `relu`.
+        excite_activation: (Optional) String, callable (or
+            keras.layers.Layer) or keras.activations.Activation instance
+            denoting activation to be applied after excite convolution.
+            Defaults to `sigmoid`.
+        name: Name of the layer
+        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
+            to use for the model's computations and weights.
+    """
+
+    def __init__(
+        self,
+        filters,
+        bottleneck_filters=None,
+        squeeze_activation="relu",
+        excite_activation="sigmoid",
+        name=None,
+        dtype=None,
+        **kwargs,
+    ):
+        super().__init__(dtype=dtype, **kwargs)
+        self.filters = filters
+        self.bottleneck_filters = bottleneck_filters
+        self.squeeze_activation = squeeze_activation
+        self.excite_activation = excite_activation
+        self.name = name
+
+        image_data_format = keras.config.image_data_format()
+        if image_data_format == "channels_last":
+            self.spatial_dims = (1, 2)
+        else:
+            self.spatial_dims = (2, 3)
+
+        self.conv_reduce = keras.layers.Conv2D(
+            bottleneck_filters,
+            (1, 1),
+            data_format=image_data_format,
+            name=f"{name}_conv_reduce",
+            dtype=dtype,
+        )
+        self.activation1 = keras.layers.Activation(
+            self.squeeze_activation,
+            name=self.name + "squeeze_activation",
+            dtype=dtype,
+        )
+
+        self.conv_expand = keras.layers.Conv2D(
+            filters,
+            (1, 1),
+            data_format=image_data_format,
+            name=f"{name}_conv_expand",
+            dtype=dtype,
+        )
+        self.gate = keras.layers.Activation(
+            self.excite_activation,
+            name=self.name + "excite_activation",
+            dtype=dtype,
+        )
+
+    def compute_output_shape(self, input_shape):
+        shape = self.conv_reduce.compute_output_shape(input_shape)
+        shape = self.activation1.compute_output_shape(shape)
+        shape = self.conv_expand.compute_output_shape(shape)
+        return self.gate.compute_output_shape(shape)
+
+    def build(self, input_shape):
+        self.conv_reduce.build(input_shape)
+        input_shape = self.conv_reduce.compute_output_shape(input_shape)
+        self.activation1.build(input_shape)
+        input_shape = self.activation1.compute_output_shape(input_shape)
+        self.conv_expand.build(input_shape)
+        input_shape = self.conv_expand.compute_output_shape(input_shape)
+        self.gate.build(input_shape)
+        self.built = True
+
+    def call(self, inputs):
+        x_se = keras.ops.mean(inputs, axis=self.spatial_dims, keepdims=True)
+        x_se = self.conv_reduce(x_se)
+        x_se = self.activation1(x_se)
+        x_se = self.conv_expand(x_se)
+        return inputs * self.gate(x_se)
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "filters": self.filters,
+                "bottleneck_filters": self.bottleneck_filters,
+                "squeeze_activation": self.squeeze_activation,
+                "excite_activation": self.excite_activation,
+                "name": self.name,
+                "spatial_dims": self.spatial_dims,
+            }
+        )
+        return config
+
+
+class DepthwiseConvBlock(keras.layers.Layer):
+    """
+    A depthwise convolution block consists of a depthwise conv,
+    batch normalization, relu, optional squeeze & excite, pointwise convolution,
+    and batch normalization layer.
+
+    Args:
+        infilters: int, the output channels for the initial depthwise conv
+        filters: int, the dimensionality of the output space
+            (i.e. the number of output filters in the pointwise convolution).
+        kernel_size: int or Tuple[int, int], the kernel size to apply
+            to the initial depthwise convolution
+        strides: An int or Tuple[int, int], specifying the strides
+            of the convolution along the width and height.
+            Can be a single integer to specify the same value for
+            all spatial dimensions.
+        squeeze_excite_ratio: squeeze & excite ratio: float[Optional], if
+            exists, specifies the ratio of channels (<1) to squeeze the initial
+            signal into before reexciting back out. If (>1) technically, it's an
+            excite & squeeze layer. If this doesn't exist there is no
+            SqueezeExcite layer.
+        name: str, name of the layer
+        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
+            to use for the model's computations and weights.
+
+    Input shape when applied as a layer:
+        4D tensor with shape: `(batch, rows, cols, channels)` in "channels_last"
+        4D tensor with shape: `(batch, channels, rows, cols)` in
+            "channels_first"
+    Returns:
+        Output tensor of block.
+    """
+
+    def __init__(
+        self,
+        infilters,
+        filters,
+        kernel_size=3,
+        stride=2,
+        squeeze_excite_ratio=None,
+        name=None,
+        dtype=None,
+        **kwargs,
+    ):
+        super().__init__(dtype=dtype, **kwargs)
+        self.infilters = infilters
+        self.filters = filters
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.squeeze_excite_ratio = squeeze_excite_ratio
+        self.name = name
+
+        channel_axis = (
+            -1 if keras.config.image_data_format() == "channels_last" else 1
+        )
+        self.name = name = f"{name}_0"
+
+        self.pad = keras.layers.ZeroPadding2D(
+            padding=(1, 1),
+            name=f"{name}_pad",
+            dtype=dtype,
+        )
+        self.conv1 = keras.layers.Conv2D(
+            infilters,
+            kernel_size,
+            strides=stride,
+            padding="valid",
+            data_format=keras.config.image_data_format(),
+            groups=infilters,
+            use_bias=False,
+            name=f"{name}_conv1",
+            dtype=dtype,
+        )
+        self.batch_normalization1 = keras.layers.BatchNormalization(
+            axis=channel_axis,
+            epsilon=BN_EPSILON,
+            momentum=BN_MOMENTUM,
+            name=f"{name}_bn1",
+            dtype=dtype,
+        )
+        self.activation1 = keras.layers.ReLU(dtype=dtype)
+
+        if squeeze_excite_ratio:
+            self.se_layer = SqueezeAndExcite2D(
+                filters=infilters,
+                bottleneck_filters=adjust_channels(
+                    infilters * squeeze_excite_ratio
+                ),
+                squeeze_activation="relu",
+                excite_activation=keras.activations.hard_sigmoid,
+                name=f"{name}_squeeze_excite",
+                dtype=dtype,
+            )
+
+        self.conv2 = keras.layers.Conv2D(
+            filters,
+            kernel_size=1,
+            data_format=keras.config.image_data_format(),
+            use_bias=False,
+            name=f"{name}_conv2",
+            dtype=dtype,
+        )
+        self.batch_normalization2 = keras.layers.BatchNormalization(
+            axis=channel_axis,
+            epsilon=BN_EPSILON,
+            momentum=BN_MOMENTUM,
+            name=f"{name}_bn2",
+            dtype=dtype,
+        )
+
+    def build(self, input_shape):
+        self.pad.build(input_shape)
+        input_shape = self.pad.compute_output_shape(input_shape)
+        self.conv1.build(input_shape)
+        input_shape = self.conv1.compute_output_shape(input_shape)
+        self.batch_normalization1.build(input_shape)
+        input_shape = self.batch_normalization1.compute_output_shape(
+            input_shape
+        )
+        self.activation1.build(input_shape)
+        input_shape = self.activation1.compute_output_shape(input_shape)
+        if self.squeeze_excite_ratio:
+            self.se_layer.build(input_shape)
+            input_shape = self.se_layer.compute_output_shape(input_shape)
+        self.conv2.build(input_shape)
+        input_shape = self.conv2.compute_output_shape(input_shape)
+        self.batch_normalization2.build(input_shape)
+        self.built = True
+
+    def call(self, inputs):
+        x = self.pad(inputs)
+        x = self.conv1(x)
+        x = self.batch_normalization1(x)
+        x = self.activation1(x)
+
+        if self.se_layer:
+            x = self.se_layer(x)
+
+        x = self.conv2(x)
+        x = self.batch_normalization2(x)
+        return x
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "infilters": self.infilters,
+                "filters": self.filters,
+                "kernel_size": self.kernel_size,
+                "stride": self.stride,
+                "squeeze_excite_ratio": self.squeeze_excite_ratio,
+                "name": self.name,
+            }
+        )
+        return config
+
+
+class InvertedResidualBlock(keras.layers.Layer):
+    """An Inverted Residual Block.
+
+    Args:
+        expansion: integer, the expansion ratio, multiplied with infilters to
+            get the minimum value passed to adjust_channels.
+        infilters: Int, the output channels for the initial depthwise conv
+        filters: integer, number of filters for convolution layer.
+        kernel_size: integer, the kernel size for DepthWise Convolutions.
+        stride: integer, the stride length for DepthWise Convolutions.
+        squeeze_excite_ratio: float, ratio for bottleneck filters. Number of
+            bottleneck filters = filters * se_ratio.
+        activation: the activation layer to use.
+        padding: padding in the conv2d layer
+        name: string, block label.
+        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
+            to use for the model's computations and weights.
+
+    Input shape when applied as a layer:
+        4D tensor with shape: `(batch, rows, cols, channels)` in "channels_last"
+        4D tensor with shape: `(batch, channels, rows, cols)` in
+            "channels_first"
+    Returns:
+        Output tensor of block.
+    """
+
+    def __init__(
+        self,
+        expansion,
+        infilters,
+        filters,
+        kernel_size,
+        stride,
+        squeeze_excite_ratio,
+        activation,
+        padding,
+        name=None,
+        dtype=None,
+        **kwargs,
+    ):
+        super().__init__(dtype=dtype, **kwargs)
+        self.expansion = expansion
+        self.infilters = infilters
+        self.filters = filters
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.squeeze_excite_ratio = squeeze_excite_ratio
+        self.activation = activation
+        self.padding = padding
+        self.name = name
+
+        channel_axis = (
+            -1 if keras.config.image_data_format() == "channels_last" else 1
+        )
+        expanded_channels = adjust_channels(expansion)
+
+        self.conv1 = keras.layers.Conv2D(
+            expanded_channels,
+            kernel_size=1,
+            data_format=keras.config.image_data_format(),
+            use_bias=False,
+            name=f"{name}_conv1",
+            dtype=dtype,
+        )
+
+        self.batch_normalization1 = keras.layers.BatchNormalization(
+            axis=channel_axis,
+            epsilon=BN_EPSILON,
+            momentum=BN_MOMENTUM,
+            name=f"{name}_bn1",
+            dtype=dtype,
+        )
+
+        self.activation1 = keras.layers.Activation(
+            activation=activation,
+            dtype=dtype,
+        )
+
+        self.pad = keras.layers.ZeroPadding2D(
+            padding=(padding, padding),
+            name=f"{name}_pad",
+            dtype=dtype,
+        )
+
+        self.conv2 = keras.layers.Conv2D(
+            expanded_channels,
+            kernel_size,
+            strides=stride,
+            padding="valid",
+            groups=expanded_channels,
+            data_format=keras.config.image_data_format(),
+            use_bias=False,
+            name=f"{name}_conv2",
+            dtype=dtype,
+        )
+        self.batch_normalization2 = keras.layers.BatchNormalization(
+            axis=channel_axis,
+            epsilon=BN_EPSILON,
+            momentum=BN_MOMENTUM,
+            name=f"{name}_bn2",
+            dtype=dtype,
+        )
+
+        self.activation2 = keras.layers.Activation(
+            activation=activation,
+            dtype=dtype,
+        )
+
+        self.squeeze_excite = None
+        if self.squeeze_excite_ratio:
+            se_filters = expanded_channels
+            self.squeeze_excite = SqueezeAndExcite2D(
+                filters=se_filters,
+                bottleneck_filters=adjust_channels(
+                    se_filters * squeeze_excite_ratio
+                ),
+                squeeze_activation="relu",
+                excite_activation=keras.activations.hard_sigmoid,
+                name=f"{name}_se",
+                dtype=dtype,
+            )
+
+        self.conv3 = keras.layers.Conv2D(
+            filters,
+            kernel_size=1,
+            data_format=keras.config.image_data_format(),
+            use_bias=False,
+            name=f"{name}_conv3",
+            dtype=dtype,
+        )
+        self.batch_normalization3 = keras.layers.BatchNormalization(
+            axis=channel_axis,
+            epsilon=BN_EPSILON,
+            momentum=BN_MOMENTUM,
+            name=f"{name}_bn3",
+            dtype=dtype,
+        )
+
+    def build(self, input_shape):
+        self.conv1.build(input_shape)
+        input_shape = self.conv1.compute_output_shape(input_shape)
+        self.batch_normalization1.build(input_shape)
+        input_shape = self.batch_normalization1.compute_output_shape(
+            input_shape
+        )
+        self.activation1.build(input_shape)
+        input_shape = self.activation1.compute_output_shape(input_shape)
+        self.pad.build(input_shape)
+        input_shape = self.pad.compute_output_shape(input_shape)
+        self.conv2.build(input_shape)
+        input_shape = self.conv2.compute_output_shape(input_shape)
+        self.batch_normalization2.build(input_shape)
+        input_shape = self.batch_normalization2.compute_output_shape(
+            input_shape
+        )
+        self.activation2.build(input_shape)
+        input_shape = self.activation2.compute_output_shape(input_shape)
+        if self.squeeze_excite_ratio:
+            self.squeeze_excite.build(input_shape)
+            input_shape = self.squeeze_excite.compute_output_shape(input_shape)
+        self.conv3.build(input_shape)
+        input_shape = self.conv3.compute_output_shape(input_shape)
+        self.batch_normalization3.build(input_shape)
+        self.built = True
+
+    def call(self, inputs):
+        x = self.conv1(inputs)
+        x = self.batch_normalization1(x)
+        x = self.activation1(x)
+        x = self.pad(x)
+        x = self.conv2(x)
+        x = self.batch_normalization2(x)
+        x = self.activation2(x)
+        if self.squeeze_excite:
+            x = self.squeeze_excite(x)
+        x = self.conv3(x)
+        x = self.batch_normalization3(x)
+        if self.stride == 1 and self.infilters == self.filters:
+            x = inputs + x
+        return x
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "expansion": self.expansion,
+                "infilters": self.infilters,
+                "filters": self.filters,
+                "kernel_size": self.kernel_size,
+                "stride": self.stride,
+                "squeeze_excite_ratio": self.squeeze_excite_ratio,
+                "activation": self.activation,
+                "padding": self.padding,
+                "name": self.name,
+            }
+        )
+        return config
+
+
+class ConvBnActBlock(keras.layers.Layer):
+    """
+    A ConvBnActBlock consists of a convultion, batchnorm, and activation layer
+
+    Args:
+        filters: Integer, the dimensionality of the output space
+            (i.e. the number of output filters in the pointwise convolution).
+        activation: The activation function to apply to the signal at the end.
+        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
+            to use for the model's computations and weights.
+
+    Input shape (when called as a layer):
+        4D tensor with shape: `(batch, rows, cols, channels)` in "channels_last"
+        4D tensor with shape: `(batch, channels, rows, cols)` in
+            "channels_first"
+
+    Returns:
+        Output tensor of block.
+    """
+
+    def __init__(
+        self,
+        filter,
+        activation,
+        name=None,
+        dtype=None,
+        **kwargs,
+    ):
+        super().__init__(dtype=dtype, **kwargs)
+        self.filter = filter
+        self.activation = activation
+        self.name = name
+
+        channel_axis = (
+            -1 if keras.config.image_data_format() == "channels_last" else 1
+        )
+        self.conv = keras.layers.Conv2D(
+            filter,
+            kernel_size=1,
+            data_format=keras.config.image_data_format(),
+            use_bias=False,
+            name=f"{name}_conv",
+            dtype=dtype,
+        )
+        self.batch_normalization = keras.layers.BatchNormalization(
+            axis=channel_axis,
+            epsilon=BN_EPSILON,
+            momentum=BN_MOMENTUM,
+            name=f"{name}_bn",
+            dtype=dtype,
+        )
+        self.activation_layer = keras.layers.Activation(
+            activation,
+            dtype=dtype,
+        )
+
+    def build(self, input_shape):
+        self.conv.build(input_shape)
+        input_shape = self.conv.compute_output_shape(input_shape)
+        self.batch_normalization.build(input_shape)
+        input_shape = self.batch_normalization.compute_output_shape(input_shape)
+        self.activation_layer.build(input_shape)
+        self.built = True
+
+    def call(self, inputs):
+        x = self.conv(inputs)
+        x = self.batch_normalization(x)
+        x = self.activation_layer(x)
+        return x
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "filter": self.filter,
+                "activation": self.activation,
+                "name": self.name,
+            }
+        )
+        return config
 
 
 @keras_hub_export("keras_hub.models.MobileNetBackbone")
@@ -29,39 +580,41 @@ class MobileNetBackbone(Backbone):
         (ICCV 2019)
 
     Args:
-        stackwise_expansion: list of
+        stackwise_expansion: list of list of ints, the expanded filters for
+            each inverted residual block for each block in the model.
+        stackwise_num_blocks: list of ints, number of inversted residual blocks
+            per block
+        stackwise_num_filters: list of list of ints, number of filters for
             each inverted residual block in the model.
-
-            residual block in the model.
-
-            residual block in the model.
-        stackwise_num_strides: list of ints, stride length for each inverted
-            residual block in the model.
+        stackwise_kernel_size: list of list of ints, kernel size for each
+            inverted residual block in the model.
+        stackwise_num_strides: list of list of ints, stride length for each
+            inverted residual block in the model.
         stackwise_se_ratio: se ratio for each inverted residual block in the
             model. 0 if dont want to add Squeeze and Excite layer.
-        stackwise_activation: list of activation functions, for each
-
-
-
-            - If `depth_multiplier` < 1.0, proportionally decreases the number
-                of filters in each layer.
-            - If `depth_multiplier` > 1.0, proportionally increases the number
-                of filters in each layer.
-            - If `depth_multiplier` = 1, default number of filters from the
-                paper are used at each layer.
-        input_num_filters: number of filters in first convolution layer
+        stackwise_activation: list of list of activation functions, for each
+            inverted residual block in the model.
+        stackwise_padding: list of list of int, to provide padding values for
+            each inverted residual block in the model.
         output_num_filters: specifies whether to add conv and batch_norm in the
             end, if set to None, it will not add these layers in the end.
            'None' for MobileNetV1
+        depthwise_filters: int, number of filters in depthwise separable
+            convolution layer,
+        last_layer_filter: int, channels/filters for the head ConvBnAct block
+        squeeze_and_excite: float, squeeze and excite ratio in the depthwise
+            layer, None, if dont want to do squeeze and excite
+        image_shape: optional shape tuple, defaults to (224, 224, 3).
         input_activation: activation function to be used in the input layer
            'hard_swish' for MobileNetV3,
            'relu6' for MobileNetV1 and MobileNetV2
        output_activation: activation function to be used in the output layer
            'hard_swish' for MobileNetV3,
            'relu6' for MobileNetV1 and MobileNetV2
-
-
-
+        input_num_filters: int, channels/filters for the input before the stem
+            input_conv
+        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
+            to use for the model's computations and weights.
 
 
     Example:
@@ -70,16 +623,40 @@ class MobileNetBackbone(Backbone):
 
     # Randomly initialized backbone with a custom config
     model = MobileNetBackbone(
-        stackwise_expansion=[
-
-
-
-
-
-
-
-
-
+        stackwise_expansion=[
+            [40, 56],
+            [64, 144, 144],
+            [72, 72],
+            [144, 288, 288],
+        ],
+        stackwise_num_blocks=[2, 3, 2, 3],
+        stackwise_num_filters=[
+            [16, 16],
+            [24, 24, 24],
+            [24, 24],
+            [48, 48, 48],
+        ],
+        stackwise_kernel_size=[[3, 3], [5, 5, 5], [5, 5], [5, 5, 5]],
+        stackwise_num_strides=[[2, 1], [2, 1, 1], [1, 1], [2, 1, 1]],
+        stackwise_se_ratio=[
+            [None, None],
+            [0.25, 0.25, 0.25],
+            [0.3, 0.3],
+            [0.3, 0.25, 0.25],
+        ],
+        stackwise_activation=[
+            ["relu", "relu"],
+            ["hard_swish", "hard_swish", "hard_swish"],
+            ["hard_swish", "hard_swish"],
+            ["hard_swish", "hard_swish", "hard_swish"],
+        ],
+        output_num_filters=288,
+        input_activation="hard_swish",
+        output_activation="hard_swish",
+        input_num_filters=16,
+        image_shape=(224, 224, 3),
+        depthwise_filters=8,
+        squeeze_and_excite=0.5,
 
     )
     output = model(input_data)
@@ -89,18 +666,22 @@ class MobileNetBackbone(Backbone):
     def __init__(
         self,
         stackwise_expansion,
+        stackwise_num_blocks,
         stackwise_num_filters,
         stackwise_kernel_size,
         stackwise_num_strides,
         stackwise_se_ratio,
         stackwise_activation,
+        stackwise_padding,
         output_num_filters,
-
+        depthwise_filters,
+        last_layer_filter,
+        squeeze_and_excite=None,
         image_shape=(None, None, 3),
         input_activation="hard_swish",
         output_activation="hard_swish",
-        depth_multiplier=1.0,
         input_num_filters=16,
+        dtype=None,
         **kwargs,
     ):
         # === Functional Model ===
@@ -109,85 +690,88 @@ class MobileNetBackbone(Backbone):
         )
 
         image_input = keras.layers.Input(shape=image_shape)
-        x = image_input
+        x = image_input
         input_num_filters = adjust_channels(input_num_filters)
+
+        x = keras.layers.ZeroPadding2D(
+            padding=(1, 1),
+            name="input_pad",
+            dtype=dtype,
+        )(x)
         x = keras.layers.Conv2D(
             input_num_filters,
             kernel_size=3,
             strides=(2, 2),
-            padding="same",
             data_format=keras.config.image_data_format(),
             use_bias=False,
             name="input_conv",
+            dtype=dtype,
         )(x)
         x = keras.layers.BatchNormalization(
             axis=channel_axis,
             epsilon=BN_EPSILON,
             momentum=BN_MOMENTUM,
             name="input_batch_norm",
+            dtype=dtype,
+        )(x)
+        x = keras.layers.Activation(
+            input_activation,
+            dtype=dtype,
         )(x)
-        x = keras.layers.Activation(input_activation)(x)
 
-
-
-
-
+        x = DepthwiseConvBlock(
+            input_num_filters,
+            depthwise_filters,
+            squeeze_excite_ratio=squeeze_and_excite,
+            name="block_0",
+            dtype=dtype,
+        )(x)
+
+        for block in range(len(stackwise_num_blocks)):
+            for inverted_block in range(stackwise_num_blocks[block]):
+                infilters = x.shape[channel_axis]
+                x = InvertedResidualBlock(
+                    expansion=stackwise_expansion[block][inverted_block],
+                    infilters=infilters,
+                    filters=adjust_channels(
+                        stackwise_num_filters[block][inverted_block]
+                    ),
+                    kernel_size=stackwise_kernel_size[block][inverted_block],
+                    stride=stackwise_num_strides[block][inverted_block],
+                    squeeze_excite_ratio=stackwise_se_ratio[block][
+                        inverted_block
+                    ],
+                    activation=stackwise_activation[block][inverted_block],
+                    padding=stackwise_padding[block][inverted_block],
+                    name=f"block_{block + 1}_{inverted_block}",
+                    dtype=dtype,
+                )(x)
+
+        x = ConvBnActBlock(
+            filter=adjust_channels(last_layer_filter),
+            activation="hard_swish",
+            name=f"block_{len(stackwise_num_blocks) + 1}_0",
+            dtype=dtype,
+        )(x)
 
-
-                x = apply_inverted_res_block(
-                    x,
-                    expansion=stackwise_expansion[stack_index],
-                    filters=filters,
-                    kernel_size=stackwise_kernel_size[stack_index],
-                    stride=stackwise_num_strides[stack_index],
-                    se_ratio=(stackwise_se_ratio[stack_index]),
-                    activation=stackwise_activation[stack_index],
-                    expansion_index=stack_index,
-                )
-            else:
-                x = apply_depthwise_conv_block(
-                    x,
-                    filters=filters,
-                    kernel_size=3,
-                    stride=stackwise_num_strides[stack_index],
-                    depth_multiplier=depth_multiplier,
-                    block_id=stack_index,
-                )
-
-        if output_num_filters is not None:
-            last_conv_ch = adjust_channels(x.shape[channel_axis] * 6)
-
-            x = keras.layers.Conv2D(
-                last_conv_ch,
-                kernel_size=1,
-                padding="same",
-                data_format=keras.config.image_data_format(),
-                use_bias=False,
-                name="output_conv",
-            )(x)
-            x = keras.layers.BatchNormalization(
-                axis=channel_axis,
-                epsilon=BN_EPSILON,
-                momentum=BN_MOMENTUM,
-                name="output_batch_norm",
-            )(x)
-            x = keras.layers.Activation(output_activation)(x)
-
-        super().__init__(inputs=image_input, outputs=x, **kwargs)
+        super().__init__(inputs=image_input, outputs=x, dtype=dtype, **kwargs)
 
         # === Config ===
         self.stackwise_expansion = stackwise_expansion
+        self.stackwise_num_blocks = stackwise_num_blocks
         self.stackwise_num_filters = stackwise_num_filters
         self.stackwise_kernel_size = stackwise_kernel_size
         self.stackwise_num_strides = stackwise_num_strides
         self.stackwise_se_ratio = stackwise_se_ratio
         self.stackwise_activation = stackwise_activation
-        self.
+        self.stackwise_padding = stackwise_padding
         self.input_num_filters = input_num_filters
         self.output_num_filters = output_num_filters
-        self.
-        self.
-        self.
+        self.depthwise_filters = depthwise_filters
+        self.last_layer_filter = last_layer_filter
+        self.squeeze_and_excite = squeeze_and_excite
+        self.input_activation = input_activation
+        self.output_activation = output_activation
         self.image_shape = image_shape
 
     def get_config(self):
@@ -195,312 +779,21 @@ class MobileNetBackbone(Backbone):
         config.update(
             {
                 "stackwise_expansion": self.stackwise_expansion,
+                "stackwise_num_blocks": self.stackwise_num_blocks,
                 "stackwise_num_filters": self.stackwise_num_filters,
                 "stackwise_kernel_size": self.stackwise_kernel_size,
                 "stackwise_num_strides": self.stackwise_num_strides,
                 "stackwise_se_ratio": self.stackwise_se_ratio,
                 "stackwise_activation": self.stackwise_activation,
+                "stackwise_padding": self.stackwise_padding,
                 "image_shape": self.image_shape,
-                "depth_multiplier": self.depth_multiplier,
                 "input_num_filters": self.input_num_filters,
                 "output_num_filters": self.output_num_filters,
-                "
-
-
-                "
-
-                ),
-                "inverted_res_block": self.inverted_res_block,
+                "depthwise_filters": self.depthwise_filters,
+                "last_layer_filter": self.last_layer_filter,
+                "squeeze_and_excite": self.squeeze_and_excite,
+                "input_activation": self.input_activation,
+                "output_activation": self.output_activation,
             }
         )
         return config
-
-
-def adjust_channels(x, divisor=8, min_value=None):
-    """Ensure that all layers have a channel number divisible by the `divisor`.
-
-    Args:
-        x: integer, input value.
-        divisor: integer, the value by which a channel number should be
-            divisible, defaults to 8.
-        min_value: float, optional minimum value for the new tensor. If None,
-            defaults to value of divisor.
-
-    Returns:
-        the updated input scalar.
-    """
-
-    if min_value is None:
-        min_value = divisor
-
-    new_x = max(min_value, int(x + divisor / 2) // divisor * divisor)
-
-    # make sure that round down does not go down by more than 10%.
-    if new_x < 0.9 * x:
-        new_x += divisor
-    return new_x
-
-
-def apply_inverted_res_block(
-    x,
-    expansion,
-    filters,
-    kernel_size,
-    stride,
-    se_ratio,
-    activation,
-    expansion_index,
-):
-    """An Inverted Residual Block.
-
-    Args:
-        x: input tensor.
-        expansion: integer, the expansion ratio, multiplied with infilters to
-            get the minimum value passed to adjust_channels.
-        filters: integer, number of filters for convolution layer.
-        kernel_size: integer, the kernel size for DepthWise Convolutions.
-        stride: integer, the stride length for DepthWise Convolutions.
-        se_ratio: float, ratio for bottleneck filters. Number of bottleneck
-            filters = filters * se_ratio.
-        activation: the activation layer to use.
-        expansion_index: integer, a unique identification if you want to use
-            expanded convolutions. If greater than 0, an additional Conv+BN
-            layer is added after the expanded convolutional layer.
-
-    Returns:
-        the updated input tensor.
-    """
-    channel_axis = (
-        -1 if keras.config.image_data_format() == "channels_last" else 1
-    )
-    activation = keras.activations.get(activation)
-    shortcut = x
-    prefix = "expanded_conv_"
-    infilters = x.shape[channel_axis]
-
-    if expansion_index > 0:
-        prefix = f"expanded_conv_{expansion_index}_"
-
-    x = keras.layers.Conv2D(
-        adjust_channels(infilters * expansion),
-        kernel_size=1,
-        padding="same",
-        data_format=keras.config.image_data_format(),
-        use_bias=False,
-        name=prefix + "expand",
-    )(x)
-    x = keras.layers.BatchNormalization(
-        axis=channel_axis,
-        epsilon=BN_EPSILON,
-        momentum=BN_MOMENTUM,
-        name=prefix + "expand_BatchNorm",
-    )(x)
-    x = keras.layers.Activation(activation=activation)(x)
-
-    if stride == 2:
-        x = keras.layers.ZeroPadding2D(
-            padding=correct_pad_downsample(x, kernel_size),
-            name=prefix + "depthwise_pad",
-        )(x)
-
-    x = keras.layers.DepthwiseConv2D(
-        kernel_size,
-        strides=stride,
-        padding="same" if stride == 1 else "valid",
-        data_format=keras.config.image_data_format(),
-        use_bias=False,
-        name=prefix + "depthwise",
-    )(x)
-    x = keras.layers.BatchNormalization(
-        axis=channel_axis,
-        epsilon=BN_EPSILON,
-        momentum=BN_MOMENTUM,
-        name=prefix + "depthwise_BatchNorm",
-    )(x)
-    x = keras.layers.Activation(activation=activation)(x)
-
-    if se_ratio:
-        se_filters = adjust_channels(infilters * expansion)
-        x = SqueezeAndExcite2D(
-            input=x,
-            filters=se_filters,
-            bottleneck_filters=adjust_channels(se_filters * se_ratio),
-            squeeze_activation="relu",
-            excite_activation=keras.activations.hard_sigmoid,
-        )
-
-    x = keras.layers.Conv2D(
-        filters,
-        kernel_size=1,
-        padding="same",
-        data_format=keras.config.image_data_format(),
-        use_bias=False,
-        name=prefix + "project",
-    )(x)
-    x = keras.layers.BatchNormalization(
-        axis=channel_axis,
-        epsilon=BN_EPSILON,
-        momentum=BN_MOMENTUM,
-        name=prefix + "project_BatchNorm",
-    )(x)
-
-    if stride == 1 and infilters == filters:
-        x = keras.layers.Add(name=prefix + "Add")([shortcut, x])
-
-    return x
-
-
-def apply_depthwise_conv_block(
-    x,
-    filters,
-    kernel_size=3,
-    depth_multiplier=1,
-    stride=1,
-    block_id=1,
-):
-    """Adds a depthwise convolution block.
-
-    A depthwise convolution block consists of a depthwise conv,
-    batch normalization, relu6, pointwise convolution,
-    batch normalization and relu6 activation.
-
-    Args:
-        x: Input tensor of shape `(rows, cols, channels)`
-        filters: Integer, the dimensionality of the output space
-            (i.e. the number of output filters in the pointwise convolution).
-        depth_multiplier: controls the width of the network.
-            - If `depth_multiplier` < 1.0, proportionally decreases the number
-                of filters in each layer.
-            - If `depth_multiplier` > 1.0, proportionally increases the number
-                of filters in each layer.
-            - If `depth_multiplier` = 1, default number of filters from the
-                paper are used at each layer.
-        strides: An integer or tuple/list of 2 integers, specifying the strides
-            of the convolution along the width and height.
-            Can be a single integer to specify the same value for
-            all spatial dimensions. Specifying any stride value != 1 is
-            incompatible with specifying any `dilation_rate` value != 1.
-        block_id: Integer, a unique identification designating the block number.
-
-    Input shape:
-        4D tensor with shape `(batch, rows, cols, channels)` in "channels_last"
-        4D tensor with shape `(batch, channels, rows, cols)` in "channels_first"
-    Returns:
-        Output tensor of block.
-    """
-    channel_axis = (
-        -1 if keras.config.image_data_format() == "channels_last" else 1
-    )
-    if stride == 2:
-        x = keras.layers.ZeroPadding2D(
-            padding=correct_pad_downsample(x, kernel_size),
-            name="conv_pad_%d" % block_id,
-        )(x)
-
-    x = keras.layers.DepthwiseConv2D(
-        kernel_size,
-        strides=stride,
-        padding="same" if stride == 1 else "valid",
-        data_format=keras.config.image_data_format(),
-        depth_multiplier=depth_multiplier,
-        use_bias=False,
-        name="depthwise_%d" % block_id,
-    )(x)
-    x = keras.layers.BatchNormalization(
-        axis=channel_axis,
-        epsilon=BN_EPSILON,
-        momentum=BN_MOMENTUM,
-        name="depthwise_BatchNorm_%d" % block_id,
-    )(x)
-    x = keras.layers.ReLU(6.0)(x)
-
-    x = keras.layers.Conv2D(
-        filters,
-        kernel_size=1,
-        padding="same",
-        data_format=keras.config.image_data_format(),
-        use_bias=False,
-        name="conv_%d" % block_id,
-    )(x)
-    x = keras.layers.BatchNormalization(
-        axis=channel_axis,
-        epsilon=BN_EPSILON,
-        momentum=BN_MOMENTUM,
-        name="BatchNorm_%d" % block_id,
-    )(x)
-    return keras.layers.ReLU(6.0)(x)
-
-
-def SqueezeAndExcite2D(
-    input,
-    filters,
-    bottleneck_filters=None,
-    squeeze_activation="relu",
-    excite_activation="sigmoid",
-):
-    """
-    Description:
-        This layer applies a content-aware mechanism to adaptively assign
-        channel-wise weights. It uses global average pooling to compress
-        feature maps into single values, which are then processed by
-        two Conv1D layers: the first reduces the dimensionality, and
-        the second restores it.
-    Args:
-        filters: Number of input and output filters. The number of input and
-            output filters is same.
-        bottleneck_filters: (Optional) Number of bottleneck filters. Defaults
-            to `0.25 * filters`
-        squeeze_activation: (Optional) String, callable (or
-            keras.layers.Layer) or keras.activations.Activation instance
-            denoting activation to be applied after squeeze convolution.
-            Defaults to `relu`.
-        excite_activation: (Optional) String, callable (or
-            keras.layers.Layer) or keras.activations.Activation instance
-            denoting activation to be applied after excite convolution.
-            Defaults to `sigmoid`.
-    """
-    if not bottleneck_filters:
-        bottleneck_filters = filters // 4
-
-    x = keras.layers.GlobalAveragePooling2D(keepdims=True)(input)
-
-    x = keras.layers.Conv2D(
-        bottleneck_filters,
-        (1, 1),
-        data_format=keras.config.image_data_format(),
-        activation=squeeze_activation,
-    )(x)
-    x = keras.layers.Conv2D(
-        filters,
-        (1, 1),
-        data_format=keras.config.image_data_format(),
-        activation=excite_activation,
-    )(x)
-
-    x = ops.multiply(x, input)
-    return x
-
-
-def correct_pad_downsample(inputs, kernel_size):
-    """Returns a tuple for zero-padding for 2D convolution with downsampling.
-
-    Args:
-        inputs: Input tensor.
-        kernel_size: An integer or tuple/list of 2 integers.
-
-    Returns:
-        A tuple.
-    """
-    img_dim = 1
-    input_size = inputs.shape[img_dim : (img_dim + 2)]
-    if isinstance(kernel_size, int):
-        kernel_size = (kernel_size, kernel_size)
-    if input_size[0] is None:
-        adjust = (1, 1)
-    else:
-        adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
-    correct = (kernel_size[0] // 2, kernel_size[1] // 2)
-    return (
-        (correct[0] - adjust[0], correct[0]),
-        (correct[1] - adjust[1], correct[1]),
-    )