keras-hub-nightly 0.19.0.dev202502090345__py3-none-any.whl → 0.19.0.dev202502110348__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,562 @@
  import keras
- from keras import ops

  from keras_hub.src.api_export import keras_hub_export
  from keras_hub.src.models.backbone import Backbone
+ from keras_hub.src.models.mobilenet.util import adjust_channels

- BN_EPSILON = 1e-3
- BN_MOMENTUM = 0.999
+ BN_EPSILON = 1e-5
+ BN_MOMENTUM = 0.9
+
+
+ class SqueezeAndExcite2D(keras.layers.Layer):
+     """
+     Description:
+         This layer applies a content-aware mechanism to adaptively assign
+         channel-wise weights. It uses global average pooling to compress
+         feature maps into single values, which are then processed by
+         two Conv1D layers: the first reduces the dimensionality, and
+         the second restores it.
+     Args:
+         filters: Number of input and output filters. The number of input and
+             output filters is the same.
+         bottleneck_filters: (Optional) Number of bottleneck filters. Defaults
+             to `0.25 * filters`
+         squeeze_activation: (Optional) String, callable (or
+             keras.layers.Layer) or keras.activations.Activation instance
+             denoting activation to be applied after squeeze convolution.
+             Defaults to `relu`.
+         excite_activation: (Optional) String, callable (or
+             keras.layers.Layer) or keras.activations.Activation instance
+             denoting activation to be applied after excite convolution.
+             Defaults to `sigmoid`.
+         name: Name of the layer
+         dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
+             to use for the model's computations and weights.
+     """
+
+     def __init__(
+         self,
+         filters,
+         bottleneck_filters=None,
+         squeeze_activation="relu",
+         excite_activation="sigmoid",
+         name=None,
+         dtype=None,
+         **kwargs,
+     ):
+         super().__init__(dtype=dtype, **kwargs)
+         self.filters = filters
+         self.bottleneck_filters = bottleneck_filters
+         self.squeeze_activation = squeeze_activation
+         self.excite_activation = excite_activation
+         self.name = name
+
+         image_data_format = keras.config.image_data_format()
+         if image_data_format == "channels_last":
+             self.spatial_dims = (1, 2)
+         else:
+             self.spatial_dims = (2, 3)
+
+         self.conv_reduce = keras.layers.Conv2D(
+             bottleneck_filters,
+             (1, 1),
+             data_format=image_data_format,
+             name=f"{name}_conv_reduce",
+             dtype=dtype,
+         )
+         self.activation1 = keras.layers.Activation(
+             self.squeeze_activation,
+             name=self.name + "squeeze_activation",
+             dtype=dtype,
+         )
+
+         self.conv_expand = keras.layers.Conv2D(
+             filters,
+             (1, 1),
+             data_format=image_data_format,
+             name=f"{name}_conv_expand",
+             dtype=dtype,
+         )
+         self.gate = keras.layers.Activation(
+             self.excite_activation,
+             name=self.name + "excite_activation",
+             dtype=dtype,
+         )
+
+     def compute_output_shape(self, input_shape):
+         shape = self.conv_reduce.compute_output_shape(input_shape)
+         shape = self.activation1.compute_output_shape(shape)
+         shape = self.conv_expand.compute_output_shape(shape)
+         return self.gate.compute_output_shape(shape)
+
+     def build(self, input_shape):
+         self.conv_reduce.build(input_shape)
+         input_shape = self.conv_reduce.compute_output_shape(input_shape)
+         self.activation1.build(input_shape)
+         input_shape = self.activation1.compute_output_shape(input_shape)
+         self.conv_expand.build(input_shape)
+         input_shape = self.conv_expand.compute_output_shape(input_shape)
+         self.gate.build(input_shape)
+         self.built = True
+
+     def call(self, inputs):
+         x_se = keras.ops.mean(inputs, axis=self.spatial_dims, keepdims=True)
+         x_se = self.conv_reduce(x_se)
+         x_se = self.activation1(x_se)
+         x_se = self.conv_expand(x_se)
+         return inputs * self.gate(x_se)
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "filters": self.filters,
+                 "bottleneck_filters": self.bottleneck_filters,
+                 "squeeze_activation": self.squeeze_activation,
+                 "excite_activation": self.excite_activation,
+                 "name": self.name,
+                 "spatial_dims": self.spatial_dims,
+             }
+         )
+         return config
+
+
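As an illustration only (not part of the package contents above), the new SqueezeAndExcite2D layer could be exercised on its own roughly as follows; the shapes and the 0.25 bottleneck ratio are arbitrary, and the class is assumed to be imported from the MobileNet module this diff modifies (the file path is not shown in the diff):

    import numpy as np

    # Assumes SqueezeAndExcite2D (defined above) is in scope and the image
    # data format is "channels_last".
    se = SqueezeAndExcite2D(
        filters=32,
        bottleneck_filters=8,  # illustrative choice of 0.25 * filters
        squeeze_activation="relu",
        excite_activation="sigmoid",
        name="se_demo",
    )
    x = np.random.rand(1, 14, 14, 32).astype("float32")
    y = se(x)  # channels are re-weighted; the shape is unchanged
    assert tuple(y.shape) == tuple(x.shape)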
+ class DepthwiseConvBlock(keras.layers.Layer):
+     """
+     A depthwise convolution block consists of a depthwise conv,
+     batch normalization, relu, optional squeeze & excite, pointwise convolution,
+     and batch normalization layer.
+
+     Args:
+         infilters: int, the output channels for the initial depthwise conv
+         filters: int, the dimensionality of the output space
+             (i.e. the number of output filters in the pointwise convolution).
+         kernel_size: int or Tuple[int, int], the kernel size to apply
+             to the initial depthwise convolution
+         stride: An int or Tuple[int, int], specifying the strides
+             of the convolution along the width and height.
+             Can be a single integer to specify the same value for
+             all spatial dimensions.
+         squeeze_excite_ratio: squeeze & excite ratio: float[Optional], if
+             exists, specifies the ratio of channels (<1) to squeeze the initial
+             signal into before reexciting back out. If (>1) technically, it's an
+             excite & squeeze layer. If this doesn't exist there is no
+             SqueezeExcite layer.
+         name: str, name of the layer
+         dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
+             to use for the model's computations and weights.
+
+     Input shape when applied as a layer:
+         4D tensor with shape: `(batch, rows, cols, channels)` in "channels_last"
+         4D tensor with shape: `(batch, channels, rows, cols)` in
+             "channels_first"
+     Returns:
+         Output tensor of block.
+     """
+
+     def __init__(
+         self,
+         infilters,
+         filters,
+         kernel_size=3,
+         stride=2,
+         squeeze_excite_ratio=None,
+         name=None,
+         dtype=None,
+         **kwargs,
+     ):
+         super().__init__(dtype=dtype, **kwargs)
+         self.infilters = infilters
+         self.filters = filters
+         self.kernel_size = kernel_size
+         self.stride = stride
+         self.squeeze_excite_ratio = squeeze_excite_ratio
+         self.name = name
+
+         channel_axis = (
+             -1 if keras.config.image_data_format() == "channels_last" else 1
+         )
+         self.name = name = f"{name}_0"
+
+         self.pad = keras.layers.ZeroPadding2D(
+             padding=(1, 1),
+             name=f"{name}_pad",
+             dtype=dtype,
+         )
+         self.conv1 = keras.layers.Conv2D(
+             infilters,
+             kernel_size,
+             strides=stride,
+             padding="valid",
+             data_format=keras.config.image_data_format(),
+             groups=infilters,
+             use_bias=False,
+             name=f"{name}_conv1",
+             dtype=dtype,
+         )
+         self.batch_normalization1 = keras.layers.BatchNormalization(
+             axis=channel_axis,
+             epsilon=BN_EPSILON,
+             momentum=BN_MOMENTUM,
+             name=f"{name}_bn1",
+             dtype=dtype,
+         )
+         self.activation1 = keras.layers.ReLU(dtype=dtype)
+
+         if squeeze_excite_ratio:
+             self.se_layer = SqueezeAndExcite2D(
+                 filters=infilters,
+                 bottleneck_filters=adjust_channels(
+                     infilters * squeeze_excite_ratio
+                 ),
+                 squeeze_activation="relu",
+                 excite_activation=keras.activations.hard_sigmoid,
+                 name=f"{name}_squeeze_excite",
+                 dtype=dtype,
+             )
+
+         self.conv2 = keras.layers.Conv2D(
+             filters,
+             kernel_size=1,
+             data_format=keras.config.image_data_format(),
+             use_bias=False,
+             name=f"{name}_conv2",
+             dtype=dtype,
+         )
+         self.batch_normalization2 = keras.layers.BatchNormalization(
+             axis=channel_axis,
+             epsilon=BN_EPSILON,
+             momentum=BN_MOMENTUM,
+             name=f"{name}_bn2",
+             dtype=dtype,
+         )
+
+     def build(self, input_shape):
+         self.pad.build(input_shape)
+         input_shape = self.pad.compute_output_shape(input_shape)
+         self.conv1.build(input_shape)
+         input_shape = self.conv1.compute_output_shape(input_shape)
+         self.batch_normalization1.build(input_shape)
+         input_shape = self.batch_normalization1.compute_output_shape(
+             input_shape
+         )
+         self.activation1.build(input_shape)
+         input_shape = self.activation1.compute_output_shape(input_shape)
+         if self.squeeze_excite_ratio:
+             self.se_layer.build(input_shape)
+             input_shape = self.se_layer.compute_output_shape(input_shape)
+         self.conv2.build(input_shape)
+         input_shape = self.conv2.compute_output_shape(input_shape)
+         self.batch_normalization2.build(input_shape)
+         self.built = True
+
+     def call(self, inputs):
+         x = self.pad(inputs)
+         x = self.conv1(x)
+         x = self.batch_normalization1(x)
+         x = self.activation1(x)
+
+         if self.se_layer:
+             x = self.se_layer(x)
+
+         x = self.conv2(x)
+         x = self.batch_normalization2(x)
+         return x
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "infilters": self.infilters,
+                 "filters": self.filters,
+                 "kernel_size": self.kernel_size,
+                 "stride": self.stride,
+                 "squeeze_excite_ratio": self.squeeze_excite_ratio,
+                 "name": self.name,
+             }
+         )
+         return config
+
+
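For orientation (illustrative, not part of the diff): this block is used once, right after the input convolution and named "block_0", in the backbone constructor further down. With the fixed (1, 1) zero padding and a stride-2 3x3 depthwise stage it halves the spatial resolution, then projects to `filters` channels with a 1x1 convolution. A rough sketch, assuming "channels_last" data and the class in scope:

    import numpy as np

    block = DepthwiseConvBlock(
        infilters=16,               # must match the input channel count
        filters=8,
        kernel_size=3,
        stride=2,
        squeeze_excite_ratio=0.25,  # illustrative; the backbone may pass None
        name="stem_demo",
    )
    x = np.random.rand(1, 112, 112, 16).astype("float32")
    y = block(x)
    # ZeroPadding2D((1, 1)) followed by a 3x3 stride-2 "valid" depthwise conv
    # halves the spatial size: 112 -> 56.
    assert tuple(y.shape) == (1, 56, 56, 8)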
+ class InvertedResidualBlock(keras.layers.Layer):
+     """An Inverted Residual Block.
+
+     Args:
+         expansion: integer, the expansion ratio, multiplied with infilters to
+             get the minimum value passed to adjust_channels.
+         infilters: Int, the output channels for the initial depthwise conv
+         filters: integer, number of filters for convolution layer.
+         kernel_size: integer, the kernel size for DepthWise Convolutions.
+         stride: integer, the stride length for DepthWise Convolutions.
+         squeeze_excite_ratio: float, ratio for bottleneck filters. Number of
+             bottleneck filters = filters * se_ratio.
+         activation: the activation layer to use.
+         padding: padding in the conv2d layer
+         name: string, block label.
+         dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
+             to use for the model's computations and weights.
+
+     Input shape when applied as a layer:
+         4D tensor with shape: `(batch, rows, cols, channels)` in "channels_last"
+         4D tensor with shape: `(batch, channels, rows, cols)` in
+             "channels_first"
+     Returns:
+         Output tensor of block.
+     """
+
+     def __init__(
+         self,
+         expansion,
+         infilters,
+         filters,
+         kernel_size,
+         stride,
+         squeeze_excite_ratio,
+         activation,
+         padding,
+         name=None,
+         dtype=None,
+         **kwargs,
+     ):
+         super().__init__(dtype=dtype, **kwargs)
+         self.expansion = expansion
+         self.infilters = infilters
+         self.filters = filters
+         self.kernel_size = kernel_size
+         self.stride = stride
+         self.squeeze_excite_ratio = squeeze_excite_ratio
+         self.activation = activation
+         self.padding = padding
+         self.name = name
+
+         channel_axis = (
+             -1 if keras.config.image_data_format() == "channels_last" else 1
+         )
+         expanded_channels = adjust_channels(expansion)
+
+         self.conv1 = keras.layers.Conv2D(
+             expanded_channels,
+             kernel_size=1,
+             data_format=keras.config.image_data_format(),
+             use_bias=False,
+             name=f"{name}_conv1",
+             dtype=dtype,
+         )
+
+         self.batch_normalization1 = keras.layers.BatchNormalization(
+             axis=channel_axis,
+             epsilon=BN_EPSILON,
+             momentum=BN_MOMENTUM,
+             name=f"{name}_bn1",
+             dtype=dtype,
+         )
+
+         self.activation1 = keras.layers.Activation(
+             activation=activation,
+             dtype=dtype,
+         )
+
+         self.pad = keras.layers.ZeroPadding2D(
+             padding=(padding, padding),
+             name=f"{name}_pad",
+             dtype=dtype,
+         )
+
+         self.conv2 = keras.layers.Conv2D(
+             expanded_channels,
+             kernel_size,
+             strides=stride,
+             padding="valid",
+             groups=expanded_channels,
+             data_format=keras.config.image_data_format(),
+             use_bias=False,
+             name=f"{name}_conv2",
+             dtype=dtype,
+         )
+         self.batch_normalization2 = keras.layers.BatchNormalization(
+             axis=channel_axis,
+             epsilon=BN_EPSILON,
+             momentum=BN_MOMENTUM,
+             name=f"{name}_bn2",
+             dtype=dtype,
+         )
+
+         self.activation2 = keras.layers.Activation(
+             activation=activation,
+             dtype=dtype,
+         )
+
+         self.squeeze_excite = None
+         if self.squeeze_excite_ratio:
+             se_filters = expanded_channels
+             self.squeeze_excite = SqueezeAndExcite2D(
+                 filters=se_filters,
+                 bottleneck_filters=adjust_channels(
+                     se_filters * squeeze_excite_ratio
+                 ),
+                 squeeze_activation="relu",
+                 excite_activation=keras.activations.hard_sigmoid,
+                 name=f"{name}_se",
+                 dtype=dtype,
+             )
+
+         self.conv3 = keras.layers.Conv2D(
+             filters,
+             kernel_size=1,
+             data_format=keras.config.image_data_format(),
+             use_bias=False,
+             name=f"{name}_conv3",
+             dtype=dtype,
+         )
+         self.batch_normalization3 = keras.layers.BatchNormalization(
+             axis=channel_axis,
+             epsilon=BN_EPSILON,
+             momentum=BN_MOMENTUM,
+             name=f"{name}_bn3",
+             dtype=dtype,
+         )
+
+     def build(self, input_shape):
+         self.conv1.build(input_shape)
+         input_shape = self.conv1.compute_output_shape(input_shape)
+         self.batch_normalization1.build(input_shape)
+         input_shape = self.batch_normalization1.compute_output_shape(
+             input_shape
+         )
+         self.activation1.build(input_shape)
+         input_shape = self.activation1.compute_output_shape(input_shape)
+         self.pad.build(input_shape)
+         input_shape = self.pad.compute_output_shape(input_shape)
+         self.conv2.build(input_shape)
+         input_shape = self.conv2.compute_output_shape(input_shape)
+         self.batch_normalization2.build(input_shape)
+         input_shape = self.batch_normalization2.compute_output_shape(
+             input_shape
+         )
+         self.activation2.build(input_shape)
+         input_shape = self.activation2.compute_output_shape(input_shape)
+         if self.squeeze_excite_ratio:
+             self.squeeze_excite.build(input_shape)
+             input_shape = self.squeeze_excite.compute_output_shape(input_shape)
+         self.conv3.build(input_shape)
+         input_shape = self.conv3.compute_output_shape(input_shape)
+         self.batch_normalization3.build(input_shape)
+         self.built = True
+
+     def call(self, inputs):
+         x = self.conv1(inputs)
+         x = self.batch_normalization1(x)
+         x = self.activation1(x)
+         x = self.pad(x)
+         x = self.conv2(x)
+         x = self.batch_normalization2(x)
+         x = self.activation2(x)
+         if self.squeeze_excite:
+             x = self.squeeze_excite(x)
+         x = self.conv3(x)
+         x = self.batch_normalization3(x)
+         if self.stride == 1 and self.infilters == self.filters:
+             x = inputs + x
+         return x
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "expansion": self.expansion,
+                 "infilters": self.infilters,
+                 "filters": self.filters,
+                 "kernel_size": self.kernel_size,
+                 "stride": self.stride,
+                 "squeeze_excite_ratio": self.squeeze_excite_ratio,
+                 "activation": self.activation,
+                 "padding": self.padding,
+                 "name": self.name,
+             }
+         )
+         return config
+
+
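As the last lines of call() show, the residual shortcut is only added when stride == 1 and the input and output channel counts match. A hedged, illustrative sketch (values are arbitrary, "channels_last" data assumed, class assumed in scope); note that in this implementation `expansion` is passed straight to adjust_channels, i.e. it is used as an expanded channel count rather than a ratio:

    import numpy as np

    block = InvertedResidualBlock(
        expansion=72,              # expanded channel count in this code path
        infilters=24,
        filters=24,                # stride 1 + matching channels -> shortcut add
        kernel_size=3,
        stride=1,
        squeeze_excite_ratio=0.25,
        activation="hard_swish",
        padding=1,
        name="ir_demo",
    )
    x = np.random.rand(1, 28, 28, 24).astype("float32")
    y = block(x)  # same shape as the input, so `inputs + x` is valid
    assert tuple(y.shape) == tuple(x.shape)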
+ class ConvBnActBlock(keras.layers.Layer):
+     """
+     A ConvBnActBlock consists of a convolution, batchnorm, and activation layer
+
+     Args:
+         filter: Integer, the dimensionality of the output space
+             (i.e. the number of output filters in the pointwise convolution).
+         activation: The activation function to apply to the signal at the end.
+         dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
+             to use for the model's computations and weights.
+
+     Input shape (when called as a layer):
+         4D tensor with shape: `(batch, rows, cols, channels)` in "channels_last"
+         4D tensor with shape: `(batch, channels, rows, cols)` in
+             "channels_first"
+
+     Returns:
+         Output tensor of block.
+     """
+
+     def __init__(
+         self,
+         filter,
+         activation,
+         name=None,
+         dtype=None,
+         **kwargs,
+     ):
+         super().__init__(dtype=dtype, **kwargs)
+         self.filter = filter
+         self.activation = activation
+         self.name = name
+
+         channel_axis = (
+             -1 if keras.config.image_data_format() == "channels_last" else 1
+         )
+         self.conv = keras.layers.Conv2D(
+             filter,
+             kernel_size=1,
+             data_format=keras.config.image_data_format(),
+             use_bias=False,
+             name=f"{name}_conv",
+             dtype=dtype,
+         )
+         self.batch_normalization = keras.layers.BatchNormalization(
+             axis=channel_axis,
+             epsilon=BN_EPSILON,
+             momentum=BN_MOMENTUM,
+             name=f"{name}_bn",
+             dtype=dtype,
+         )
+         self.activation_layer = keras.layers.Activation(
+             activation,
+             dtype=dtype,
+         )
+
+     def build(self, input_shape):
+         self.conv.build(input_shape)
+         input_shape = self.conv.compute_output_shape(input_shape)
+         self.batch_normalization.build(input_shape)
+         input_shape = self.batch_normalization.compute_output_shape(input_shape)
+         self.activation_layer.build(input_shape)
+         self.built = True
+
+     def call(self, inputs):
+         x = self.conv(inputs)
+         x = self.batch_normalization(x)
+         x = self.activation_layer(x)
+         return x
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "filter": self.filter,
+                 "activation": self.activation,
+                 "name": self.name,
+             }
+         )
+         return config
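This head block is a plain 1x1 convolution followed by batch normalization and an activation, so it only changes the channel count. A minimal, illustrative sketch (not part of the package; class assumed in scope):

    import numpy as np

    head = ConvBnActBlock(filter=288, activation="hard_swish", name="head_demo")
    y = head(np.random.rand(1, 7, 7, 96).astype("float32"))
    assert tuple(y.shape) == (1, 7, 7, 288)  # spatial size is unchanged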


  @keras_hub_export("keras_hub.models.MobileNetBackbone")
@@ -29,39 +580,41 @@ class MobileNetBackbone(Backbone):
          (ICCV 2019)

      Args:
-         stackwise_expansion: list of ints or floats, the expansion ratio for
+         stackwise_expansion: list of list of ints, the expanded filters for
+             each inverted residual block for each block in the model.
+         stackwise_num_blocks: list of ints, number of inverted residual blocks
+             per block.
+         stackwise_num_filters: list of list of ints, number of filters for
              each inverted residual block in the model.
-         stackwise_num_filters: list of ints, number of filters for each inverted
-             residual block in the model.
-         stackwise_kernel_size: list of ints, kernel size for each inverted
-             residual block in the model.
-         stackwise_num_strides: list of ints, stride length for each inverted
-             residual block in the model.
+         stackwise_kernel_size: list of list of ints, kernel size for each
+             inverted residual block in the model.
+         stackwise_num_strides: list of list of ints, stride length for each
+             inverted residual block in the model.
          stackwise_se_ratio: se ratio for each inverted residual block in the
              model. 0 if you don't want to add Squeeze and Excite layer.
-         stackwise_activation: list of activation functions, for each inverted
-             residual block in the model.
-         image_shape: optional shape tuple, defaults to (224, 224, 3).
-         depth_multiplier: float, controls the width of the network.
-             - If `depth_multiplier` < 1.0, proportionally decreases the number
-                 of filters in each layer.
-             - If `depth_multiplier` > 1.0, proportionally increases the number
-                 of filters in each layer.
-             - If `depth_multiplier` = 1, default number of filters from the
-                 paper are used at each layer.
-         input_num_filters: number of filters in first convolution layer
+         stackwise_activation: list of list of activation functions, for each
+             inverted residual block in the model.
+         stackwise_padding: list of list of int, to provide padding values for
+             each inverted residual block in the model.
          output_num_filters: specifies whether to add conv and batch_norm in the
              end, if set to None, it will not add these layers in the end.
              'None' for MobileNetV1
+         depthwise_filters: int, number of filters in depthwise separable
+             convolution layer,
+         last_layer_filter: int, channels/filters for the head ConvBnAct block
+         squeeze_and_excite: float, squeeze and excite ratio in the depthwise
+             layer; None if you don't want to do squeeze and excite
+         image_shape: optional shape tuple, defaults to (224, 224, 3).
          input_activation: activation function to be used in the input layer
              'hard_swish' for MobileNetV3,
              'relu6' for MobileNetV1 and MobileNetV2
          output_activation: activation function to be used in the output layer
              'hard_swish' for MobileNetV3,
              'relu6' for MobileNetV1 and MobileNetV2
-         inverted_res_block: whether to use inverted residual blocks or not,
-             'False' for MobileNetV1,
-             'True' for MobileNetV2 and MobileNetV3
+         input_num_filters: int, channels/filters for the input before the stem
+             input_conv
+         dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
+             to use for the model's computations and weights.


      Example:
@@ -70,16 +623,40 @@ class MobileNetBackbone(Backbone):

      # Randomly initialized backbone with a custom config
      model = MobileNetBackbone(
-         stackwise_expansion=[1, 4, 6],
-         stackwise_num_filters=[4, 8, 16],
-         stackwise_kernel_size=[3, 3, 5],
-         stackwise_num_strides=[2, 2, 1],
-         stackwise_se_ratio=[0.25, None, 0.25],
-         stackwise_activation=["relu", "relu6", "hard_swish"],
-         output_num_filters=1280,
-         input_activation='hard_swish',
-         output_activation='hard_swish',
-         inverted_res_block=True,
+         stackwise_expansion=[
+             [40, 56],
+             [64, 144, 144],
+             [72, 72],
+             [144, 288, 288],
+         ],
+         stackwise_num_blocks=[2, 3, 2, 3],
+         stackwise_num_filters=[
+             [16, 16],
+             [24, 24, 24],
+             [24, 24],
+             [48, 48, 48],
+         ],
+         stackwise_kernel_size=[[3, 3], [5, 5, 5], [5, 5], [5, 5, 5]],
+         stackwise_num_strides=[[2, 1], [2, 1, 1], [1, 1], [2, 1, 1]],
+         stackwise_se_ratio=[
+             [None, None],
+             [0.25, 0.25, 0.25],
+             [0.3, 0.3],
+             [0.3, 0.25, 0.25],
+         ],
+         stackwise_activation=[
+             ["relu", "relu"],
+             ["hard_swish", "hard_swish", "hard_swish"],
+             ["hard_swish", "hard_swish"],
+             ["hard_swish", "hard_swish", "hard_swish"],
+         ],
+         output_num_filters=288,
+         input_activation="hard_swish",
+         output_activation="hard_swish",
+         input_num_filters=16,
+         image_shape=(224, 224, 3),
+         depthwise_filters=8,
+         squeeze_and_excite=0.5,

      )
      output = model(input_data)
@@ -89,18 +666,22 @@ class MobileNetBackbone(Backbone):
      def __init__(
          self,
          stackwise_expansion,
+         stackwise_num_blocks,
          stackwise_num_filters,
          stackwise_kernel_size,
          stackwise_num_strides,
          stackwise_se_ratio,
          stackwise_activation,
+         stackwise_padding,
          output_num_filters,
-         inverted_res_block,
+         depthwise_filters,
+         last_layer_filter,
+         squeeze_and_excite=None,
          image_shape=(None, None, 3),
          input_activation="hard_swish",
          output_activation="hard_swish",
-         depth_multiplier=1.0,
          input_num_filters=16,
+         dtype=None,
          **kwargs,
      ):
          # === Functional Model ===
@@ -109,85 +690,88 @@ class MobileNetBackbone(Backbone):
          )

          image_input = keras.layers.Input(shape=image_shape)
-         x = image_input  # Intermediate result.
+         x = image_input
          input_num_filters = adjust_channels(input_num_filters)
+
+         x = keras.layers.ZeroPadding2D(
+             padding=(1, 1),
+             name="input_pad",
+             dtype=dtype,
+         )(x)
          x = keras.layers.Conv2D(
              input_num_filters,
              kernel_size=3,
              strides=(2, 2),
-             padding="same",
              data_format=keras.config.image_data_format(),
              use_bias=False,
              name="input_conv",
+             dtype=dtype,
          )(x)
          x = keras.layers.BatchNormalization(
              axis=channel_axis,
              epsilon=BN_EPSILON,
              momentum=BN_MOMENTUM,
              name="input_batch_norm",
+             dtype=dtype,
+         )(x)
+         x = keras.layers.Activation(
+             input_activation,
+             dtype=dtype,
          )(x)
-         x = keras.layers.Activation(input_activation)(x)

-         for stack_index in range(len(stackwise_num_filters)):
-             filters = adjust_channels(
-                 (stackwise_num_filters[stack_index]) * depth_multiplier
-             )
+         x = DepthwiseConvBlock(
+             input_num_filters,
+             depthwise_filters,
+             squeeze_excite_ratio=squeeze_and_excite,
+             name="block_0",
+             dtype=dtype,
+         )(x)
+
+         for block in range(len(stackwise_num_blocks)):
+             for inverted_block in range(stackwise_num_blocks[block]):
+                 infilters = x.shape[channel_axis]
+                 x = InvertedResidualBlock(
+                     expansion=stackwise_expansion[block][inverted_block],
+                     infilters=infilters,
+                     filters=adjust_channels(
+                         stackwise_num_filters[block][inverted_block]
+                     ),
+                     kernel_size=stackwise_kernel_size[block][inverted_block],
+                     stride=stackwise_num_strides[block][inverted_block],
+                     squeeze_excite_ratio=stackwise_se_ratio[block][
+                         inverted_block
+                     ],
+                     activation=stackwise_activation[block][inverted_block],
+                     padding=stackwise_padding[block][inverted_block],
+                     name=f"block_{block + 1}_{inverted_block}",
+                     dtype=dtype,
+                 )(x)
+
+         x = ConvBnActBlock(
+             filter=adjust_channels(last_layer_filter),
+             activation="hard_swish",
+             name=f"block_{len(stackwise_num_blocks) + 1}_0",
+             dtype=dtype,
+         )(x)

-             if inverted_res_block:
-                 x = apply_inverted_res_block(
-                     x,
-                     expansion=stackwise_expansion[stack_index],
-                     filters=filters,
-                     kernel_size=stackwise_kernel_size[stack_index],
-                     stride=stackwise_num_strides[stack_index],
-                     se_ratio=(stackwise_se_ratio[stack_index]),
-                     activation=stackwise_activation[stack_index],
-                     expansion_index=stack_index,
-                 )
-             else:
-                 x = apply_depthwise_conv_block(
-                     x,
-                     filters=filters,
-                     kernel_size=3,
-                     stride=stackwise_num_strides[stack_index],
-                     depth_multiplier=depth_multiplier,
-                     block_id=stack_index,
-                 )
-
-         if output_num_filters is not None:
-             last_conv_ch = adjust_channels(x.shape[channel_axis] * 6)
-
-             x = keras.layers.Conv2D(
-                 last_conv_ch,
-                 kernel_size=1,
-                 padding="same",
-                 data_format=keras.config.image_data_format(),
-                 use_bias=False,
-                 name="output_conv",
-             )(x)
-             x = keras.layers.BatchNormalization(
-                 axis=channel_axis,
-                 epsilon=BN_EPSILON,
-                 momentum=BN_MOMENTUM,
-                 name="output_batch_norm",
-             )(x)
-             x = keras.layers.Activation(output_activation)(x)
-
-         super().__init__(inputs=image_input, outputs=x, **kwargs)
+         super().__init__(inputs=image_input, outputs=x, dtype=dtype, **kwargs)

          # === Config ===
          self.stackwise_expansion = stackwise_expansion
+         self.stackwise_num_blocks = stackwise_num_blocks
          self.stackwise_num_filters = stackwise_num_filters
          self.stackwise_kernel_size = stackwise_kernel_size
          self.stackwise_num_strides = stackwise_num_strides
          self.stackwise_se_ratio = stackwise_se_ratio
          self.stackwise_activation = stackwise_activation
-         self.depth_multiplier = depth_multiplier
+         self.stackwise_padding = stackwise_padding
          self.input_num_filters = input_num_filters
          self.output_num_filters = output_num_filters
-         self.input_activation = keras.activations.get(input_activation)
-         self.output_activation = keras.activations.get(output_activation)
-         self.inverted_res_block = inverted_res_block
+         self.depthwise_filters = depthwise_filters
+         self.last_layer_filter = last_layer_filter
+         self.squeeze_and_excite = squeeze_and_excite
+         self.input_activation = input_activation
+         self.output_activation = output_activation
          self.image_shape = image_shape

      def get_config(self):
@@ -195,312 +779,21 @@ class MobileNetBackbone(Backbone):
          config.update(
              {
                  "stackwise_expansion": self.stackwise_expansion,
+                 "stackwise_num_blocks": self.stackwise_num_blocks,
                  "stackwise_num_filters": self.stackwise_num_filters,
                  "stackwise_kernel_size": self.stackwise_kernel_size,
                  "stackwise_num_strides": self.stackwise_num_strides,
                  "stackwise_se_ratio": self.stackwise_se_ratio,
                  "stackwise_activation": self.stackwise_activation,
+                 "stackwise_padding": self.stackwise_padding,
                  "image_shape": self.image_shape,
-                 "depth_multiplier": self.depth_multiplier,
                  "input_num_filters": self.input_num_filters,
                  "output_num_filters": self.output_num_filters,
-                 "input_activation": keras.activations.serialize(
-                     activation=self.input_activation
-                 ),
-                 "output_activation": keras.activations.serialize(
-                     activation=self.output_activation
-                 ),
-                 "inverted_res_block": self.inverted_res_block,
+                 "depthwise_filters": self.depthwise_filters,
+                 "last_layer_filter": self.last_layer_filter,
+                 "squeeze_and_excite": self.squeeze_and_excite,
+                 "input_activation": self.input_activation,
+                 "output_activation": self.output_activation,
              }
          )
          return config
-
-
- def adjust_channels(x, divisor=8, min_value=None):
-     """Ensure that all layers have a channel number divisible by the `divisor`.
-
-     Args:
-         x: integer, input value.
-         divisor: integer, the value by which a channel number should be
-             divisible, defaults to 8.
-         min_value: float, optional minimum value for the new tensor. If None,
-             defaults to value of divisor.
-
-     Returns:
-         the updated input scalar.
-     """
-
-     if min_value is None:
-         min_value = divisor
-
-     new_x = max(min_value, int(x + divisor / 2) // divisor * divisor)
-
-     # make sure that round down does not go down by more than 10%.
-     if new_x < 0.9 * x:
-         new_x += divisor
-     return new_x
-
-
- def apply_inverted_res_block(
-     x,
-     expansion,
-     filters,
-     kernel_size,
-     stride,
-     se_ratio,
-     activation,
-     expansion_index,
- ):
-     """An Inverted Residual Block.
-
-     Args:
-         x: input tensor.
-         expansion: integer, the expansion ratio, multiplied with infilters to
-             get the minimum value passed to adjust_channels.
-         filters: integer, number of filters for convolution layer.
-         kernel_size: integer, the kernel size for DepthWise Convolutions.
-         stride: integer, the stride length for DepthWise Convolutions.
-         se_ratio: float, ratio for bottleneck filters. Number of bottleneck
-             filters = filters * se_ratio.
-         activation: the activation layer to use.
-         expansion_index: integer, a unique identification if you want to use
-             expanded convolutions. If greater than 0, an additional Conv+BN
-             layer is added after the expanded convolutional layer.
-
-     Returns:
-         the updated input tensor.
-     """
-     channel_axis = (
-         -1 if keras.config.image_data_format() == "channels_last" else 1
-     )
-     activation = keras.activations.get(activation)
-     shortcut = x
-     prefix = "expanded_conv_"
-     infilters = x.shape[channel_axis]
-
-     if expansion_index > 0:
-         prefix = f"expanded_conv_{expansion_index}_"
-
-     x = keras.layers.Conv2D(
-         adjust_channels(infilters * expansion),
-         kernel_size=1,
-         padding="same",
-         data_format=keras.config.image_data_format(),
-         use_bias=False,
-         name=prefix + "expand",
-     )(x)
-     x = keras.layers.BatchNormalization(
-         axis=channel_axis,
-         epsilon=BN_EPSILON,
-         momentum=BN_MOMENTUM,
-         name=prefix + "expand_BatchNorm",
-     )(x)
-     x = keras.layers.Activation(activation=activation)(x)
-
-     if stride == 2:
-         x = keras.layers.ZeroPadding2D(
-             padding=correct_pad_downsample(x, kernel_size),
-             name=prefix + "depthwise_pad",
-         )(x)
-
-     x = keras.layers.DepthwiseConv2D(
-         kernel_size,
-         strides=stride,
-         padding="same" if stride == 1 else "valid",
-         data_format=keras.config.image_data_format(),
-         use_bias=False,
-         name=prefix + "depthwise",
-     )(x)
-     x = keras.layers.BatchNormalization(
-         axis=channel_axis,
-         epsilon=BN_EPSILON,
-         momentum=BN_MOMENTUM,
-         name=prefix + "depthwise_BatchNorm",
-     )(x)
-     x = keras.layers.Activation(activation=activation)(x)
-
-     if se_ratio:
-         se_filters = adjust_channels(infilters * expansion)
-         x = SqueezeAndExcite2D(
-             input=x,
-             filters=se_filters,
-             bottleneck_filters=adjust_channels(se_filters * se_ratio),
-             squeeze_activation="relu",
-             excite_activation=keras.activations.hard_sigmoid,
-         )
-
-     x = keras.layers.Conv2D(
-         filters,
-         kernel_size=1,
-         padding="same",
-         data_format=keras.config.image_data_format(),
-         use_bias=False,
-         name=prefix + "project",
-     )(x)
-     x = keras.layers.BatchNormalization(
-         axis=channel_axis,
-         epsilon=BN_EPSILON,
-         momentum=BN_MOMENTUM,
-         name=prefix + "project_BatchNorm",
-     )(x)
-
-     if stride == 1 and infilters == filters:
-         x = keras.layers.Add(name=prefix + "Add")([shortcut, x])
-
-     return x
-
-
- def apply_depthwise_conv_block(
-     x,
-     filters,
-     kernel_size=3,
-     depth_multiplier=1,
-     stride=1,
-     block_id=1,
- ):
-     """Adds a depthwise convolution block.
-
-     A depthwise convolution block consists of a depthwise conv,
-     batch normalization, relu6, pointwise convolution,
-     batch normalization and relu6 activation.
-
-     Args:
-         x: Input tensor of shape `(rows, cols, channels)`
-         filters: Integer, the dimensionality of the output space
-             (i.e. the number of output filters in the pointwise convolution).
-         depth_multiplier: controls the width of the network.
-             - If `depth_multiplier` < 1.0, proportionally decreases the number
-                 of filters in each layer.
-             - If `depth_multiplier` > 1.0, proportionally increases the number
-                 of filters in each layer.
-             - If `depth_multiplier` = 1, default number of filters from the
-                 paper are used at each layer.
-         strides: An integer or tuple/list of 2 integers, specifying the strides
-             of the convolution along the width and height.
-             Can be a single integer to specify the same value for
-             all spatial dimensions. Specifying any stride value != 1 is
-             incompatible with specifying any `dilation_rate` value != 1.
-         block_id: Integer, a unique identification designating the block number.
-
-     Input shape:
-         4D tensor with shape `(batch, rows, cols, channels)` in "channels_last"
-         4D tensor with shape `(batch, channels, rows, cols)` in "channels_first"
-     Returns:
-         Output tensor of block.
-     """
-     channel_axis = (
-         -1 if keras.config.image_data_format() == "channels_last" else 1
-     )
-     if stride == 2:
-         x = keras.layers.ZeroPadding2D(
-             padding=correct_pad_downsample(x, kernel_size),
-             name="conv_pad_%d" % block_id,
-         )(x)
-
-     x = keras.layers.DepthwiseConv2D(
-         kernel_size,
-         strides=stride,
-         padding="same" if stride == 1 else "valid",
-         data_format=keras.config.image_data_format(),
-         depth_multiplier=depth_multiplier,
-         use_bias=False,
-         name="depthwise_%d" % block_id,
-     )(x)
-     x = keras.layers.BatchNormalization(
-         axis=channel_axis,
-         epsilon=BN_EPSILON,
-         momentum=BN_MOMENTUM,
-         name="depthwise_BatchNorm_%d" % block_id,
-     )(x)
-     x = keras.layers.ReLU(6.0)(x)
-
-     x = keras.layers.Conv2D(
-         filters,
-         kernel_size=1,
-         padding="same",
-         data_format=keras.config.image_data_format(),
-         use_bias=False,
-         name="conv_%d" % block_id,
-     )(x)
-     x = keras.layers.BatchNormalization(
-         axis=channel_axis,
-         epsilon=BN_EPSILON,
-         momentum=BN_MOMENTUM,
-         name="BatchNorm_%d" % block_id,
-     )(x)
-     return keras.layers.ReLU(6.0)(x)
-
-
- def SqueezeAndExcite2D(
-     input,
-     filters,
-     bottleneck_filters=None,
-     squeeze_activation="relu",
-     excite_activation="sigmoid",
- ):
-     """
-     Description:
-         This layer applies a content-aware mechanism to adaptively assign
-         channel-wise weights. It uses global average pooling to compress
-         feature maps into single values, which are then processed by
-         two Conv1D layers: the first reduces the dimensionality, and
-         the second restores it.
-     Args:
-         filters: Number of input and output filters. The number of input and
-             output filters is same.
-         bottleneck_filters: (Optional) Number of bottleneck filters. Defaults
-             to `0.25 * filters`
-         squeeze_activation: (Optional) String, callable (or
-             keras.layers.Layer) or keras.activations.Activation instance
-             denoting activation to be applied after squeeze convolution.
-             Defaults to `relu`.
-         excite_activation: (Optional) String, callable (or
-             keras.layers.Layer) or keras.activations.Activation instance
-             denoting activation to be applied after excite convolution.
-             Defaults to `sigmoid`.
-     """
-     if not bottleneck_filters:
-         bottleneck_filters = filters // 4
-
-     x = keras.layers.GlobalAveragePooling2D(keepdims=True)(input)
-
-     x = keras.layers.Conv2D(
-         bottleneck_filters,
-         (1, 1),
-         data_format=keras.config.image_data_format(),
-         activation=squeeze_activation,
-     )(x)
-     x = keras.layers.Conv2D(
-         filters,
-         (1, 1),
-         data_format=keras.config.image_data_format(),
-         activation=excite_activation,
-     )(x)
-
-     x = ops.multiply(x, input)
-     return x
-
-
- def correct_pad_downsample(inputs, kernel_size):
-     """Returns a tuple for zero-padding for 2D convolution with downsampling.
-
-     Args:
-         inputs: Input tensor.
-         kernel_size: An integer or tuple/list of 2 integers.
-
-     Returns:
-         A tuple.
-     """
-     img_dim = 1
-     input_size = inputs.shape[img_dim : (img_dim + 2)]
-     if isinstance(kernel_size, int):
-         kernel_size = (kernel_size, kernel_size)
-     if input_size[0] is None:
-         adjust = (1, 1)
-     else:
-         adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
-     correct = (kernel_size[0] // 2, kernel_size[1] // 2)
-     return (
-         (correct[0] - adjust[0], correct[0]),
-         (correct[1] - adjust[1], correct[1]),
-     )
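To relate the new nested stackwise configuration to the loop in MobileNetBackbone.__init__ above: each outer entry describes one stage, and stage i contributes stackwise_num_blocks[i] InvertedResidualBlock layers whose per-block settings are read entry by entry. A small, illustrative check in plain Python (not part of the package; the values come from the docstring example):

    # Mirrors the nested loop in the new __init__.
    stackwise_num_blocks = [2, 3, 2, 3]
    stackwise_num_filters = [[16, 16], [24, 24, 24], [24, 24], [48, 48, 48]]

    block_names = [
        f"block_{block + 1}_{inverted_block}"
        for block in range(len(stackwise_num_blocks))
        for inverted_block in range(stackwise_num_blocks[block])
    ]
    assert len(block_names) == sum(stackwise_num_blocks) == 10
    # Every per-stage list must be as long as that stage's block count.
    assert [len(f) for f in stackwise_num_filters] == stackwise_num_blocks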