keras-hub-nightly 0.19.0.dev202503060350__py3-none-any.whl → 0.20.0.dev202503150350__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +3 -0
- keras_hub/api/models/__init__.py +5 -4
- keras_hub/src/models/cspnet/__init__.py +5 -0
- keras_hub/src/models/cspnet/cspnet_backbone.py +1279 -0
- keras_hub/src/models/cspnet/cspnet_image_classifier.py +12 -0
- keras_hub/src/models/cspnet/cspnet_image_classifier_preprocessor.py +14 -0
- keras_hub/src/models/cspnet/cspnet_image_converter.py +8 -0
- keras_hub/src/models/cspnet/cspnet_presets.py +16 -0
- keras_hub/src/models/gemma/gemma_attention.py +23 -12
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +18 -1
- keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +4 -1
- keras_hub/src/models/mobilenet/mobilenet_presets.py +38 -2
- keras_hub/src/models/siglip/siglip_presets.py +206 -10
- keras_hub/src/models/siglip/siglip_text_encoder.py +7 -1
- keras_hub/src/utils/keras_utils.py +32 -0
- keras_hub/src/utils/preset_utils.py +1 -0
- keras_hub/src/utils/timm/convert_cspnet.py +165 -0
- keras_hub/src/utils/timm/convert_mobilenet.py +120 -44
- keras_hub/src/utils/timm/preset_loader.py +9 -0
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.19.0.dev202503060350.dist-info → keras_hub_nightly-0.20.0.dev202503150350.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.19.0.dev202503060350.dist-info → keras_hub_nightly-0.20.0.dev202503150350.dist-info}/RECORD +24 -20
- {keras_hub_nightly-0.19.0.dev202503060350.dist-info → keras_hub_nightly-0.20.0.dev202503150350.dist-info}/WHEEL +1 -1
- keras_hub/src/models/csp_darknet/__init__.py +0 -0
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +0 -427
- keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py +0 -10
- {keras_hub_nightly-0.19.0.dev202503060350.dist-info → keras_hub_nightly-0.20.0.dev202503150350.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1279 @@
import keras
from keras import layers
from keras import ops

from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone
from keras_hub.src.utils.keras_utils import standardize_data_format


@keras_hub_export("keras_hub.models.CSPNetBackbone")
class CSPNetBackbone(FeaturePyramidBackbone):
    """CSPNet backbone model.

    This class implements a CSPNet backbone as described in
    [CSPNet: A New Backbone that can Enhance Learning Capability of CNN](
    https://arxiv.org/abs/1911.11929).

    Args:
        stem_filters: int or list of ints, filter size for the stem.
        stem_kernel_size: int or tuple/list of 2 integers, kernel size for the
            stem.
        stem_strides: int or tuple/list of 2 integers, stride length of the
            convolution for the stem.
        stackwise_num_filters: A list of ints, filter size for each block
            level in the model.
        stackwise_strides: int or tuple/list of ints, strides for each block
            level in the model.
        stackwise_depth: A list of ints, the depth (number of blocks) for
            each block level in the model.
        block_type: str. One of `"bottleneck_block"`, `"dark_block"`, or
            `"edge_block"`. Use `"dark_block"` for DarkNet blocks and
            `"edge_block"` for EdgeResidual / Fused-MBConv blocks.
        groups: int, the number of groups into which the input is split along
            the channel axis. Defaults to `1`.
        stage_type: str. One of `"csp"`, `"dark"`, or `"cs3"`. Use `"dark"`
            for DarkNet stages, `"csp"` for Cross Stage, and `"cs3"` for
            Cross Stage with only one transition conv. Defaults to `None`,
            which is treated as `"cs3"`.
        activation: str. Activation function for the model.
        output_strides: int, output stride of the backbone model. Must be one
            of `(8, 16, 32)`. Defaults to `32`.
        bottle_ratio: float or tuple/list of floats. The dimensionality of
            the intermediate bottleneck space (i.e., the number of output
            filters in the bottleneck convolution), calculated as
            `filters * bottle_ratio` and applied to:
            - the first convolution of `"dark_block"` and `"edge_block"`
            - the first two convolutions of `"bottleneck_block"`
            of each stage. Defaults to `1.0`.
        block_ratio: float or tuple/list of floats. Filter size for each
            block, calculated as `stackwise_num_filters * block_ratio` for
            each stage. Defaults to `1.0`.
        expand_ratio: float or tuple/list of floats. Filter ratio for `"csp"`
            and `"cs3"` stages at different levels. Defaults to `1.0`.
        stem_padding: str, padding for the stem, either `"valid"` or
            `"same"`. Defaults to `"valid"`.
        stem_pooling: str, pooling applied after the stem. Defaults to
            `None`.
        avg_down: bool, if `True`, `AveragePooling2D` is applied at the
            beginning of each stage when `strides == 2`. Defaults to `False`.
        down_growth: bool, whether to grow downsample channels to output
            channels. Applies to Cross Stage only. Defaults to `False`.
        cross_linear: bool, if `True`, no activation is applied after the
            expansion convolution. Applies to Cross Stage only. Defaults to
            `False`.
        data_format: `None` or str. If specified, either `"channels_last"` or
            `"channels_first"`. The ordering of the dimensions in the inputs.
            `"channels_last"` corresponds to inputs with shape
            `(batch_size, height, width, channels)` while `"channels_first"`
            corresponds to inputs with shape
            `(batch_size, channels, height, width)`. It defaults to the
            `image_data_format` value found in your Keras config file at
            `~/.keras/keras.json`. If you never set it, then it will be
            `"channels_last"`.
        image_shape: tuple. The input shape without the batch size.
            Defaults to `(None, None, 3)`.
        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The
            dtype to use for the model's computations and weights.

    Examples:
    ```python
    input_data = np.ones(shape=(8, 224, 224, 3))

    # Pretrained backbone
    model = keras_hub.models.CSPNetBackbone.from_preset(
        "cspdarknet53_ra_imagenet"
    )
    model(input_data)

    # Randomly initialized backbone with a custom config
    model = keras_hub.models.CSPNetBackbone(
        stem_filters=32,
        stem_kernel_size=3,
        stem_strides=1,
        stackwise_depth=[1, 2, 4],
        stackwise_strides=[1, 2, 2],
        stackwise_num_filters=[32, 64, 128],
        block_type="dark_block",
    )
    model(input_data)
    ```
    """

    def __init__(
        self,
        stem_filters,
        stem_kernel_size,
        stem_strides,
        stackwise_depth,
        stackwise_strides,
        stackwise_num_filters,
        block_type,
        groups=1,
        stage_type=None,
        activation="leaky_relu",
        output_strides=32,
        bottle_ratio=[1.0],
        block_ratio=[1.0],
        expand_ratio=[1.0],
        stem_padding="valid",
        stem_pooling=None,
        avg_down=False,
        down_growth=False,
        cross_linear=False,
        image_shape=(None, None, 3),
        data_format=None,
        dtype=None,
        **kwargs,
    ):
        if block_type not in (
            "dark_block",
            "edge_block",
            "bottleneck_block",
        ):
            raise ValueError(
                '`block_type` must be either `"dark_block"`, '
                '`"edge_block"`, or `"bottleneck_block"`. '
                f"Received block_type={block_type}."
            )

        # `None` falls through to the `"cs3"` stage type below.
        if stage_type not in (
            None,
            "dark",
            "csp",
            "cs3",
        ):
            raise ValueError(
                '`stage_type` must be either `"dark"`, `"csp"`, or `"cs3"`. '
                f"Received stage_type={stage_type}."
            )
        data_format = standardize_data_format(data_format)
        channel_axis = -1 if data_format == "channels_last" else 1

        # === Functional Model ===
        image_input = layers.Input(shape=image_shape)
        x = image_input  # Intermediate result.
        stem, stem_feat_info = create_csp_stem(
            data_format=data_format,
            channel_axis=channel_axis,
            filters=stem_filters,
            kernel_size=stem_kernel_size,
            strides=stem_strides,
            pooling=stem_pooling,
            padding=stem_padding,
            activation=activation,
            dtype=dtype,
        )(x)

        stages, pyramid_outputs = create_csp_stages(
            inputs=stem,
            filters=stackwise_num_filters,
            data_format=data_format,
            channel_axis=channel_axis,
            stackwise_depth=stackwise_depth,
            reduction=stem_feat_info,
            groups=groups,
            block_ratio=block_ratio,
            bottle_ratio=bottle_ratio,
            expand_ratio=expand_ratio,
            strides=stackwise_strides,
            avg_down=avg_down,
            down_growth=down_growth,
            cross_linear=cross_linear,
            activation=activation,
            output_strides=output_strides,
            stage_type=stage_type,
            block_type=block_type,
            dtype=dtype,
            name="csp_stage",
        )

        super().__init__(
            inputs=image_input, outputs=stages, dtype=dtype, **kwargs
        )

        # === Config ===
        self.stem_filters = stem_filters
        self.stem_kernel_size = stem_kernel_size
        self.stem_strides = stem_strides
        self.stackwise_depth = stackwise_depth
        self.stackwise_strides = stackwise_strides
        self.stackwise_num_filters = stackwise_num_filters
        self.stage_type = stage_type
        self.block_type = block_type
        self.output_strides = output_strides
        self.groups = groups
        self.activation = activation
        self.bottle_ratio = bottle_ratio
        self.block_ratio = block_ratio
        self.expand_ratio = expand_ratio
        self.stem_padding = stem_padding
        self.stem_pooling = stem_pooling
        self.avg_down = avg_down
        self.down_growth = down_growth
        self.cross_linear = cross_linear
        self.image_shape = image_shape
        self.data_format = data_format
        self.pyramid_outputs = pyramid_outputs

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "stem_filters": self.stem_filters,
                "stem_kernel_size": self.stem_kernel_size,
                "stem_strides": self.stem_strides,
                "stackwise_depth": self.stackwise_depth,
                "stackwise_strides": self.stackwise_strides,
                "stackwise_num_filters": self.stackwise_num_filters,
                "stage_type": self.stage_type,
                "block_type": self.block_type,
                "output_strides": self.output_strides,
                "groups": self.groups,
                "activation": self.activation,
                "bottle_ratio": self.bottle_ratio,
                "block_ratio": self.block_ratio,
                "expand_ratio": self.expand_ratio,
                "stem_padding": self.stem_padding,
                "stem_pooling": self.stem_pooling,
                "avg_down": self.avg_down,
                "down_growth": self.down_growth,
                "cross_linear": self.cross_linear,
                "image_shape": self.image_shape,
                "data_format": self.data_format,
            }
        )
        return config


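# Note on the helpers below: each builder (block, stage, and stem) returns an
# `apply(x)` closure rather than a `keras.layers.Layer`, so a block is used
# functionally, e.g. `x = dark_block(filters=64, ...)(x)`. Layer names are
# derived deterministically from the `name` prefix, which keeps them stable
# across rebuilds of the functional graph.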
def bottleneck_block(
    filters,
    channel_axis,
    data_format,
    bottle_ratio,
    dilation=1,
    groups=1,
    activation="relu",
    dtype=None,
    name=None,
):
    """Bottleneck block.

    Args:
        filters: int, the dimensionality of the output space (i.e. the
            number of output filters used in the block).
        data_format: `None` or str. The ordering of the dimensions in the
            inputs. Can be `"channels_last"`
            (`(batch_size, height, width, channels)`) or `"channels_first"`
            (`(batch_size, channels, height, width)`).
        bottle_ratio: float, ratio for bottleneck filters. The number of
            bottleneck filters is `filters * bottle_ratio`.
        dilation: int or tuple/list of 2 integers, the dilation rate to use
            for dilated convolution. Defaults to `1`.
        groups: a positive int, the number of groups in which the input is
            split along the channel axis.
        activation: activation for the conv layers. Defaults to `"relu"`.
        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The
            dtype to use for the model's computations and weights.
        name: str. A prefix for the layer names used in the block.

    Returns:
        Output tensor of block.
    """
    if name is None:
        name = f"bottleneck{keras.backend.get_uid('bottleneck')}"

    hidden_filters = int(round(filters * bottle_ratio))

    def apply(x):
        shortcut = x
        x = layers.Conv2D(
            filters=hidden_filters,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_bottleneck_block_conv_1",
        )(x)
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_bottleneck_block_bn_1",
        )(x)
        if activation == "leaky_relu":
            x = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_bottleneck_block_activation_1",
            )(x)
        else:
            x = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_bottleneck_block_activation_1",
            )(x)

        x = layers.Conv2D(
            filters=hidden_filters,
            kernel_size=3,
            dilation_rate=dilation,
            groups=groups,
            padding="same",
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_bottleneck_block_conv_2",
        )(x)
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_bottleneck_block_bn_2",
        )(x)
        if activation == "leaky_relu":
            x = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_bottleneck_block_activation_2",
            )(x)
        else:
            x = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_bottleneck_block_activation_2",
            )(x)

        x = layers.Conv2D(
            filters=filters,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_bottleneck_block_conv_3",
        )(x)
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_bottleneck_block_bn_3",
        )(x)
        if activation == "leaky_relu":
            x = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_bottleneck_block_activation_3",
            )(x)
        else:
            x = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_bottleneck_block_activation_3",
            )(x)

        x = layers.add(
            [x, shortcut], dtype=dtype, name=f"{name}_bottleneck_block_add"
        )
        if activation == "leaky_relu":
            x = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_bottleneck_block_activation_4",
            )(x)
        else:
            x = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_bottleneck_block_activation_4",
            )(x)
        return x

    return apply


def dark_block(
    filters,
    data_format,
    channel_axis,
    dilation,
    bottle_ratio,
    groups,
    activation,
    dtype=None,
    name=None,
):
    """DarkNet block.

    Args:
        filters: int, the dimensionality of the output space (i.e. the
            number of output filters used in the block).
        data_format: `None` or str. The ordering of the dimensions in the
            inputs. Can be `"channels_last"`
            (`(batch_size, height, width, channels)`) or `"channels_first"`
            (`(batch_size, channels, height, width)`).
        bottle_ratio: float, ratio for darknet filters. The number of hidden
            filters is `filters * bottle_ratio`.
        dilation: int or tuple/list of 2 integers, the dilation rate to use
            for dilated convolution.
        groups: a positive int, the number of groups in which the input is
            split along the channel axis.
        activation: activation for the conv layers.
        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The
            dtype to use for the model's computations and weights.
        name: str. A prefix for the layer names used in the block.

    Returns:
        Output tensor of block.
    """
    if name is None:
        name = f"dark{keras.backend.get_uid('dark')}"

    hidden_filters = int(round(filters * bottle_ratio))

    def apply(x):
        shortcut = x
        x = layers.Conv2D(
            filters=hidden_filters,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_dark_block_conv_1",
        )(x)
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_dark_block_bn_1",
        )(x)
        if activation == "leaky_relu":
            x = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_dark_block_activation_1",
            )(x)
        else:
            x = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_dark_block_activation_1",
            )(x)

        x = layers.Conv2D(
            filters=filters,
            kernel_size=3,
            dilation_rate=dilation,
            groups=groups,
            padding="same",
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_dark_block_conv_2",
        )(x)
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_dark_block_bn_2",
        )(x)
        if activation == "leaky_relu":
            x = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_dark_block_activation_2",
            )(x)
        else:
            x = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_dark_block_activation_2",
            )(x)

        x = layers.add(
            [x, shortcut], dtype=dtype, name=f"{name}_dark_block_add"
        )
        return x

    return apply


def edge_block(
    filters,
    data_format,
    channel_axis,
    dilation=1,
    bottle_ratio=0.5,
    groups=1,
    activation="relu",
    dtype=None,
    name=None,
):
    """EdgeResidual / Fused-MBConv block.

    Args:
        filters: int, the dimensionality of the output space (i.e. the
            number of output filters used in the block).
        data_format: `None` or str. The ordering of the dimensions in the
            inputs. Can be `"channels_last"`
            (`(batch_size, height, width, channels)`) or `"channels_first"`
            (`(batch_size, channels, height, width)`).
        bottle_ratio: float, ratio for the intermediate filters. The number
            of hidden filters is `filters * bottle_ratio`.
        dilation: int or tuple/list of 2 integers, the dilation rate to use
            for dilated convolution. Defaults to `1`.
        groups: a positive int, the number of groups in which the input is
            split along the channel axis.
        activation: activation for the conv layers. Defaults to `"relu"`.
        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The
            dtype to use for the model's computations and weights.
        name: str. A prefix for the layer names used in the block.

    Returns:
        Output tensor of block.
    """
    if name is None:
        name = f"edge{keras.backend.get_uid('edge')}"

    hidden_filters = int(round(filters * bottle_ratio))

    def apply(x):
        shortcut = x
        x = layers.Conv2D(
            filters=hidden_filters,
            kernel_size=3,
            use_bias=False,
            dilation_rate=dilation,
            groups=groups,
            padding="same",
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_edge_block_conv_1",
        )(x)
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_edge_block_bn_1",
        )(x)
        if activation == "leaky_relu":
            x = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_edge_block_activation_1",
            )(x)
        else:
            x = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_edge_block_activation_1",
            )(x)

        x = layers.Conv2D(
            filters=filters,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_edge_block_conv_2",
        )(x)
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_edge_block_bn_2",
        )(x)
        if activation == "leaky_relu":
            x = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_edge_block_activation_2",
            )(x)
        else:
            x = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_edge_block_activation_2",
            )(x)

        x = layers.add(
            [x, shortcut], dtype=dtype, name=f"{name}_edge_block_add"
        )
        return x

    return apply


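# The three residual blocks above differ mainly in their conv stack:
# `bottleneck_block` is 1x1 -> 3x3 -> 1x1 (with a post-add activation),
# `dark_block` is 1x1 -> 3x3, and `edge_block` is 3x3 -> 1x1 (the
# Fused-MBConv ordering). All three end with an identity shortcut add, so a
# block's input channel count must equal `filters`.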
def cross_stage(
    filters,
    strides,
    dilation,
    depth,
    data_format,
    channel_axis,
    block_ratio=1.0,
    bottle_ratio=1.0,
    expand_ratio=1.0,
    groups=1,
    first_dilation=None,
    avg_down=False,
    activation="relu",
    down_growth=False,
    cross_linear=False,
    block_fn=bottleneck_block,
    dtype=None,
    name=None,
):
    """Cross Stage."""
    if name is None:
        name = f"cross_stage_{keras.backend.get_uid('cross_stage')}"

    first_dilation = first_dilation or dilation

    def apply(x):
        prev_filters = keras.ops.shape(x)[channel_axis]
        down_chs = filters if down_growth else prev_filters
        expand_chs = int(round(filters * expand_ratio))
        block_channels = int(round(filters * block_ratio))

        if strides != 1 or first_dilation != dilation:
            if avg_down:
                if strides == 2:
                    x = layers.AveragePooling2D(
                        2, dtype=dtype, name=f"{name}_csp_avg_pool"
                    )(x)
                x = layers.Conv2D(
                    filters=filters,
                    kernel_size=1,
                    strides=1,
                    use_bias=False,
                    groups=groups,
                    data_format=data_format,
                    dtype=dtype,
                    name=f"{name}_csp_conv_down_1",
                )(x)
                x = layers.BatchNormalization(
                    epsilon=1e-05,
                    axis=channel_axis,
                    dtype=dtype,
                    name=f"{name}_csp_bn_1",
                )(x)
                if activation == "leaky_relu":
                    x = layers.LeakyReLU(
                        negative_slope=0.01,
                        dtype=dtype,
                        name=f"{name}_csp_activation_1",
                    )(x)
                else:
                    x = layers.Activation(
                        activation,
                        dtype=dtype,
                        name=f"{name}_csp_activation_1",
                    )(x)
            else:
                x = layers.Conv2D(
                    filters=down_chs,
                    kernel_size=3,
                    strides=strides,
                    dilation_rate=first_dilation,
                    use_bias=False,
                    groups=groups,
                    data_format=data_format,
                    dtype=dtype,
                    name=f"{name}_csp_conv_down_1",
                )(x)
                x = layers.BatchNormalization(
                    epsilon=1e-05,
                    axis=channel_axis,
                    dtype=dtype,
                    name=f"{name}_csp_bn_1",
                )(x)
                if activation == "leaky_relu":
                    x = layers.LeakyReLU(
                        negative_slope=0.01,
                        dtype=dtype,
                        name=f"{name}_csp_activation_1",
                    )(x)
                else:
                    x = layers.Activation(
                        activation,
                        dtype=dtype,
                        name=f"{name}_csp_activation_1",
                    )(x)

        x = layers.Conv2D(
            filters=expand_chs,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_csp_conv_exp",
        )(x)
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_csp_bn_2",
        )(x)
        if not cross_linear:
            if activation == "leaky_relu":
                x = layers.LeakyReLU(
                    negative_slope=0.01,
                    dtype=dtype,
                    name=f"{name}_csp_activation_2",
                )(x)
            else:
                x = layers.Activation(
                    activation,
                    dtype=dtype,
                    name=f"{name}_csp_activation_2",
                )(x)
        prev_filters = keras.ops.shape(x)[channel_axis]
        xs, xb = ops.split(
            x,
            indices_or_sections=prev_filters // (expand_chs // 2),
            axis=channel_axis,
        )

        for i in range(depth):
            xb = block_fn(
                filters=block_channels,
                dilation=dilation,
                bottle_ratio=bottle_ratio,
                groups=groups,
                activation=activation,
                data_format=data_format,
                channel_axis=channel_axis,
                dtype=dtype,
                name=f"{name}_block_{i}",
            )(xb)

        xb = layers.Conv2D(
            filters=expand_chs // 2,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_csp_conv_transition_b",
        )(xb)
        xb = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_csp_transition_b_bn",
        )(xb)
        if activation == "leaky_relu":
            xb = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_csp_transition_b_activation",
            )(xb)
        else:
            xb = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_csp_transition_b_activation",
            )(xb)

        out = layers.Concatenate(
            axis=channel_axis, dtype=dtype, name=f"{name}_csp_conv_concat"
        )([xs, xb])
        out = layers.Conv2D(
            filters=filters,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_csp_conv_transition",
        )(out)
        out = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_csp_transition_bn",
        )(out)
        if activation == "leaky_relu":
            out = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_csp_transition_activation",
            )(out)
        else:
            out = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_csp_transition_activation",
            )(out)
        return out

    return apply


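# Channel walk-through for `cross_stage` (for example, `filters=128`,
# `expand_ratio=1.0`): the expansion conv emits `expand_chs = 128` channels,
# and `prev_filters // (expand_chs // 2) == 2`, so `ops.split` yields two
# 64-channel halves. The blocks run on `xb` only; `xs` bypasses them and is
# concatenated back before the final transition conv restores `filters`
# output channels.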
def cross_stage3(
    data_format,
    channel_axis,
    filters,
    strides,
    dilation,
    depth,
    block_ratio,
    bottle_ratio,
    expand_ratio,
    avg_down,
    activation,
    first_dilation,
    down_growth,
    cross_linear,
    block_fn,
    groups,
    name=None,
    dtype=None,
):
    """Cross Stage 3.

    Similar to Cross Stage, but with only one transition conv in the output.
    """
    if name is None:
        name = f"cross_stage3_{keras.backend.get_uid('cross_stage3')}"

    first_dilation = first_dilation or dilation

    def apply(x):
        prev_filters = keras.ops.shape(x)[channel_axis]
        down_chs = filters if down_growth else prev_filters
        expand_chs = int(round(filters * expand_ratio))
        block_filters = int(round(filters * block_ratio))

        if strides != 1 or first_dilation != dilation:
            if avg_down:
                if strides == 2:
                    x = layers.AveragePooling2D(
                        2, dtype=dtype, name=f"{name}_cross_stage3_avg_pool"
                    )(x)
                x = layers.Conv2D(
                    filters=filters,
                    kernel_size=1,
                    strides=1,
                    use_bias=False,
                    groups=groups,
                    data_format=data_format,
                    dtype=dtype,
                    name=f"{name}_cs3_conv_down_1",
                )(x)
                x = layers.BatchNormalization(
                    epsilon=1e-05,
                    axis=channel_axis,
                    dtype=dtype,
                    name=f"{name}_cs3_bn_1",
                )(x)
                if activation == "leaky_relu":
                    x = layers.LeakyReLU(
                        negative_slope=0.01,
                        dtype=dtype,
                        name=f"{name}_cs3_activation_1",
                    )(x)
                else:
                    x = layers.Activation(
                        activation,
                        dtype=dtype,
                        name=f"{name}_cs3_activation_1",
                    )(x)
            else:
                x = layers.Conv2D(
                    filters=down_chs,
                    kernel_size=3,
                    strides=strides,
                    dilation_rate=first_dilation,
                    use_bias=False,
                    groups=groups,
                    data_format=data_format,
                    dtype=dtype,
                    name=f"{name}_cs3_conv_down_1",
                )(x)
                x = layers.BatchNormalization(
                    epsilon=1e-05,
                    axis=channel_axis,
                    dtype=dtype,
                    name=f"{name}_cs3_bn_1",
                )(x)
                if activation == "leaky_relu":
                    x = layers.LeakyReLU(
                        negative_slope=0.01,
                        dtype=dtype,
                        name=f"{name}_cs3_activation_1",
                    )(x)
                else:
                    x = layers.Activation(
                        activation,
                        dtype=dtype,
                        name=f"{name}_cs3_activation_1",
                    )(x)

        x = layers.Conv2D(
            filters=expand_chs,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_cs3_conv_exp",
        )(x)
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_cs3_bn_2",
        )(x)
        if not cross_linear:
            if activation == "leaky_relu":
                x = layers.LeakyReLU(
                    negative_slope=0.01,
                    dtype=dtype,
                    name=f"{name}_cs3_activation_2",
                )(x)
            else:
                x = layers.Activation(
                    activation,
                    dtype=dtype,
                    name=f"{name}_cs3_activation_2",
                )(x)

        prev_filters = keras.ops.shape(x)[channel_axis]
        x1, x2 = ops.split(
            x,
            indices_or_sections=prev_filters // (expand_chs // 2),
            axis=channel_axis,
        )

        for i in range(depth):
            x1 = block_fn(
                filters=block_filters,
                dilation=dilation,
                bottle_ratio=bottle_ratio,
                groups=groups,
                activation=activation,
                data_format=data_format,
                channel_axis=channel_axis,
                dtype=dtype,
                name=f"{name}_block_{i}",
            )(x1)

        out = layers.Concatenate(
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_cs3_conv_transition_concat",
        )([x1, x2])
        out = layers.Conv2D(
            filters=expand_chs // 2,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_cs3_conv_transition",
        )(out)
        out = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_cs3_transition_bn",
        )(out)
        if activation == "leaky_relu":
            out = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_cs3_activation_3",
            )(out)
        else:
            out = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_cs3_activation_3",
            )(out)
        return out

    return apply


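# `cross_stage3` uses the same channel partitioning as `cross_stage` but
# applies a single transition conv (to `expand_chs // 2` channels) after the
# concat, with no per-branch transition; this is the "cs3" layout used by the
# timm checkpoints these presets are converted from.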
def dark_stage(
    data_format,
    channel_axis,
    filters,
    strides,
    dilation,
    depth,
    block_ratio,
    bottle_ratio,
    avg_down,
    activation,
    first_dilation,
    block_fn,
    groups,
    expand_ratio=None,
    down_growth=None,
    cross_linear=None,
    name=None,
    dtype=None,
):
    """DarkNet stage.

    A downsampling convolution followed by a plain stack of blocks, without
    the cross-stage split. `expand_ratio`, `down_growth`, and `cross_linear`
    are accepted only for signature compatibility with the cross-stage
    variants and are unused here.
    """
    if name is None:
        name = f"dark_stage_{keras.backend.get_uid('dark_stage')}"

    first_dilation = first_dilation or dilation

    def apply(x):
        block_channels = int(round(filters * block_ratio))
        if avg_down:
            if strides == 2:
                x = layers.AveragePooling2D(
                    2, dtype=dtype, name=f"{name}_dark_avg_pool"
                )(x)
            x = layers.Conv2D(
                filters=filters,
                kernel_size=1,
                strides=1,
                use_bias=False,
                groups=groups,
                data_format=data_format,
                dtype=dtype,
                name=f"{name}_dark_conv_down_1",
            )(x)
            x = layers.BatchNormalization(
                epsilon=1e-05,
                axis=channel_axis,
                dtype=dtype,
                name=f"{name}_dark_bn_1",
            )(x)
            if activation == "leaky_relu":
                x = layers.LeakyReLU(
                    negative_slope=0.01,
                    dtype=dtype,
                    name=f"{name}_dark_activation_1",
                )(x)
            else:
                x = layers.Activation(
                    activation,
                    dtype=dtype,
                    name=f"{name}_dark_activation_1",
                )(x)
        else:
            x = layers.Conv2D(
                filters=filters,
                kernel_size=3,
                strides=strides,
                dilation_rate=first_dilation,
                use_bias=False,
                groups=groups,
                data_format=data_format,
                dtype=dtype,
                name=f"{name}_dark_conv_down_1",
            )(x)
            x = layers.BatchNormalization(
                epsilon=1e-05,
                axis=channel_axis,
                dtype=dtype,
                name=f"{name}_dark_bn_1",
            )(x)
            if activation == "leaky_relu":
                x = layers.LeakyReLU(
                    negative_slope=0.01,
                    dtype=dtype,
                    name=f"{name}_dark_activation_1",
                )(x)
            else:
                x = layers.Activation(
                    activation,
                    dtype=dtype,
                    name=f"{name}_dark_activation_1",
                )(x)
        for i in range(depth):
            x = block_fn(
                filters=block_channels,
                dilation=dilation,
                bottle_ratio=bottle_ratio,
                groups=groups,
                activation=activation,
                data_format=data_format,
                channel_axis=channel_axis,
                dtype=dtype,
                name=f"{name}_block_{i}",
            )(x)
        return x

    return apply


def create_csp_stem(
    data_format,
    channel_axis,
    activation,
    padding,
    filters=32,
    kernel_size=3,
    strides=2,
    pooling=None,
    dtype=None,
):
    if not isinstance(filters, (tuple, list)):
        filters = [filters]
    stem_depth = len(filters)
    assert stem_depth
    assert strides in (1, 2, 4)
    last_idx = stem_depth - 1

    def apply(x):
        stem_strides = 1
        for i, chs in enumerate(filters):
            conv_strides = (
                2
                if (i == 0 and strides > 1)
                or (i == last_idx and strides > 2 and not pooling)
                else 1
            )
            x = layers.Conv2D(
                filters=chs,
                kernel_size=kernel_size,
                strides=conv_strides,
                padding=padding if i == 0 else "valid",
                use_bias=False,
                data_format=data_format,
                dtype=dtype,
                name=f"csp_stem_conv_{i}",
            )(x)
            x = layers.BatchNormalization(
                epsilon=1e-05,
                axis=channel_axis,
                dtype=dtype,
                name=f"csp_stem_bn_{i}",
            )(x)
            if activation == "leaky_relu":
                x = layers.LeakyReLU(
                    negative_slope=0.01,
                    dtype=dtype,
                    name=f"csp_stem_activation_{i}",
                )(x)
            else:
                x = layers.Activation(
                    activation,
                    dtype=dtype,
                    name=f"csp_stem_activation_{i}",
                )(x)
            stem_strides *= conv_strides

        if pooling == "max":
            assert strides > 2
            x = layers.MaxPooling2D(
                pool_size=3,
                strides=2,
                padding="same",
                data_format=data_format,
                dtype=dtype,
                name="csp_stem_pool",
            )(x)
            stem_strides *= 2
        return x, stem_strides

    return apply


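# Example stem configuration: `filters=[32, 32, 64]`, `strides=4`,
# `pooling=None` places stride 2 on the first and last convs, so `apply`
# returns the feature tensor together with a total reduction of 4. That
# reduction is passed to `create_csp_stages` below (as `reduction`) to seed
# `net_strides`.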
def create_csp_stages(
    inputs,
    filters,
    data_format,
    channel_axis,
    stackwise_depth,
    reduction,
    block_ratio,
    bottle_ratio,
    expand_ratio,
    strides,
    groups,
    avg_down,
    down_growth,
    cross_linear,
    activation,
    output_strides,
    stage_type,
    block_type,
    dtype,
    name,
):
    if name is None:
        name = f"csp_stage_{keras.backend.get_uid('csp_stage')}"

    num_stages = len(stackwise_depth)
    dilation = 1
    net_strides = reduction
    strides = _pad_arg(strides, num_stages)
    expand_ratio = _pad_arg(expand_ratio, num_stages)
    bottle_ratio = _pad_arg(bottle_ratio, num_stages)
    block_ratio = _pad_arg(block_ratio, num_stages)

    if stage_type == "dark":
        stage_fn = dark_stage
    elif stage_type == "csp":
        stage_fn = cross_stage
    else:
        stage_fn = cross_stage3

    if block_type == "dark_block":
        block_fn = dark_block
    elif block_type == "edge_block":
        block_fn = edge_block
    else:
        block_fn = bottleneck_block

    stages = inputs
    pyramid_outputs = {}
    for stage_idx, _ in enumerate(stackwise_depth):
        stage_strides = strides[stage_idx]
        if net_strides >= output_strides and stage_strides > 1:
            # Once the requested output stride is reached, convert further
            # striding into dilation so the spatial size stops shrinking.
            dilation *= stage_strides
            stage_strides = 1
        net_strides *= stage_strides
        first_dilation = 1 if dilation in (1, 2) else 2
        stages = stage_fn(
            data_format=data_format,
            channel_axis=channel_axis,
            filters=filters[stage_idx],
            depth=stackwise_depth[stage_idx],
            strides=stage_strides,
            dilation=dilation,
            block_ratio=block_ratio[stage_idx],
            bottle_ratio=bottle_ratio[stage_idx],
            expand_ratio=expand_ratio[stage_idx],
            groups=groups,
            first_dilation=first_dilation,
            avg_down=avg_down,
            activation=activation,
            down_growth=down_growth,
            cross_linear=cross_linear,
            block_fn=block_fn,
            dtype=dtype,
            name=f"stage_{stage_idx}",
        )(stages)
        pyramid_outputs[f"P{stage_idx + 2}"] = stages
    return stages, pyramid_outputs


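# Stage outputs are recorded as pyramid features keyed "P2", "P3", ...
# (stage i maps to f"P{i + 2}"), which is the `pyramid_outputs` contract of
# `FeaturePyramidBackbone` that `CSPNetBackbone.__init__` exposes via
# `self.pyramid_outputs`.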
def _pad_arg(x, n):
    """Pads an argument tuple to length `n` by repeating the last value."""
    if not isinstance(x, (tuple, list)):
        x = (x,)
    curr_n = len(x)
    pad_n = n - curr_n
    if pad_n <= 0:
        return x[:n]
    return tuple(list(x) + [x[-1]] * pad_n)
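
# `_pad_arg` broadcasts per-stage arguments across all stages, e.g.
# `_pad_arg(1.0, 3) == (1.0, 1.0, 1.0)` and `_pad_arg([1, 2], 3) == (1, 2, 2)`;
# inputs longer than `n` are truncated to the first `n` values.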