keras-hub-nightly 0.19.0.dev202503050350__py3-none-any.whl → 0.20.0.dev202503140353__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. keras_hub/api/layers/__init__.py +3 -0
  2. keras_hub/api/models/__init__.py +5 -4
  3. keras_hub/src/models/cspnet/__init__.py +5 -0
  4. keras_hub/src/models/cspnet/cspnet_backbone.py +1279 -0
  5. keras_hub/src/models/cspnet/cspnet_image_classifier.py +12 -0
  6. keras_hub/src/models/cspnet/cspnet_image_classifier_preprocessor.py +14 -0
  7. keras_hub/src/models/cspnet/cspnet_image_converter.py +8 -0
  8. keras_hub/src/models/cspnet/cspnet_presets.py +16 -0
  9. keras_hub/src/models/gemma/gemma_attention.py +23 -12
  10. keras_hub/src/models/mobilenet/mobilenet_backbone.py +18 -1
  11. keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +4 -1
  12. keras_hub/src/models/mobilenet/mobilenet_presets.py +38 -2
  13. keras_hub/src/models/siglip/siglip_presets.py +206 -10
  14. keras_hub/src/models/siglip/siglip_text_encoder.py +7 -1
  15. keras_hub/src/utils/keras_utils.py +32 -0
  16. keras_hub/src/utils/preset_utils.py +1 -0
  17. keras_hub/src/utils/timm/convert_cspnet.py +165 -0
  18. keras_hub/src/utils/timm/convert_mobilenet.py +120 -44
  19. keras_hub/src/utils/timm/preset_loader.py +9 -0
  20. keras_hub/src/version_utils.py +1 -1
  21. {keras_hub_nightly-0.19.0.dev202503050350.dist-info → keras_hub_nightly-0.20.0.dev202503140353.dist-info}/METADATA +1 -1
  22. {keras_hub_nightly-0.19.0.dev202503050350.dist-info → keras_hub_nightly-0.20.0.dev202503140353.dist-info}/RECORD +24 -20
  23. {keras_hub_nightly-0.19.0.dev202503050350.dist-info → keras_hub_nightly-0.20.0.dev202503140353.dist-info}/WHEEL +1 -1
  24. keras_hub/src/models/csp_darknet/__init__.py +0 -0
  25. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +0 -427
  26. keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py +0 -10
  27. {keras_hub_nightly-0.19.0.dev202503050350.dist-info → keras_hub_nightly-0.20.0.dev202503140353.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1279 @@
1
+ import keras
2
+ from keras import layers
3
+ from keras import ops
4
+
5
+ from keras_hub.src.api_export import keras_hub_export
6
+ from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone
7
+ from keras_hub.src.utils.keras_utils import standardize_data_format
8
+
9
+
10
@keras_hub_export("keras_hub.models.CSPNetBackbone")
class CSPNetBackbone(FeaturePyramidBackbone):
    """This class represents Keras Backbone of CSPNet model.

    This class implements a CSPNet backbone as described in
    [CSPNet: A New Backbone that can Enhance Learning Capability of CNN](
        https://arxiv.org/abs/1911.11929).

    Args:
        stem_filters: int or list of ints, filter size for the stem.
        stem_kernel_size: int or tuple/list of 2 integers, kernel size for the
            stem.
        stem_strides: int or tuple/list of 2 integers, stride length of the
            convolution for the stem.
        stackwise_num_filters: A list of ints, filter size for each block level
            in the model.
        stackwise_strides: int or tuple/list of ints, strides for each block
            level in the model.
        stackwise_depth: A list of ints, representing the depth
            (number of blocks) for each block level in the model.
        block_type: str. One of `"bottleneck_block"`, `"dark_block"`, or
            `"edge_block"`. Use `"dark_block"` for DarkNet blocks,
            `"edge_block"` for EdgeResidual / Fused-MBConv blocks.
        groups: int, specifying the number of groups into which the input is
            split along the channel axis. Defaults to `1`.
        stage_type: str. One of `"csp"`, `"dark"`, or `"cs3"`. Use `"dark"` for
            DarkNet stages, `"csp"` for Cross Stage, and `"cs3"` for Cross Stage
            with only one transition conv. Defaults to `None`, which defaults to
            `"cs3"`.
        activation: str. Activation function for the model.
        output_strides: int, output stride length of the backbone model. Must be
            one of `(8, 16, 32)`. Defaults to `32`.
        bottle_ratio: float or tuple/list of floats. The dimensionality of the
            intermediate bottleneck space (i.e., the number of output filters in
            the bottleneck convolution), calculated as
            `(filters * bottle_ratio)` and applied to:
            - the first convolution of `"dark_block"` and `"edge_block"`
            - the first two convolutions of `"bottleneck_block"`
            of each stage. Defaults to `1.0`.
        block_ratio: float or tuple/list of floats. Filter size for each block,
            calculated as `(stackwise_num_filters * block_ratio)` for each
            stage. Defaults to `1.0`.
        expand_ratio: float or tuple/list of floats. Filters ratio for `"csp"`
            and `"cs3"` stages at different levels. Defaults to `1.0`.
        stem_padding: str, padding value for the stem, either `"valid"` or
            `"same"`. Defaults to `"valid"`.
        stem_pooling: str, pooling value for the stem. Defaults to `None`.
        avg_down: bool, if `True`, `AveragePooling2D` is applied at the
            beginning of each stage when `strides == 2`. Defaults to `False`.
        down_growth: bool, grow downsample channels to output channels. Applies
            to Cross Stage only. Defaults to `False`.
        cross_linear: bool, if `True`, activation will not be applied after the
            expansion convolution. Applies to Cross Stage only. Defaults to
            `False`.
        data_format: `None` or str. If specified, either `"channels_last"` or
            `"channels_first"`. The ordering of the dimensions in the inputs.
            `"channels_last"` corresponds to inputs with shape
            `(batch_size, height, width, channels)` while `"channels_first"`
            corresponds to inputs with shape
            `(batch_size, channels, height, width)`. It defaults to the
            `image_data_format` value found in your Keras config file at
            `~/.keras/keras.json`. If you never set it, then it will be
            `"channels_last"`.
        image_shape: tuple. The input shape without the batch size.
            Defaults to `(None, None, 3)`.
        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
            to use for the model's computations and weights.

    Raises:
        ValueError: if `block_type` or `stage_type` is not one of the
            supported values listed above.

    Examples:
    ```python
    input_data = np.ones(shape=(8, 224, 224, 3))

    # Pretrained backbone
    model = keras_hub.models.CSPNetBackbone.from_preset(
        "cspdarknet53_ra_imagenet"
    )
    model(input_data)

    # Randomly initialized backbone with a custom config
    model = keras_hub.models.CSPNetBackbone(
        stem_filters=32,
        stem_kernel_size=3,
        stem_strides=1,
        stackwise_depth=[1, 2, 4],
        stackwise_strides=[1, 2, 2],
        stackwise_num_filters=[32, 64, 128],
        block_type="dark_block",
    )
    model(input_data)
    ```
    """

    def __init__(
        self,
        stem_filters,
        stem_kernel_size,
        stem_strides,
        stackwise_depth,
        stackwise_strides,
        stackwise_num_filters,
        block_type,
        groups=1,
        stage_type=None,
        activation="leaky_relu",
        output_strides=32,
        bottle_ratio=[1.0],
        block_ratio=[1.0],
        expand_ratio=[1.0],
        stem_padding="valid",
        stem_pooling=None,
        avg_down=False,
        down_growth=False,
        cross_linear=False,
        image_shape=(None, None, 3),
        data_format=None,
        dtype=None,
        **kwargs,
    ):
        if block_type not in (
            "dark_block",
            "edge_block",
            "bottleneck_block",
        ):
            raise ValueError(
                '`block_type` must be either `"dark_block"`, '
                '`"edge_block"`, or `"bottleneck_block"`. '
                f"Received block_type={block_type}."
            )

        # `None` is the documented default and stands for the `"cs3"` stage;
        # resolve it before validating so the default does not raise.
        if stage_type is None:
            stage_type = "cs3"
        if stage_type not in (
            "dark",
            "csp",
            "cs3",
        ):
            raise ValueError(
                '`stage_type` must be either `"dark"`, `"csp"`, '
                'or `"cs3"`. '
                f"Received stage_type={stage_type}."
            )
        data_format = standardize_data_format(data_format)
        channel_axis = -1 if data_format == "channels_last" else 1

        # === Functional Model ===
        image_input = layers.Input(shape=image_shape)
        x = image_input  # Intermediate result.
        stem, stem_feat_info = create_csp_stem(
            data_format=data_format,
            channel_axis=channel_axis,
            filters=stem_filters,
            kernel_size=stem_kernel_size,
            strides=stem_strides,
            pooling=stem_pooling,
            padding=stem_padding,
            activation=activation,
            dtype=dtype,
        )(x)

        stages, pyramid_outputs = create_csp_stages(
            inputs=stem,
            filters=stackwise_num_filters,
            data_format=data_format,
            channel_axis=channel_axis,
            stackwise_depth=stackwise_depth,
            reduction=stem_feat_info,
            groups=groups,
            block_ratio=block_ratio,
            bottle_ratio=bottle_ratio,
            expand_ratio=expand_ratio,
            strides=stackwise_strides,
            avg_down=avg_down,
            down_growth=down_growth,
            cross_linear=cross_linear,
            activation=activation,
            output_strides=output_strides,
            stage_type=stage_type,
            block_type=block_type,
            dtype=dtype,
            name="csp_stage",
        )

        super().__init__(
            inputs=image_input, outputs=stages, dtype=dtype, **kwargs
        )

        # === Config ===
        self.stem_filters = stem_filters
        self.stem_kernel_size = stem_kernel_size
        self.stem_strides = stem_strides
        self.stackwise_depth = stackwise_depth
        self.stackwise_strides = stackwise_strides
        self.stackwise_num_filters = stackwise_num_filters
        self.stage_type = stage_type
        self.block_type = block_type
        self.output_strides = output_strides
        self.groups = groups
        self.activation = activation
        self.bottle_ratio = bottle_ratio
        self.block_ratio = block_ratio
        self.expand_ratio = expand_ratio
        self.stem_padding = stem_padding
        self.stem_pooling = stem_pooling
        self.avg_down = avg_down
        self.down_growth = down_growth
        self.cross_linear = cross_linear
        self.image_shape = image_shape
        self.data_format = data_format
        self.pyramid_outputs = pyramid_outputs

    def get_config(self):
        """Return the constructor arguments needed to rebuild this backbone."""
        config = super().get_config()
        config.update(
            {
                "stem_filters": self.stem_filters,
                "stem_kernel_size": self.stem_kernel_size,
                "stem_strides": self.stem_strides,
                "stackwise_depth": self.stackwise_depth,
                "stackwise_strides": self.stackwise_strides,
                "stackwise_num_filters": self.stackwise_num_filters,
                "stage_type": self.stage_type,
                "block_type": self.block_type,
                "output_strides": self.output_strides,
                "groups": self.groups,
                "activation": self.activation,
                "bottle_ratio": self.bottle_ratio,
                "block_ratio": self.block_ratio,
                "expand_ratio": self.expand_ratio,
                "stem_padding": self.stem_padding,
                "stem_pooling": self.stem_pooling,
                "avg_down": self.avg_down,
                "down_growth": self.down_growth,
                "cross_linear": self.cross_linear,
                "image_shape": self.image_shape,
                "data_format": self.data_format,
            }
        )
        return config
245
+
246
+
247
def bottleneck_block(
    filters,
    channel_axis,
    data_format,
    bottle_ratio,
    dilation=1,
    groups=1,
    activation="relu",
    dtype=None,
    name=None,
):
    """
    BottleNeck block.

    Builds a 1x1 conv -> 3x3 conv -> 1x1 conv stack (each followed by batch
    normalization and the activation), adds the block input back as a
    residual, and applies the activation once more.

    Args:
        filters: Integer, the dimensionality of the output spaces (i.e. the
            number of output filters in used the blocks).
        channel_axis: int, the axis passed to `BatchNormalization`.
        data_format: `None` or str. the ordering of the dimensions in the
            inputs. Can be `"channels_last"`
            (`(batch_size, height, width, channels)`) or`"channels_first"`
            (`(batch_size, channels, height, width)`).
        bottle_ratio: float, ratio for bottleneck filters. Number of bottleneck
            `filters = filters * bottle_ratio`.
        dilation: int or tuple/list of 2 integers, specifying the dilation rate
            to use for dilated convolution, defaults to `1`.
        groups: A positive int specifying the number of groups in which the
            input is split along the channel axis
        activation: Activation for the conv layers, defaults to "relu".
            `"leaky_relu"` is built as a `LeakyReLU` layer with
            `negative_slope=0.01`; any other value is handed to
            `layers.Activation`.
        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
            to use for the models computations and weights.
        name: str. A prefix for the layer names used in the block.

    Returns:
        A function that takes the input tensor and returns the output tensor
        of the block.
    """
    if name is None:
        name = f"bottleneck{keras.backend.get_uid('bottleneck')}"

    hidden_filters = int(round(filters * bottle_ratio))
    prefix = f"{name}_bottleneck_block"

    def _batch_norm(x, index):
        return layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{prefix}_bn_{index}",
        )(x)

    def _activate(x, index):
        layer_name = f"{prefix}_activation_{index}"
        if activation == "leaky_relu":
            return layers.LeakyReLU(
                negative_slope=0.01, dtype=dtype, name=layer_name
            )(x)
        return layers.Activation(activation, dtype=dtype, name=layer_name)(x)

    def apply(x):
        shortcut = x

        # 1x1 reduction to the bottleneck width.
        x = layers.Conv2D(
            filters=hidden_filters,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{prefix}_conv_1",
        )(x)
        x = _batch_norm(x, 1)
        x = _activate(x, 1)

        # 3x3 (optionally dilated / grouped) conv at the bottleneck width.
        x = layers.Conv2D(
            filters=hidden_filters,
            kernel_size=3,
            dilation_rate=dilation,
            groups=groups,
            padding="same",
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{prefix}_conv_2",
        )(x)
        x = _batch_norm(x, 2)
        x = _activate(x, 2)

        # 1x1 projection back to `filters` channels.
        x = layers.Conv2D(
            filters=filters,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{prefix}_conv_3",
        )(x)
        x = _batch_norm(x, 3)
        x = _activate(x, 3)

        x = layers.add([x, shortcut], dtype=dtype, name=f"{prefix}_add")
        return _activate(x, 4)

    return apply
391
+
392
+
393
def dark_block(
    filters,
    data_format,
    channel_axis,
    dilation,
    bottle_ratio,
    groups,
    activation,
    dtype=None,
    name=None,
):
    """
    DarkNet block.

    Builds a 1x1 conv -> 3x3 conv stack (each followed by batch
    normalization and the activation) and adds the block input back as a
    residual.

    Args:
        filters: Integer, the dimensionality of the output spaces (i.e. the
            number of output filters in used the blocks).
        data_format: `None` or str. the ordering of the dimensions in the
            inputs. Can be `"channels_last"`
            (`(batch_size, height, width, channels)`) or`"channels_first"`
            (`(batch_size, channels, height, width)`).
        channel_axis: int, the axis passed to `BatchNormalization`.
        dilation: int or tuple/list of 2 integers, specifying the dilation rate
            to use for the 3x3 convolution.
        bottle_ratio: float, ratio for darknet filters. Number of darknet
            `filters = filters * bottle_ratio`.
        groups: A positive int specifying the number of groups in which the
            input is split along the channel axis
        activation: Activation for the conv layers. `"leaky_relu"` is built
            as a `LeakyReLU` layer with `negative_slope=0.01`; any other value
            is handed to `layers.Activation`.
        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
            to use for the models computations and weights.
        name: str. A prefix for the layer names used in the block.

    Returns:
        A function that takes the input tensor and returns the output tensor
        of the block.
    """
    if name is None:
        name = f"dark{keras.backend.get_uid('dark')}"

    hidden_filters = int(round(filters * bottle_ratio))
    prefix = f"{name}_dark_block"

    def _bn_act(x, index):
        # Batch normalization followed by the configured activation.
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{prefix}_bn_{index}",
        )(x)
        layer_name = f"{prefix}_activation_{index}"
        if activation == "leaky_relu":
            return layers.LeakyReLU(
                negative_slope=0.01, dtype=dtype, name=layer_name
            )(x)
        return layers.Activation(activation, dtype=dtype, name=layer_name)(x)

    def apply(x):
        shortcut = x

        # 1x1 conv to the hidden width.
        x = layers.Conv2D(
            filters=hidden_filters,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{prefix}_conv_1",
        )(x)
        x = _bn_act(x, 1)

        # 3x3 (optionally dilated / grouped) conv back to `filters` channels.
        x = layers.Conv2D(
            filters=filters,
            kernel_size=3,
            dilation_rate=dilation,
            groups=groups,
            padding="same",
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{prefix}_conv_2",
        )(x)
        x = _bn_act(x, 2)

        return layers.add([x, shortcut], dtype=dtype, name=f"{prefix}_add")

    return apply
498
+
499
+
500
def edge_block(
    filters,
    data_format,
    channel_axis,
    dilation=1,
    bottle_ratio=0.5,
    groups=1,
    activation="relu",
    dtype=None,
    name=None,
):
    """
    EdgeResidual / Fused-MBConv blocks.

    Builds a 3x3 conv -> 1x1 conv stack (each followed by batch
    normalization and the activation) and adds the block input back as a
    residual.

    Args:
        filters: Integer, the dimensionality of the output spaces (i.e. the
            number of output filters in used the blocks).
        data_format: `None` or str. the ordering of the dimensions in the
            inputs. Can be `"channels_last"`
            (`(batch_size, height, width, channels)`) or`"channels_first"`
            (`(batch_size, channels, height, width)`).
        channel_axis: int, the axis passed to `BatchNormalization`.
        dilation: int or tuple/list of 2 integers, specifying the dilation rate
            to use for dilated convolution, defaults to `1`.
        bottle_ratio: float, ratio for edge_block filters. Number of edge_block
            `filters = filters * bottle_ratio`. Defaults to `0.5`.
        groups: A positive int specifying the number of groups in which the
            input is split along the channel axis
        activation: Activation for the conv layers, defaults to "relu".
            `"leaky_relu"` is built as a `LeakyReLU` layer with
            `negative_slope=0.01`; any other value is handed to
            `layers.Activation`.
        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
            to use for the models computations and weights.
        name: str. A prefix for the layer names used in the block.

    Returns:
        A function that takes the input tensor and returns the output tensor
        of the block.
    """
    if name is None:
        name = f"edge{keras.backend.get_uid('edge')}"

    hidden_filters = int(round(filters * bottle_ratio))
    prefix = f"{name}_edge_block"

    def _bn_act(x, index):
        # Batch normalization followed by the configured activation.
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{prefix}_bn_{index}",
        )(x)
        layer_name = f"{prefix}_activation_{index}"
        if activation == "leaky_relu":
            return layers.LeakyReLU(
                negative_slope=0.01, dtype=dtype, name=layer_name
            )(x)
        return layers.Activation(activation, dtype=dtype, name=layer_name)(x)

    def apply(x):
        shortcut = x

        # 3x3 (optionally dilated / grouped) conv to the hidden width.
        x = layers.Conv2D(
            filters=hidden_filters,
            kernel_size=3,
            use_bias=False,
            dilation_rate=dilation,
            groups=groups,
            padding="same",
            data_format=data_format,
            dtype=dtype,
            name=f"{prefix}_conv_1",
        )(x)
        x = _bn_act(x, 1)

        # 1x1 projection back to `filters` channels.
        x = layers.Conv2D(
            filters=filters,
            kernel_size=1,
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=f"{prefix}_conv_2",
        )(x)
        x = _bn_act(x, 2)

        return layers.add([x, shortcut], dtype=dtype, name=f"{prefix}_add")

    return apply
605
+
606
+
607
def cross_stage(
    filters,
    strides,
    dilation,
    depth,
    data_format,
    channel_axis,
    block_ratio=1.0,
    bottle_ratio=1.0,
    expand_ratio=1.0,
    groups=1,
    first_dilation=None,
    avg_down=False,
    activation="relu",
    down_growth=False,
    cross_linear=False,
    block_fn=bottleneck_block,
    dtype=None,
    name=None,
):
    """
    Cross Stage.

    Optionally downsamples the input, expands it with a 1x1 convolution,
    splits the result along the channel axis into two branches, runs `depth`
    blocks plus a 1x1 transition on the second branch, concatenates both
    branches and applies a final 1x1 transition convolution.

    Args:
        filters: int, number of output filters of the stage.
        strides: int, strides of the downsampling step. Downsampling is only
            built when `strides != 1` or `first_dilation != dilation`.
        dilation: int, dilation rate handed to every block.
        depth: int, number of blocks built by `block_fn`.
        data_format: str, data format handed to the conv and pooling layers.
        channel_axis: int, axis used for batch normalization, splitting and
            concatenation.
        block_ratio: float, block filters are
            `int(round(filters * block_ratio))`.
        bottle_ratio: float, forwarded to `block_fn`.
        expand_ratio: float, expansion filters are
            `int(round(filters * expand_ratio))`.
        groups: int, groups for the downsampling conv and the blocks.
        first_dilation: int or `None`, dilation rate of the 3x3 downsampling
            conv. Falls back to `dilation` when falsy.
        avg_down: bool, if `True` downsampling uses `AveragePooling2D` (only
            when `strides == 2`) followed by a 1x1 conv instead of a strided
            3x3 conv.
        activation: str. `"leaky_relu"` is built as a `LeakyReLU` layer with
            `negative_slope=0.01`; any other value is handed to
            `layers.Activation`.
        down_growth: bool, if `True` the 3x3 downsampling conv outputs
            `filters` channels instead of the input channel count.
        cross_linear: bool, if `True` no activation follows the expansion
            conv.
        block_fn: callable building one block; called with the keyword
            arguments `filters`, `dilation`, `bottle_ratio`, `groups`,
            `activation`, `data_format`, `channel_axis`, `dtype` and `name`.
        dtype: dtype (policy) handed to every layer.
        name: str. A prefix for the layer names used in the stage.

    Returns:
        A function that takes the input tensor and returns the output tensor
        of the stage.
    """
    if name is None:
        name = f"cross_stage_{keras.backend.get_uid('cross_stage')}"

    first_dilation = first_dilation or dilation

    def _conv_bn(x, conv_name, bn_name, **conv_kwargs):
        # Bias-free conv followed by batch normalization.
        x = layers.Conv2D(
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=conv_name,
            **conv_kwargs,
        )(x)
        return layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=bn_name,
        )(x)

    def _activate(x, layer_name):
        if activation == "leaky_relu":
            return layers.LeakyReLU(
                negative_slope=0.01, dtype=dtype, name=layer_name
            )(x)
        return layers.Activation(activation, dtype=dtype, name=layer_name)(x)

    def apply(x):
        prev_filters = keras.ops.shape(x)[channel_axis]
        down_chs = filters if down_growth else prev_filters
        expand_chs = int(round(filters * expand_ratio))
        block_channels = int(round(filters * block_ratio))

        if strides != 1 or first_dilation != dilation:
            if avg_down:
                if strides == 2:
                    # Pass `data_format` explicitly so pooling runs over the
                    # same spatial axes as the convolutions even when it
                    # differs from the global Keras image data format.
                    x = layers.AveragePooling2D(
                        2,
                        data_format=data_format,
                        dtype=dtype,
                        name=f"{name}_csp_avg_pool",
                    )(x)
                x = _conv_bn(
                    x,
                    f"{name}_csp_conv_down_1",
                    f"{name}_csp_bn_1",
                    filters=filters,
                    kernel_size=1,
                    strides=1,
                    groups=groups,
                )
            else:
                # NOTE(review): no `padding` is passed, so the Keras default
                # applies to this strided 3x3 conv - confirm this matches the
                # reference implementation the weights are converted from.
                x = _conv_bn(
                    x,
                    f"{name}_csp_conv_down_1",
                    f"{name}_csp_bn_1",
                    filters=down_chs,
                    kernel_size=3,
                    strides=strides,
                    dilation_rate=first_dilation,
                    groups=groups,
                )
            x = _activate(x, f"{name}_csp_activation_1")

        # Expansion conv; its activation is skipped when `cross_linear`.
        x = _conv_bn(
            x,
            f"{name}_csp_conv_exp",
            f"{name}_csp_bn_2",
            filters=expand_chs,
            kernel_size=1,
        )
        if not cross_linear:
            x = _activate(x, f"{name}_csp_activation_2")

        # Split into the shortcut branch `xs` and the block branch `xb`.
        prev_filters = keras.ops.shape(x)[channel_axis]
        xs, xb = ops.split(
            x,
            indices_or_sections=prev_filters // (expand_chs // 2),
            axis=channel_axis,
        )

        for i in range(depth):
            xb = block_fn(
                filters=block_channels,
                dilation=dilation,
                bottle_ratio=bottle_ratio,
                groups=groups,
                activation=activation,
                data_format=data_format,
                channel_axis=channel_axis,
                dtype=dtype,
                name=f"{name}_block_{i}",
            )(xb)

        xb = _conv_bn(
            xb,
            f"{name}_csp_conv_transition_b",
            f"{name}_csp_transition_b_bn",
            filters=expand_chs // 2,
            kernel_size=1,
        )
        xb = _activate(xb, f"{name}_csp_transition_b_activation")

        out = layers.Concatenate(
            axis=channel_axis, dtype=dtype, name=f"{name}_csp_conv_concat"
        )([xs, xb])
        out = _conv_bn(
            out,
            f"{name}_csp_conv_transition",
            f"{name}_csp_transition_bn",
            filters=filters,
            kernel_size=1,
        )
        return _activate(out, f"{name}_csp_transition_activation")

    return apply
812
+
813
+
814
def cross_stage3(
    data_format,
    channel_axis,
    filters,
    strides,
    dilation,
    depth,
    block_ratio,
    bottle_ratio,
    expand_ratio,
    avg_down,
    activation,
    first_dilation,
    down_growth,
    cross_linear,
    block_fn,
    groups,
    name=None,
    dtype=None,
):
    """
    Cross Stage 3.

    Similar to Cross Stage, but with only one transition conv in the output.

    Optionally downsamples the input, expands it with a 1x1 convolution,
    splits the result along the channel axis into two branches, runs `depth`
    blocks on the first branch, concatenates both branches and applies a
    single 1x1 transition convolution.

    Args:
        data_format: str, data format handed to the conv and pooling layers.
        channel_axis: int, axis used for batch normalization, splitting and
            concatenation.
        filters: int, base filter count of the stage.
        strides: int, strides of the downsampling step. Downsampling is only
            built when `strides != 1` or `first_dilation != dilation`.
        dilation: int, dilation rate handed to every block.
        depth: int, number of blocks built by `block_fn`.
        block_ratio: float, block filters are
            `int(round(filters * block_ratio))`.
        bottle_ratio: float, forwarded to `block_fn`.
        expand_ratio: float, expansion filters are
            `int(round(filters * expand_ratio))`.
        avg_down: bool, if `True` downsampling uses `AveragePooling2D` (only
            when `strides == 2`) followed by a 1x1 conv instead of a strided
            3x3 conv.
        activation: str. `"leaky_relu"` is built as a `LeakyReLU` layer with
            `negative_slope=0.01`; any other value is handed to
            `layers.Activation`.
        first_dilation: int or `None`, dilation rate of the 3x3 downsampling
            conv. Falls back to `dilation` when falsy.
        down_growth: bool, if `True` the 3x3 downsampling conv outputs
            `filters` channels instead of the input channel count.
        cross_linear: bool, if `True` no activation follows the expansion
            conv.
        block_fn: callable building one block; called with the keyword
            arguments `filters`, `dilation`, `bottle_ratio`, `groups`,
            `activation`, `data_format`, `channel_axis`, `dtype` and `name`.
        groups: int, groups for the downsampling conv and the blocks.
        name: str. A prefix for the layer names used in the stage.
        dtype: dtype (policy) handed to every layer.

    Returns:
        A function that takes the input tensor and returns the output tensor
        of the stage.
    """
    if name is None:
        name = f"cross_stage3_{keras.backend.get_uid('cross_stage3')}"

    first_dilation = first_dilation or dilation

    def _conv_bn(x, conv_name, bn_name, **conv_kwargs):
        # Bias-free conv followed by batch normalization.
        x = layers.Conv2D(
            use_bias=False,
            data_format=data_format,
            dtype=dtype,
            name=conv_name,
            **conv_kwargs,
        )(x)
        return layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=bn_name,
        )(x)

    def _activate(x, layer_name):
        if activation == "leaky_relu":
            return layers.LeakyReLU(
                negative_slope=0.01, dtype=dtype, name=layer_name
            )(x)
        return layers.Activation(activation, dtype=dtype, name=layer_name)(x)

    def apply(x):
        prev_filters = keras.ops.shape(x)[channel_axis]
        down_chs = filters if down_growth else prev_filters
        expand_chs = int(round(filters * expand_ratio))
        block_filters = int(round(filters * block_ratio))

        if strides != 1 or first_dilation != dilation:
            if avg_down:
                if strides == 2:
                    # Pass `data_format` explicitly so pooling runs over the
                    # same spatial axes as the convolutions even when it
                    # differs from the global Keras image data format.
                    x = layers.AveragePooling2D(
                        2,
                        data_format=data_format,
                        dtype=dtype,
                        name=f"{name}_cross_stage3_avg_pool",
                    )(x)
                x = _conv_bn(
                    x,
                    f"{name}_cs3_conv_down_1",
                    f"{name}_cs3_bn_1",
                    filters=filters,
                    kernel_size=1,
                    strides=1,
                    groups=groups,
                )
            else:
                # NOTE(review): no `padding` is passed, so the Keras default
                # applies to this strided 3x3 conv - confirm this matches the
                # reference implementation the weights are converted from.
                x = _conv_bn(
                    x,
                    f"{name}_cs3_conv_down_1",
                    f"{name}_cs3_bn_1",
                    filters=down_chs,
                    kernel_size=3,
                    strides=strides,
                    dilation_rate=first_dilation,
                    groups=groups,
                )
            # One name for every branch; the leaky-relu variant of the 3x3
            # path used to be called `..._cs3__activation_1` (double
            # underscore). The layer has no weights.
            x = _activate(x, f"{name}_cs3_activation_1")

        # Expansion conv; its activation is skipped when `cross_linear`.
        x = _conv_bn(
            x,
            f"{name}_cs3_conv_exp",
            f"{name}_cs3_bn_2",
            filters=expand_chs,
            kernel_size=1,
        )
        if not cross_linear:
            x = _activate(x, f"{name}_cs3_activation_2")

        # Split into the block branch `x1` and the shortcut branch `x2`.
        prev_filters = keras.ops.shape(x)[channel_axis]
        x1, x2 = ops.split(
            x,
            indices_or_sections=prev_filters // (expand_chs // 2),
            axis=channel_axis,
        )

        for i in range(depth):
            x1 = block_fn(
                filters=block_filters,
                dilation=dilation,
                bottle_ratio=bottle_ratio,
                groups=groups,
                activation=activation,
                data_format=data_format,
                channel_axis=channel_axis,
                dtype=dtype,
                name=f"{name}_block_{i}",
            )(x1)

        out = layers.Concatenate(
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_cs3_conv_transition_concat",
        )([x1, x2])
        # NOTE(review): the transition emits `expand_chs // 2` channels, not
        # `filters` - confirm this is the intended output width.
        out = _conv_bn(
            out,
            f"{name}_cs3_conv_transition",
            f"{name}_cs3_transition_bn",
            filters=expand_chs // 2,
            kernel_size=1,
        )
        return _activate(out, f"{name}_cs3_activation_3")

    return apply
997
+
998
+
999
def dark_stage(
    data_format,
    channel_axis,
    filters,
    strides,
    dilation,
    depth,
    block_ratio,
    bottle_ratio,
    avg_down,
    activation,
    first_dilation,
    block_fn,
    groups,
    expand_ratio=None,
    down_growth=None,
    cross_linear=None,
    name=None,
    dtype=None,
):
    """
    DarkNet Stage.

    Applies one downsampling conv (with batch normalization and activation)
    followed by `depth` blocks built by `block_fn`. Unlike the cross stages
    there is no channel split, concatenation or transition conv.

    Args:
        data_format: str, data format handed to the conv and pooling layers.
        channel_axis: int, axis used for batch normalization.
        filters: int, number of output filters of the downsampling conv.
        strides: int, strides of the downsampling step.
        dilation: int, dilation rate handed to every block.
        depth: int, number of blocks built by `block_fn`.
        block_ratio: float, block filters are
            `int(round(filters * block_ratio))`.
        bottle_ratio: float, forwarded to `block_fn`.
        avg_down: bool, if `True` downsampling uses `AveragePooling2D` (only
            when `strides == 2`) followed by a 1x1 conv instead of a strided
            3x3 conv.
        activation: str. `"leaky_relu"` is built as a `LeakyReLU` layer with
            `negative_slope=0.01`; any other value is handed to
            `layers.Activation`.
        first_dilation: int or `None`, dilation rate of the 3x3 downsampling
            conv. Falls back to `dilation` when falsy.
        block_fn: callable building one block; called with the keyword
            arguments `filters`, `dilation`, `bottle_ratio`, `groups`,
            `activation`, `data_format`, `channel_axis`, `dtype` and `name`.
        groups: int, groups for the downsampling conv and the blocks.
        expand_ratio: accepted but not used by this stage.
        down_growth: accepted but not used by this stage.
        cross_linear: accepted but not used by this stage.
        name: str. A prefix for the layer names used in the stage.
        dtype: dtype (policy) handed to every layer.

    Returns:
        A function that takes the input tensor and returns the output tensor
        of the stage.
    """
    if name is None:
        name = f"dark_stage_{keras.backend.get_uid('dark_stage')}"

    first_dilation = first_dilation or dilation

    def apply(x):
        block_channels = int(round(filters * block_ratio))

        if avg_down:
            if strides == 2:
                # Pass `data_format` explicitly so pooling runs over the same
                # spatial axes as the convolutions even when it differs from
                # the global Keras image data format.
                x = layers.AveragePooling2D(
                    2,
                    data_format=data_format,
                    dtype=dtype,
                    name=f"{name}_dark_avg_pool",
                )(x)
            down_kwargs = dict(kernel_size=1, strides=1)
        else:
            # NOTE(review): no `padding` is passed, so the Keras default
            # applies to this strided 3x3 conv - confirm this matches the
            # reference implementation the weights are converted from.
            down_kwargs = dict(
                kernel_size=3,
                strides=strides,
                dilation_rate=first_dilation,
            )

        x = layers.Conv2D(
            filters=filters,
            use_bias=False,
            groups=groups,
            data_format=data_format,
            dtype=dtype,
            name=f"{name}_dark_conv_down_1",
            **down_kwargs,
        )(x)
        x = layers.BatchNormalization(
            epsilon=1e-05,
            axis=channel_axis,
            dtype=dtype,
            name=f"{name}_dark_bn_1",
        )(x)
        if activation == "leaky_relu":
            x = layers.LeakyReLU(
                negative_slope=0.01,
                dtype=dtype,
                name=f"{name}_dark_activation_1",
            )(x)
        else:
            x = layers.Activation(
                activation,
                dtype=dtype,
                name=f"{name}_dark_activation_1",
            )(x)

        for i in range(depth):
            x = block_fn(
                filters=block_channels,
                dilation=dilation,
                bottle_ratio=bottle_ratio,
                groups=groups,
                activation=activation,
                data_format=data_format,
                channel_axis=channel_axis,
                dtype=dtype,
                name=f"{name}_block_{i}",
            )(x)
        return x

    return apply
1109
+
1110
+
1111
def create_csp_stem(
    data_format,
    channel_axis,
    activation,
    padding,
    filters=32,
    kernel_size=3,
    strides=2,
    pooling=None,
    dtype=None,
):
    """Build the CSP stem: a stack of Conv2D -> BatchNorm -> activation units.

    Args:
        data_format: Forwarded to the convolution and pooling layers.
        channel_axis: Axis handed to `BatchNormalization`.
        activation: `"leaky_relu"` selects `LeakyReLU(negative_slope=0.01)`;
            any other value is handed to `layers.Activation`.
        padding: Padding mode of the first convolution only. Every later
            convolution uses `"valid"`.
        filters: Filter count, or a tuple/list with one count per
            convolution in the stem.
        kernel_size: Kernel size shared by all stem convolutions.
        strides: Total stem stride; must be one of `1`, `2` or `4`.
        pooling: When equal to `"max"`, a 3x3 stride-2 max pooling layer is
            appended. Any truthy value stops the last convolution from
            being strided.
        dtype: Forwarded to every layer.

    Returns:
        A function taking a tensor `x` and returning the tuple
        `(output, stem_strides)`, where `stem_strides` is the product of
        the strides actually applied.

    Raises:
        AssertionError: If `filters` is empty, if `strides` is not in
            `(1, 2, 4)`, or (when the returned function is called) if
            `pooling == "max"` while `strides <= 2`.
    """
    if not isinstance(filters, (tuple, list)):
        filters = [filters]
    stem_depth = len(filters)
    assert stem_depth
    assert strides in (1, 2, 4)
    last_idx = stem_depth - 1

    def apply(x):
        stem_strides = 1
        for idx, num_filters in enumerate(filters):
            # The first conv carries a stride of 2 whenever the stem
            # down-samples at all. The last conv carries the second stride
            # of 2 only when no pooling layer is configured.
            strided_first = idx == 0 and strides > 1
            strided_last = idx == last_idx and strides > 2 and not pooling
            conv_strides = 2 if strided_first or strided_last else 1

            conv_padding = padding if idx == 0 else "valid"
            x = layers.Conv2D(
                filters=num_filters,
                kernel_size=kernel_size,
                strides=conv_strides,
                padding=conv_padding,
                use_bias=False,
                data_format=data_format,
                dtype=dtype,
                name=f"csp_stem_conv_{idx}",
            )(x)
            x = layers.BatchNormalization(
                epsilon=1e-05,
                axis=channel_axis,
                dtype=dtype,
                name=f"csp_stem_bn_{idx}",
            )(x)

            activation_name = f"csp_stem_activation_{idx}"
            if activation == "leaky_relu":
                activation_layer = layers.LeakyReLU(
                    negative_slope=0.01,
                    dtype=dtype,
                    name=activation_name,
                )
            else:
                activation_layer = layers.Activation(
                    activation,
                    dtype=dtype,
                    name=activation_name,
                )
            x = activation_layer(x)
            stem_strides *= conv_strides

        if pooling == "max":
            assert strides > 2
            x = layers.MaxPooling2D(
                pool_size=3,
                strides=2,
                padding="same",
                data_format=data_format,
                dtype=dtype,
                name="csp_stem_pool",
            )(x)
            stem_strides *= 2
        return x, stem_strides

    return apply
1182
+
1183
+
1184
def create_csp_stages(
    inputs,
    filters,
    data_format,
    channel_axis,
    stackwise_depth,
    reduction,
    block_ratio,
    bottle_ratio,
    expand_ratio,
    strides,
    groups,
    avg_down,
    down_growth,
    cross_linear,
    activation,
    output_strides,
    stage_type,
    block_type,
    dtype,
    name,
):
    """Chain all CSP/DarkNet stages on top of `inputs`.

    Args:
        inputs: Tensor fed to the first stage.
        filters: Sequence indexed per stage giving the stage filter count.
        data_format: Forwarded to every stage.
        channel_axis: Forwarded to every stage.
        stackwise_depth: Sequence with the number of blocks of each stage.
            Its length defines the number of stages.
        reduction: Stride already applied before the first stage. Used as
            the starting value of the running network stride.
        block_ratio: Scalar or sequence, padded to one value per stage.
        bottle_ratio: Scalar or sequence, padded to one value per stage.
        expand_ratio: Scalar or sequence, padded to one value per stage.
        strides: Scalar or sequence, padded to one value per stage.
        groups: Forwarded to every stage.
        avg_down: Forwarded to every stage.
        down_growth: Forwarded to every stage.
        cross_linear: Forwarded to every stage.
        activation: Forwarded to every stage.
        output_strides: Once the running network stride reaches this
            value, further strided stages are switched to stride 1 and
            their stride is folded into the dilation rate instead.
        stage_type: `"dark"` selects `dark_stage`, `"csp"` selects
            `cross_stage`, anything else selects `cross_stage3`.
        block_type: `"dark_block"` selects `dark_block`, `"edge_block"`
            selects `edge_block`, anything else selects `bottleneck_block`.
        dtype: Forwarded to every stage.
        name: Optional name. A unique `csp_stage_<uid>` value is generated
            when `None`.

    Returns:
        A tuple `(output, pyramid_outputs)`. `output` is the tensor
        produced by the last stage. `pyramid_outputs` maps
        `"P{stage_idx + 2}"` to the output tensor of each stage.
    """
    if name is None:
        # NOTE: the resolved name is not used below; stage layers are
        # named `stage_{idx}`. The uid is still drawn so the global uid
        # counter advances as it did before.
        name = f"csp_stage_{keras.backend.get_uid('csp_stage')}"

    num_stages = len(stackwise_depth)
    dilation = 1
    net_strides = reduction
    strides = _pad_arg(strides, num_stages)
    expand_ratio = _pad_arg(expand_ratio, num_stages)
    bottle_ratio = _pad_arg(bottle_ratio, num_stages)
    block_ratio = _pad_arg(block_ratio, num_stages)

    if stage_type == "dark":
        stage_fn = dark_stage
    elif stage_type == "csp":
        stage_fn = cross_stage
    else:
        stage_fn = cross_stage3

    if block_type == "dark_block":
        block_fn = dark_block
    elif block_type == "edge_block":
        block_fn = edge_block
    else:
        block_fn = bottleneck_block

    stages = inputs
    pyramid_outputs = {}
    for stage_idx, stage_depth in enumerate(stackwise_depth):
        # Work on a per-stage copy of the stride. Rebinding `strides`
        # itself would replace the padded per-stage tuple with an int and
        # make the following `strides[stage_idx]` lookups fail.
        stage_strides = strides[stage_idx]
        if net_strides >= output_strides and stage_strides > 1:
            # Target output stride reached: trade the stride for dilation.
            dilation *= stage_strides
            stage_strides = 1
        net_strides *= stage_strides
        first_dilation = 1 if dilation in (1, 2) else 2
        stages = stage_fn(
            data_format=data_format,
            channel_axis=channel_axis,
            filters=filters[stage_idx],
            depth=stage_depth,
            strides=stage_strides,
            dilation=dilation,
            block_ratio=block_ratio[stage_idx],
            bottle_ratio=bottle_ratio[stage_idx],
            expand_ratio=expand_ratio[stage_idx],
            groups=groups,
            first_dilation=first_dilation,
            avg_down=avg_down,
            activation=activation,
            down_growth=down_growth,
            cross_linear=cross_linear,
            block_fn=block_fn,
            dtype=dtype,
            name=f"stage_{stage_idx}",
        )(stages)
        pyramid_outputs[f"P{stage_idx + 2}"] = stages
    return stages, pyramid_outputs
1261
+
1262
+
1263
+ def _pad_arg(x, n):
1264
+ """
1265
+ pads an argument tuple to specified n by padding with last value
1266
+ """
1267
+ if not isinstance(x, (tuple, list)):
1268
+ x = (x,)
1269
+ curr_n = len(x)
1270
+ pad_n = n - curr_n
1271
+ if pad_n <= 0:
1272
+ return x[:n]
1273
+ return tuple(
1274
+ list(x)
1275
+ + [
1276
+ x[-1],
1277
+ ]
1278
+ * pad_n
1279
+ )