keras-hub-nightly 0.16.1.dev202410200345__py3-none-any.whl → 0.19.0.dev202412070351__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +12 -0
- keras_hub/api/models/__init__.py +32 -0
- keras_hub/src/bounding_box/__init__.py +2 -0
- keras_hub/src/bounding_box/converters.py +102 -12
- keras_hub/src/layers/modeling/rms_normalization.py +34 -0
- keras_hub/src/layers/modeling/transformer_encoder.py +27 -7
- keras_hub/src/layers/preprocessing/image_converter.py +5 -0
- keras_hub/src/models/albert/albert_presets.py +0 -8
- keras_hub/src/models/bart/bart_presets.py +0 -6
- keras_hub/src/models/bert/bert_presets.py +0 -20
- keras_hub/src/models/bloom/bloom_presets.py +0 -16
- keras_hub/src/models/clip/__init__.py +5 -0
- keras_hub/src/models/clip/clip_backbone.py +286 -0
- keras_hub/src/models/clip/clip_encoder_block.py +19 -4
- keras_hub/src/models/clip/clip_image_converter.py +8 -0
- keras_hub/src/models/clip/clip_presets.py +93 -0
- keras_hub/src/models/clip/clip_text_encoder.py +4 -1
- keras_hub/src/models/clip/clip_tokenizer.py +18 -3
- keras_hub/src/models/clip/clip_vision_embedding.py +101 -0
- keras_hub/src/models/clip/clip_vision_encoder.py +159 -0
- keras_hub/src/models/deberta_v3/deberta_v3_presets.py +0 -10
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +0 -2
- keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +5 -3
- keras_hub/src/models/densenet/densenet_backbone.py +1 -1
- keras_hub/src/models/densenet/densenet_presets.py +0 -6
- keras_hub/src/models/distil_bert/distil_bert_presets.py +0 -6
- keras_hub/src/models/efficientnet/__init__.py +9 -0
- keras_hub/src/models/efficientnet/cba.py +141 -0
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +139 -56
- keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +14 -0
- keras_hub/src/models/efficientnet/efficientnet_image_classifier_preprocessor.py +16 -0
- keras_hub/src/models/efficientnet/efficientnet_image_converter.py +10 -0
- keras_hub/src/models/efficientnet/efficientnet_presets.py +192 -0
- keras_hub/src/models/efficientnet/fusedmbconv.py +81 -36
- keras_hub/src/models/efficientnet/mbconv.py +52 -21
- keras_hub/src/models/electra/electra_presets.py +0 -12
- keras_hub/src/models/f_net/f_net_presets.py +0 -4
- keras_hub/src/models/falcon/falcon_presets.py +0 -2
- keras_hub/src/models/flux/__init__.py +5 -0
- keras_hub/src/models/flux/flux_layers.py +494 -0
- keras_hub/src/models/flux/flux_maths.py +218 -0
- keras_hub/src/models/flux/flux_model.py +231 -0
- keras_hub/src/models/flux/flux_presets.py +14 -0
- keras_hub/src/models/flux/flux_text_to_image.py +142 -0
- keras_hub/src/models/flux/flux_text_to_image_preprocessor.py +73 -0
- keras_hub/src/models/gemma/gemma_presets.py +0 -40
- keras_hub/src/models/gpt2/gpt2_presets.py +0 -9
- keras_hub/src/models/image_object_detector.py +87 -0
- keras_hub/src/models/image_object_detector_preprocessor.py +57 -0
- keras_hub/src/models/image_to_image.py +16 -10
- keras_hub/src/models/inpaint.py +20 -13
- keras_hub/src/models/llama/llama_backbone.py +1 -1
- keras_hub/src/models/llama/llama_presets.py +5 -15
- keras_hub/src/models/llama3/llama3_presets.py +0 -8
- keras_hub/src/models/mistral/mistral_presets.py +0 -6
- keras_hub/src/models/mit/mit_backbone.py +41 -27
- keras_hub/src/models/mit/mit_layers.py +9 -7
- keras_hub/src/models/mit/mit_presets.py +12 -24
- keras_hub/src/models/opt/opt_presets.py +0 -8
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +61 -11
- keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py +21 -23
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +166 -10
- keras_hub/src/models/pali_gemma/pali_gemma_vit.py +12 -11
- keras_hub/src/models/phi3/phi3_presets.py +0 -4
- keras_hub/src/models/resnet/resnet_presets.py +10 -42
- keras_hub/src/models/retinanet/__init__.py +5 -0
- keras_hub/src/models/retinanet/anchor_generator.py +52 -53
- keras_hub/src/models/retinanet/feature_pyramid.py +99 -36
- keras_hub/src/models/retinanet/non_max_supression.py +1 -0
- keras_hub/src/models/retinanet/prediction_head.py +192 -0
- keras_hub/src/models/retinanet/retinanet_backbone.py +146 -0
- keras_hub/src/models/retinanet/retinanet_image_converter.py +53 -0
- keras_hub/src/models/retinanet/retinanet_label_encoder.py +49 -51
- keras_hub/src/models/retinanet/retinanet_object_detector.py +382 -0
- keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py +14 -0
- keras_hub/src/models/retinanet/retinanet_presets.py +15 -0
- keras_hub/src/models/roberta/roberta_presets.py +0 -4
- keras_hub/src/models/sam/sam_backbone.py +0 -1
- keras_hub/src/models/sam/sam_image_segmenter.py +9 -10
- keras_hub/src/models/sam/sam_presets.py +0 -6
- keras_hub/src/models/segformer/__init__.py +8 -0
- keras_hub/src/models/segformer/segformer_backbone.py +163 -0
- keras_hub/src/models/segformer/segformer_image_converter.py +8 -0
- keras_hub/src/models/segformer/segformer_image_segmenter.py +171 -0
- keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py +31 -0
- keras_hub/src/models/segformer/segformer_presets.py +124 -0
- keras_hub/src/models/stable_diffusion_3/mmdit.py +41 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +38 -21
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +3 -3
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +3 -3
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py +28 -4
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +1 -1
- keras_hub/src/models/t5/t5_backbone.py +5 -4
- keras_hub/src/models/t5/t5_presets.py +41 -13
- keras_hub/src/models/text_to_image.py +13 -5
- keras_hub/src/models/vgg/vgg_backbone.py +1 -1
- keras_hub/src/models/vgg/vgg_presets.py +0 -8
- keras_hub/src/models/whisper/whisper_audio_converter.py +1 -1
- keras_hub/src/models/whisper/whisper_presets.py +0 -20
- keras_hub/src/models/xlm_roberta/xlm_roberta_presets.py +0 -4
- keras_hub/src/tests/test_case.py +25 -0
- keras_hub/src/utils/preset_utils.py +17 -4
- keras_hub/src/utils/timm/convert_efficientnet.py +449 -0
- keras_hub/src/utils/timm/preset_loader.py +3 -0
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.16.1.dev202410200345.dist-info → keras_hub_nightly-0.19.0.dev202412070351.dist-info}/METADATA +15 -26
- {keras_hub_nightly-0.16.1.dev202410200345.dist-info → keras_hub_nightly-0.19.0.dev202412070351.dist-info}/RECORD +109 -76
- {keras_hub_nightly-0.16.1.dev202410200345.dist-info → keras_hub_nightly-0.19.0.dev202412070351.dist-info}/WHEEL +1 -1
- {keras_hub_nightly-0.16.1.dev202410200345.dist-info → keras_hub_nightly-0.19.0.dev202412070351.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,9 @@
|
|
1
|
+
import math
|
2
|
+
|
1
3
|
import keras
|
2
4
|
|
5
|
+
from keras_hub.src.utils.keras_utils import standardize_data_format
|
6
|
+
|
3
7
|
|
4
8
|
class FeaturePyramid(keras.layers.Layer):
|
5
9
|
"""A Feature Pyramid Network (FPN) layer.
|
@@ -37,14 +41,18 @@ class FeaturePyramid(keras.layers.Layer):
|
|
37
41
|
Args:
|
38
42
|
min_level: int. The minimum level of the feature pyramid.
|
39
43
|
max_level: int. The maximum level of the feature pyramid.
|
44
|
+
use_p5: bool. If True, uses the direct input `P5` (the feature map fed
|
45
|
+
into this layer) as input for creating the first coarser convolution
|
46
|
+
layer (`P6`). If False, uses the `P5` output of the Feature Pyramid
|
47
|
+
Network instead. Later coarser layers use the previous coarser output.
|
40
48
|
num_filters: int. The number of filters in each feature map.
|
41
49
|
activation: string or `keras.activations`. The activation function
|
42
50
|
to be used in network.
|
43
51
|
Defaults to `"relu"`.
|
44
|
-
kernel_initializer: `str` or `keras.initializers
|
52
|
+
kernel_initializer: `str` or `keras.initializers`.
|
45
53
|
The kernel initializer for the convolution layers.
|
46
54
|
Defaults to `"VarianceScaling"`.
|
47
|
-
bias_initializer: `str` or `keras.initializers
|
55
|
+
bias_initializer: `str` or `keras.initializers`.
|
48
56
|
The bias initializer for the convolution layers.
|
49
57
|
Defaults to `"zeros"`.
|
50
58
|
batch_norm_momentum: float.
|
@@ -53,10 +61,10 @@ class FeaturePyramid(keras.layers.Layer):
|
|
53
61
|
batch_norm_epsilon: float.
|
54
62
|
The epsilon for the batch normalization layers.
|
55
63
|
Defaults to `0.001`.
|
56
|
-
kernel_regularizer: `str` or `keras.regularizers
|
64
|
+
kernel_regularizer: `str` or `keras.regularizers`.
|
57
65
|
The kernel regularizer for the convolution layers.
|
58
66
|
Defaults to `None`.
|
59
|
-
bias_regularizer: `str` or `keras.regularizers
|
67
|
+
bias_regularizer: `str` or `keras.regularizers`.
|
60
68
|
The bias regularizer for the convolution layers.
|
61
69
|
Defaults to `None`.
|
62
70
|
use_batch_norm: bool. Whether to use batch normalization.
|
@@ -69,6 +77,7 @@ class FeaturePyramid(keras.layers.Layer):
|
|
69
77
|
self,
|
70
78
|
min_level,
|
71
79
|
max_level,
|
80
|
+
use_p5,
|
72
81
|
num_filters=256,
|
73
82
|
activation="relu",
|
74
83
|
kernel_initializer="VarianceScaling",
|
@@ -78,6 +87,7 @@ class FeaturePyramid(keras.layers.Layer):
|
|
78
87
|
kernel_regularizer=None,
|
79
88
|
bias_regularizer=None,
|
80
89
|
use_batch_norm=False,
|
90
|
+
data_format=None,
|
81
91
|
**kwargs,
|
82
92
|
):
|
83
93
|
super().__init__(**kwargs)
|
@@ -89,6 +99,7 @@ class FeaturePyramid(keras.layers.Layer):
|
|
89
99
|
self.min_level = min_level
|
90
100
|
self.max_level = max_level
|
91
101
|
self.num_filters = num_filters
|
102
|
+
self.use_p5 = use_p5
|
92
103
|
self.activation = keras.activations.get(activation)
|
93
104
|
self.kernel_initializer = keras.initializers.get(kernel_initializer)
|
94
105
|
self.bias_initializer = keras.initializers.get(bias_initializer)
|
@@ -103,8 +114,8 @@ class FeaturePyramid(keras.layers.Layer):
|
|
103
114
|
self.bias_regularizer = keras.regularizers.get(bias_regularizer)
|
104
115
|
else:
|
105
116
|
self.bias_regularizer = None
|
106
|
-
self.data_format =
|
107
|
-
self.batch_norm_axis = -1 if
|
117
|
+
self.data_format = standardize_data_format(data_format)
|
118
|
+
self.batch_norm_axis = -1 if data_format == "channels_last" else 1
|
108
119
|
|
109
120
|
def build(self, input_shapes):
|
110
121
|
input_shapes = {
|
@@ -117,7 +128,6 @@ class FeaturePyramid(keras.layers.Layer):
|
|
117
128
|
}
|
118
129
|
input_levels = [int(level[1]) for level in input_shapes]
|
119
130
|
backbone_max_level = min(max(input_levels), self.max_level)
|
120
|
-
|
121
131
|
# Build lateral layers
|
122
132
|
self.lateral_conv_layers = {}
|
123
133
|
for i in range(self.min_level, backbone_max_level + 1):
|
@@ -134,7 +144,11 @@ class FeaturePyramid(keras.layers.Layer):
|
|
134
144
|
dtype=self.dtype_policy,
|
135
145
|
name=f"lateral_conv_{level}",
|
136
146
|
)
|
137
|
-
self.lateral_conv_layers[level].build(
|
147
|
+
self.lateral_conv_layers[level].build(
|
148
|
+
(None, None, None, input_shapes[level][-1])
|
149
|
+
if self.data_format == "channels_last"
|
150
|
+
else (None, input_shapes[level][1], None, None)
|
151
|
+
)
|
138
152
|
|
139
153
|
self.lateral_batch_norm_layers = {}
|
140
154
|
if self.use_batch_norm:
|
@@ -149,9 +163,9 @@ class FeaturePyramid(keras.layers.Layer):
|
|
149
163
|
)
|
150
164
|
)
|
151
165
|
self.lateral_batch_norm_layers[level].build(
|
152
|
-
(None, None, None,
|
166
|
+
(None, None, None, self.num_filters)
|
153
167
|
if self.data_format == "channels_last"
|
154
|
-
else (None,
|
168
|
+
else (None, self.num_filters, None, None)
|
155
169
|
)
|
156
170
|
|
157
171
|
# Build output layers
|
@@ -171,9 +185,9 @@ class FeaturePyramid(keras.layers.Layer):
|
|
171
185
|
name=f"output_conv_{level}",
|
172
186
|
)
|
173
187
|
self.output_conv_layers[level].build(
|
174
|
-
(None, None, None,
|
188
|
+
(None, None, None, self.num_filters)
|
175
189
|
if self.data_format == "channels_last"
|
176
|
-
else (None,
|
190
|
+
else (None, self.num_filters, None, None)
|
177
191
|
)
|
178
192
|
|
179
193
|
# Build coarser layers
|
@@ -192,11 +206,18 @@ class FeaturePyramid(keras.layers.Layer):
|
|
192
206
|
dtype=self.dtype_policy,
|
193
207
|
name=f"coarser_{level}",
|
194
208
|
)
|
195
|
-
self.
|
196
|
-
(
|
197
|
-
|
198
|
-
|
199
|
-
|
209
|
+
if i == backbone_max_level + 1 and self.use_p5:
|
210
|
+
self.output_conv_layers[level].build(
|
211
|
+
(None, None, None, input_shapes[f"P{i-1}"][-1])
|
212
|
+
if self.data_format == "channels_last"
|
213
|
+
else (None, input_shapes[f"P{i-1}"][1], None, None)
|
214
|
+
)
|
215
|
+
else:
|
216
|
+
self.output_conv_layers[level].build(
|
217
|
+
(None, None, None, self.num_filters)
|
218
|
+
if self.data_format == "channels_last"
|
219
|
+
else (None, self.num_filters, None, None)
|
220
|
+
)
|
200
221
|
|
201
222
|
# Build batch norm layers
|
202
223
|
self.output_batch_norms = {}
|
@@ -212,9 +233,9 @@ class FeaturePyramid(keras.layers.Layer):
|
|
212
233
|
)
|
213
234
|
)
|
214
235
|
self.output_batch_norms[level].build(
|
215
|
-
(None, None, None,
|
236
|
+
(None, None, None, self.num_filters)
|
216
237
|
if self.data_format == "channels_last"
|
217
|
-
else (None,
|
238
|
+
else (None, self.num_filters, None, None)
|
218
239
|
)
|
219
240
|
|
220
241
|
# The same upsampling layer is used for all levels
|
@@ -273,7 +294,11 @@ class FeaturePyramid(keras.layers.Layer):
|
|
273
294
|
|
274
295
|
for i in range(backbone_max_level + 1, self.max_level + 1):
|
275
296
|
level = f"P{i}"
|
276
|
-
feats_in =
|
297
|
+
feats_in = (
|
298
|
+
inputs[f"P{i-1}"]
|
299
|
+
if i == backbone_max_level + 1 and self.use_p5
|
300
|
+
else output_features[f"P{i-1}"]
|
301
|
+
)
|
277
302
|
if i > backbone_max_level + 1:
|
278
303
|
feats_in = self.activation(feats_in)
|
279
304
|
output_features[level] = (
|
@@ -283,7 +308,10 @@ class FeaturePyramid(keras.layers.Layer):
|
|
283
308
|
if self.use_batch_norm
|
284
309
|
else self.output_conv_layers[level](feats_in)
|
285
310
|
)
|
286
|
-
|
311
|
+
output_features = {
|
312
|
+
f"P{i}": output_features[f"P{i}"]
|
313
|
+
for i in range(self.min_level, self.max_level + 1)
|
314
|
+
}
|
287
315
|
return output_features
|
288
316
|
|
289
317
|
def get_config(self):
|
@@ -293,7 +321,9 @@ class FeaturePyramid(keras.layers.Layer):
|
|
293
321
|
"min_level": self.min_level,
|
294
322
|
"max_level": self.max_level,
|
295
323
|
"num_filters": self.num_filters,
|
324
|
+
"use_p5": self.use_p5,
|
296
325
|
"use_batch_norm": self.use_batch_norm,
|
326
|
+
"data_format": self.data_format,
|
297
327
|
"activation": keras.activations.serialize(self.activation),
|
298
328
|
"kernel_initializer": keras.initializers.serialize(
|
299
329
|
self.kernel_initializer
|
@@ -320,34 +350,51 @@ class FeaturePyramid(keras.layers.Layer):
|
|
320
350
|
|
321
351
|
def compute_output_shape(self, input_shapes):
|
322
352
|
output_shape = {}
|
323
|
-
print(input_shapes)
|
324
353
|
input_levels = [int(level[1]) for level in input_shapes]
|
325
354
|
backbone_max_level = min(max(input_levels), self.max_level)
|
326
355
|
|
327
356
|
for i in range(self.min_level, backbone_max_level + 1):
|
328
357
|
level = f"P{i}"
|
329
358
|
if self.data_format == "channels_last":
|
330
|
-
output_shape[level] = input_shapes[level][:-1] + (
|
359
|
+
output_shape[level] = input_shapes[level][:-1] + (
|
360
|
+
self.num_filters,
|
361
|
+
)
|
331
362
|
else:
|
332
363
|
output_shape[level] = (
|
333
364
|
input_shapes[level][0],
|
334
|
-
|
365
|
+
self.num_filters,
|
335
366
|
) + input_shapes[level][1:3]
|
336
367
|
|
337
368
|
intermediate_shape = input_shapes[f"P{backbone_max_level}"]
|
338
369
|
intermediate_shape = (
|
339
370
|
(
|
340
371
|
intermediate_shape[0],
|
341
|
-
|
342
|
-
|
343
|
-
|
372
|
+
(
|
373
|
+
int(math.ceil(intermediate_shape[1] / 2))
|
374
|
+
if intermediate_shape[1] is not None
|
375
|
+
else None
|
376
|
+
),
|
377
|
+
(
|
378
|
+
int(math.ceil(intermediate_shape[1] / 2))
|
379
|
+
if intermediate_shape[1] is not None
|
380
|
+
else None
|
381
|
+
),
|
382
|
+
self.num_filters,
|
344
383
|
)
|
345
384
|
if self.data_format == "channels_last"
|
346
385
|
else (
|
347
386
|
intermediate_shape[0],
|
348
|
-
|
349
|
-
|
350
|
-
|
387
|
+
self.num_filters,
|
388
|
+
(
|
389
|
+
int(math.ceil(intermediate_shape[1] / 2))
|
390
|
+
if intermediate_shape[1] is not None
|
391
|
+
else None
|
392
|
+
),
|
393
|
+
(
|
394
|
+
int(math.ceil(intermediate_shape[1] / 2))
|
395
|
+
if intermediate_shape[1] is not None
|
396
|
+
else None
|
397
|
+
),
|
351
398
|
)
|
352
399
|
)
|
353
400
|
|
@@ -357,16 +404,32 @@ class FeaturePyramid(keras.layers.Layer):
|
|
357
404
|
intermediate_shape = (
|
358
405
|
(
|
359
406
|
intermediate_shape[0],
|
360
|
-
|
361
|
-
|
362
|
-
|
407
|
+
(
|
408
|
+
int(math.ceil(intermediate_shape[1] / 2))
|
409
|
+
if intermediate_shape[1] is not None
|
410
|
+
else None
|
411
|
+
),
|
412
|
+
(
|
413
|
+
int(math.ceil(intermediate_shape[1] / 2))
|
414
|
+
if intermediate_shape[1] is not None
|
415
|
+
else None
|
416
|
+
),
|
417
|
+
self.num_filters,
|
363
418
|
)
|
364
419
|
if self.data_format == "channels_last"
|
365
420
|
else (
|
366
421
|
intermediate_shape[0],
|
367
|
-
|
368
|
-
|
369
|
-
|
422
|
+
self.num_filters,
|
423
|
+
(
|
424
|
+
int(math.ceil(intermediate_shape[1] / 2))
|
425
|
+
if intermediate_shape[1] is not None
|
426
|
+
else None
|
427
|
+
),
|
428
|
+
(
|
429
|
+
int(math.ceil(intermediate_shape[1] / 2))
|
430
|
+
if intermediate_shape[1] is not None
|
431
|
+
else None
|
432
|
+
),
|
370
433
|
)
|
371
434
|
)
|
372
435
|
|
@@ -3,6 +3,7 @@ import math
|
|
3
3
|
import keras
|
4
4
|
from keras import ops
|
5
5
|
|
6
|
+
# TODO: https://github.com/keras-team/keras-hub/issues/1965
|
6
7
|
from keras_hub.src.bounding_box import converters
|
7
8
|
from keras_hub.src.bounding_box import utils
|
8
9
|
from keras_hub.src.bounding_box import validate_format
|
@@ -0,0 +1,192 @@
|
|
1
|
+
import keras
|
2
|
+
|
3
|
+
from keras_hub.src.utils.keras_utils import standardize_data_format
|
4
|
+
|
5
|
+
|
6
|
+
class PredictionHead(keras.layers.Layer):
|
7
|
+
"""A head for classification or bounding box regression predictions.
|
8
|
+
|
9
|
+
Args:
|
10
|
+
output_filters: int. The number of convolution filters in the final layer.
|
11
|
+
The number of output channels determines the prediction type:
|
12
|
+
- **Classification**:
|
13
|
+
`output_filters = num_anchors * num_classes`
|
14
|
+
Predicts class probabilities for each anchor.
|
15
|
+
- **Bounding Box Regression**:
|
16
|
+
`output_filters = num_anchors * 4` Predicts bounding box
|
17
|
+
offsets (x1, y1, x2, y2) for each anchor.
|
18
|
+
num_filters: int. The number of convolution filters to use in the base
|
19
|
+
layer.
|
20
|
+
num_conv_layers: int. The number of convolution layers before the final
|
21
|
+
layer.
|
22
|
+
use_prior_probability: bool. Set to True to use prior probability in the
|
23
|
+
bias initializer for the final convolution layer.
|
24
|
+
Defaults to `False`.
|
25
|
+
prior_probability: float. The prior probability value to use for
|
26
|
+
initializing the bias. Only used if `use_prior_probability` is
|
27
|
+
`True`. Defaults to `0.01`.
|
28
|
+
kernel_initializer: `str` or `keras.initializers`. The kernel
|
29
|
+
initializer for the convolution layers. Defaults to
|
30
|
+
`"random_normal"`.
|
31
|
+
bias_initializer: `str` or `keras.initializers`. The bias initializer
|
32
|
+
for the convolution layers. Defaults to `"zeros"`.
|
33
|
+
kernel_regularizer: `str` or `keras.regularizers`. The kernel
|
34
|
+
regularizer for the convolution layers. Defaults to `None`.
|
35
|
+
bias_regularizer: `str` or `keras.regularizers`. The bias regularizer
|
36
|
+
for the convolution layers. Defaults to `None`.
|
37
|
+
use_group_norm: bool. Whether to use Group Normalization after
|
38
|
+
the convolution layers. Defaults to `False`.
|
39
|
+
|
40
|
+
Returns:
|
41
|
+
A layer representing either the classification
|
42
|
+
or the box regression head depending on `output_filters`.
|
43
|
+
"""
|
44
|
+
|
45
|
+
def __init__(
|
46
|
+
self,
|
47
|
+
output_filters,
|
48
|
+
num_filters,
|
49
|
+
num_conv_layers,
|
50
|
+
use_prior_probability=False,
|
51
|
+
prior_probability=0.01,
|
52
|
+
activation="relu",
|
53
|
+
kernel_initializer="random_normal",
|
54
|
+
bias_initializer="zeros",
|
55
|
+
kernel_regularizer=None,
|
56
|
+
bias_regularizer=None,
|
57
|
+
use_group_norm=False,
|
58
|
+
data_format=None,
|
59
|
+
**kwargs,
|
60
|
+
):
|
61
|
+
super().__init__(**kwargs)
|
62
|
+
|
63
|
+
self.output_filters = output_filters
|
64
|
+
self.num_filters = num_filters
|
65
|
+
self.num_conv_layers = num_conv_layers
|
66
|
+
self.use_prior_probability = use_prior_probability
|
67
|
+
self.prior_probability = prior_probability
|
68
|
+
self.activation = keras.activations.get(activation)
|
69
|
+
self.kernel_initializer = keras.initializers.get(kernel_initializer)
|
70
|
+
self.bias_initializer = keras.initializers.get(bias_initializer)
|
71
|
+
if kernel_regularizer is not None:
|
72
|
+
self.kernel_regularizer = keras.regularizers.get(kernel_regularizer)
|
73
|
+
else:
|
74
|
+
self.kernel_regularizer = None
|
75
|
+
if bias_regularizer is not None:
|
76
|
+
self.bias_regularizer = keras.regularizers.get(bias_regularizer)
|
77
|
+
else:
|
78
|
+
self.bias_regularizer = None
|
79
|
+
self.use_group_norm = use_group_norm
|
80
|
+
self.data_format = standardize_data_format(data_format)
|
81
|
+
|
82
|
+
def build(self, input_shape):
|
83
|
+
intermediate_shape = input_shape
|
84
|
+
self.conv_layers = []
|
85
|
+
self.group_norm_layers = []
|
86
|
+
for idx in range(self.num_conv_layers):
|
87
|
+
conv = keras.layers.Conv2D(
|
88
|
+
self.num_filters,
|
89
|
+
kernel_size=3,
|
90
|
+
padding="same",
|
91
|
+
kernel_initializer=self.kernel_initializer,
|
92
|
+
bias_initializer=self.bias_initializer,
|
93
|
+
use_bias=not self.use_group_norm,
|
94
|
+
kernel_regularizer=self.kernel_regularizer,
|
95
|
+
bias_regularizer=self.bias_regularizer,
|
96
|
+
data_format=self.data_format,
|
97
|
+
dtype=self.dtype_policy,
|
98
|
+
name=f"conv2d_{idx}",
|
99
|
+
)
|
100
|
+
conv.build(intermediate_shape)
|
101
|
+
self.conv_layers.append(conv)
|
102
|
+
intermediate_shape = (
|
103
|
+
input_shape[:-1] + (self.num_filters,)
|
104
|
+
if self.data_format == "channels_last"
|
105
|
+
else (input_shape[0], self.num_filters) + (input_shape[1:-1])
|
106
|
+
)
|
107
|
+
if self.use_group_norm:
|
108
|
+
group_norm = keras.layers.GroupNormalization(
|
109
|
+
groups=32,
|
110
|
+
axis=-1 if self.data_format == "channels_last" else 1,
|
111
|
+
dtype=self.dtype_policy,
|
112
|
+
name=f"group_norm_{idx}",
|
113
|
+
)
|
114
|
+
group_norm.build(intermediate_shape)
|
115
|
+
self.group_norm_layers.append(group_norm)
|
116
|
+
prior_probability = keras.initializers.Constant(
|
117
|
+
-1
|
118
|
+
* keras.ops.log(
|
119
|
+
(1 - self.prior_probability) / self.prior_probability
|
120
|
+
)
|
121
|
+
)
|
122
|
+
self.prediction_layer = keras.layers.Conv2D(
|
123
|
+
self.output_filters,
|
124
|
+
kernel_size=3,
|
125
|
+
strides=1,
|
126
|
+
padding="same",
|
127
|
+
kernel_initializer=self.kernel_initializer,
|
128
|
+
bias_initializer=(
|
129
|
+
prior_probability
|
130
|
+
if self.use_prior_probability
|
131
|
+
else self.bias_initializer
|
132
|
+
),
|
133
|
+
kernel_regularizer=self.kernel_regularizer,
|
134
|
+
bias_regularizer=self.bias_regularizer,
|
135
|
+
dtype=self.dtype_policy,
|
136
|
+
name="logits_layer",
|
137
|
+
)
|
138
|
+
self.prediction_layer.build(
|
139
|
+
(None, None, None, self.num_filters)
|
140
|
+
if self.data_format == "channels_last"
|
141
|
+
else (None, self.num_filters, None, None)
|
142
|
+
)
|
143
|
+
self.built = True
|
144
|
+
|
145
|
+
def call(self, input):
|
146
|
+
x = input
|
147
|
+
for idx in range(self.num_conv_layers):
|
148
|
+
x = self.conv_layers[idx](x)
|
149
|
+
if self.use_group_norm:
|
150
|
+
x = self.group_norm_layers[idx](x)
|
151
|
+
x = self.activation(x)
|
152
|
+
|
153
|
+
output = self.prediction_layer(x)
|
154
|
+
return output
|
155
|
+
|
156
|
+
def get_config(self):
|
157
|
+
config = super().get_config()
|
158
|
+
config.update(
|
159
|
+
{
|
160
|
+
"output_filters": self.output_filters,
|
161
|
+
"num_filters": self.num_filters,
|
162
|
+
"num_conv_layers": self.num_conv_layers,
|
163
|
+
"use_group_norm": self.use_group_norm,
|
164
|
+
"use_prior_probability": self.use_prior_probability,
|
165
|
+
"prior_probability": self.prior_probability,
|
166
|
+
"activation": keras.activations.serialize(self.activation),
|
167
|
+
"kernel_initializer": keras.initializers.serialize(
|
168
|
+
self.kernel_initializer
|
169
|
+
),
|
170
|
+
"bias_initializer": keras.initializers.serialize(
|
171
|
+
self.kernel_initializer
|
172
|
+
),
|
173
|
+
"kernel_regularizer": (
|
174
|
+
keras.regularizers.serialize(self.kernel_regularizer)
|
175
|
+
if self.kernel_regularizer is not None
|
176
|
+
else None
|
177
|
+
),
|
178
|
+
"bias_regularizer": (
|
179
|
+
keras.regularizers.serialize(self.bias_regularizer)
|
180
|
+
if self.bias_regularizer is not None
|
181
|
+
else None
|
182
|
+
),
|
183
|
+
}
|
184
|
+
)
|
185
|
+
return config
|
186
|
+
|
187
|
+
def compute_output_shape(self, input_shape):
|
188
|
+
return (
|
189
|
+
input_shape[:-1] + (self.output_filters,)
|
190
|
+
if self.data_format == "channels_last"
|
191
|
+
else (input_shape[0],) + (self.output_filters,) + input_shape[1:-1]
|
192
|
+
)
|
@@ -0,0 +1,146 @@
|
|
1
|
+
import keras
|
2
|
+
|
3
|
+
from keras_hub.src.api_export import keras_hub_export
|
4
|
+
from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone
|
5
|
+
from keras_hub.src.models.retinanet.feature_pyramid import FeaturePyramid
|
6
|
+
from keras_hub.src.utils.keras_utils import standardize_data_format
|
7
|
+
|
8
|
+
|
9
|
+
@keras_hub_export("keras_hub.models.RetinaNetBackbone")
|
10
|
+
class RetinaNetBackbone(FeaturePyramidBackbone):
|
11
|
+
"""RetinaNet Backbone.
|
12
|
+
|
13
|
+
Combines a CNN backbone (e.g., ResNet, MobileNet) with a feature pyramid
|
14
|
+
network (FPN) to extract multi-scale features for object detection.
|
15
|
+
|
16
|
+
Args:
|
17
|
+
image_encoder: `keras.Model`. The backbone model (e.g., ResNet50,
|
18
|
+
MobileNetV2) used to extract features from the input image.
|
19
|
+
It should have pyramid outputs (i.e., a dictionary mapping level
|
20
|
+
names like `"P2"`, `"P3"`, etc. to their corresponding feature
|
21
|
+
tensors).
|
22
|
+
min_level: int. The minimum level of the feature pyramid (e.g., 3).
|
23
|
+
This determines the finest level of features used.
|
24
|
+
max_level: int. The maximum level of the feature pyramid (e.g., 7).
|
25
|
+
This determines the coarsest level of features used.
|
26
|
+
use_p5: bool. Determines the input source for creating coarser
|
27
|
+
feature pyramid levels. If `True`, the output of the last backbone
|
28
|
+
layer (typically `'P5'` in an FPN) is used as input to create
|
29
|
+
higher-level feature maps (e.g., `'P6'`, `'P7'`) through
|
30
|
+
additional convolutional layers. If `False`, the `'P5'` output of
|
31
|
+
the feature pyramid network is used as input for creating the
|
32
|
+
coarser levels instead. This argument is required and has no
|
33
|
+
default value.
|
34
|
+
use_fpn_batch_norm: bool. Whether to use batch normalization in the
|
35
|
+
feature pyramid network. Defaults to `False`.
|
36
|
+
image_shape: tuple. The shape of the input image (H, W, C).
|
37
|
+
The height and width can be `None` if they are variable.
|
38
|
+
data_format: str. The data format of the input image
|
39
|
+
(channels_first or channels_last).
|
40
|
+
dtype: str. The data type of the input image.
|
41
|
+
**kwargs: Additional keyword arguments passed to the base class.
|
42
|
+
|
43
|
+
Raises:
|
44
|
+
ValueError: If `min_level` is greater than `max_level`.
|
45
|
+
ValueError: If `backbone_max_level` is less than 5 and `max_level` is greater than or equal to 5.
|
46
|
+
"""
|
47
|
+
|
48
|
+
def __init__(
|
49
|
+
self,
|
50
|
+
image_encoder,
|
51
|
+
min_level,
|
52
|
+
max_level,
|
53
|
+
use_p5,
|
54
|
+
use_fpn_batch_norm=False,
|
55
|
+
image_shape=(None, None, 3),
|
56
|
+
data_format=None,
|
57
|
+
dtype=None,
|
58
|
+
**kwargs,
|
59
|
+
):
|
60
|
+
|
61
|
+
# === Layers ===
|
62
|
+
if min_level > max_level:
|
63
|
+
raise ValueError(
|
64
|
+
f"Minimum level ({min_level}) must be less than or equal to "
|
65
|
+
f"maximum level ({max_level})."
|
66
|
+
)
|
67
|
+
|
68
|
+
data_format = standardize_data_format(data_format)
|
69
|
+
input_levels = [
|
70
|
+
int(level[1]) for level in image_encoder.pyramid_outputs
|
71
|
+
]
|
72
|
+
backbone_max_level = min(max(input_levels), max_level)
|
73
|
+
|
74
|
+
if backbone_max_level < 5 and max_level >= 5:
|
75
|
+
raise ValueError(
|
76
|
+
f"Backbone maximum level ({backbone_max_level}) is less than "
|
77
|
+
f"the desired maximum level ({max_level}). "
|
78
|
+
f"Please ensure that the backbone can generate features up to "
|
79
|
+
f"the specified maximum level."
|
80
|
+
)
|
81
|
+
feature_extractor = keras.Model(
|
82
|
+
inputs=image_encoder.inputs,
|
83
|
+
outputs={
|
84
|
+
f"P{level}": image_encoder.pyramid_outputs[f"P{level}"]
|
85
|
+
for level in range(min_level, backbone_max_level + 1)
|
86
|
+
},
|
87
|
+
name="backbone",
|
88
|
+
)
|
89
|
+
|
90
|
+
feature_pyramid = FeaturePyramid(
|
91
|
+
min_level=min_level,
|
92
|
+
max_level=max_level,
|
93
|
+
use_p5=use_p5,
|
94
|
+
name="fpn",
|
95
|
+
dtype=dtype,
|
96
|
+
data_format=data_format,
|
97
|
+
use_batch_norm=use_fpn_batch_norm,
|
98
|
+
)
|
99
|
+
|
100
|
+
# === Functional model ===
|
101
|
+
image_input = keras.layers.Input(image_shape, name="inputs")
|
102
|
+
feature_extractor_outputs = feature_extractor(image_input)
|
103
|
+
feature_pyramid_outputs = feature_pyramid(feature_extractor_outputs)
|
104
|
+
|
105
|
+
super().__init__(
|
106
|
+
inputs=image_input,
|
107
|
+
outputs=feature_pyramid_outputs,
|
108
|
+
dtype=dtype,
|
109
|
+
**kwargs,
|
110
|
+
)
|
111
|
+
|
112
|
+
# === config ===
|
113
|
+
self.min_level = min_level
|
114
|
+
self.max_level = max_level
|
115
|
+
self.use_p5 = use_p5
|
116
|
+
self.use_fpn_batch_norm = use_fpn_batch_norm
|
117
|
+
self.image_encoder = image_encoder
|
118
|
+
self.feature_pyramid = feature_pyramid
|
119
|
+
self.image_shape = image_shape
|
120
|
+
self.pyramid_outputs = feature_pyramid_outputs
|
121
|
+
|
122
|
+
def get_config(self):
|
123
|
+
config = super().get_config()
|
124
|
+
config.update(
|
125
|
+
{
|
126
|
+
"image_encoder": keras.layers.serialize(self.image_encoder),
|
127
|
+
"min_level": self.min_level,
|
128
|
+
"max_level": self.max_level,
|
129
|
+
"use_p5": self.use_p5,
|
130
|
+
"use_fpn_batch_norm": self.use_fpn_batch_norm,
|
131
|
+
"image_shape": self.image_shape,
|
132
|
+
}
|
133
|
+
)
|
134
|
+
return config
|
135
|
+
|
136
|
+
@classmethod
|
137
|
+
def from_config(cls, config):
|
138
|
+
config.update(
|
139
|
+
{
|
140
|
+
"image_encoder": keras.layers.deserialize(
|
141
|
+
config["image_encoder"]
|
142
|
+
),
|
143
|
+
}
|
144
|
+
)
|
145
|
+
|
146
|
+
return super().from_config(config)
|
@@ -0,0 +1,53 @@
|
|
1
|
+
from keras_hub.src.api_export import keras_hub_export
|
2
|
+
from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
|
3
|
+
from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone
|
4
|
+
from keras_hub.src.utils.tensor_utils import preprocessing_function
|
5
|
+
|
6
|
+
|
7
|
+
@keras_hub_export("keras_hub.layers.RetinaNetImageConverter")
|
8
|
+
class RetinaNetImageConverter(ImageConverter):
|
9
|
+
backbone_cls = RetinaNetBackbone
|
10
|
+
|
11
|
+
def __init__(
|
12
|
+
self,
|
13
|
+
image_size=None,
|
14
|
+
scale=None,
|
15
|
+
offset=None,
|
16
|
+
norm_mean=[0.485, 0.456, 0.406],
|
17
|
+
norm_std=[0.229, 0.224, 0.225],
|
18
|
+
**kwargs
|
19
|
+
):
|
20
|
+
super().__init__(**kwargs)
|
21
|
+
self.image_size = image_size
|
22
|
+
self.scale = scale
|
23
|
+
self.offset = offset
|
24
|
+
self.norm_mean = norm_mean
|
25
|
+
self.norm_std = norm_std
|
26
|
+
self.built = True
|
27
|
+
|
28
|
+
@preprocessing_function
|
29
|
+
def call(self, inputs):
|
30
|
+
# TODO: https://github.com/keras-team/keras-hub/issues/1965
|
31
|
+
x = inputs
|
32
|
+
# Rescaling Image
|
33
|
+
if self.scale is not None:
|
34
|
+
x = x * self._expand_non_channel_dims(self.scale, x)
|
35
|
+
if self.offset is not None:
|
36
|
+
x = x + self._expand_non_channel_dims(self.offset, x)
|
37
|
+
# By default normalize using imagenet mean and std
|
38
|
+
if self.norm_mean:
|
39
|
+
x = x - self._expand_non_channel_dims(self.norm_mean, x)
|
40
|
+
if self.norm_std:
|
41
|
+
x = x / self._expand_non_channel_dims(self.norm_std, x)
|
42
|
+
|
43
|
+
return x
|
44
|
+
|
45
|
+
def get_config(self):
|
46
|
+
config = super().get_config()
|
47
|
+
config.update(
|
48
|
+
{
|
49
|
+
"norm_mean": self.norm_mean,
|
50
|
+
"norm_std": self.norm_std,
|
51
|
+
}
|
52
|
+
)
|
53
|
+
return config
|