keras-hub-nightly 0.23.0.dev202510090417__py3-none-any.whl → 0.23.0.dev202510110411__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/layers/__init__.py +3 -0
- keras_hub/models/__init__.py +9 -0
- keras_hub/src/models/mobilenetv5/__init__.py +0 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_attention.py +699 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_backbone.py +396 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_blocks.py +890 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_builder.py +436 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier.py +157 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_preprocessor.py +16 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_image_converter.py +10 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_layers.py +462 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_utils.py +146 -0
- keras_hub/src/models/qwen3_moe/__init__.py +5 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +30 -0
- keras_hub/src/utils/preset_utils.py +9 -2
- keras_hub/src/utils/timm/convert_mobilenetv5.py +321 -0
- keras_hub/src/utils/timm/preset_loader.py +8 -4
- keras_hub/src/version.py +1 -1
- {keras_hub_nightly-0.23.0.dev202510090417.dist-info → keras_hub_nightly-0.23.0.dev202510110411.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.23.0.dev202510090417.dist-info → keras_hub_nightly-0.23.0.dev202510110411.dist-info}/RECORD +22 -9
- {keras_hub_nightly-0.23.0.dev202510090417.dist-info → keras_hub_nightly-0.23.0.dev202510110411.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.23.0.dev202510090417.dist-info → keras_hub_nightly-0.23.0.dev202510110411.dist-info}/top_level.txt +0 -0
keras_hub/src/models/mobilenetv5/mobilenetv5_builder.py
@@ -0,0 +1,436 @@
+import re
+from copy import deepcopy
+
+from keras_hub.src.models.mobilenet.mobilenet_backbone import ConvBnActBlock
+from keras_hub.src.models.mobilenet.mobilenet_backbone import DepthwiseConvBlock
+from keras_hub.src.models.mobilenet.mobilenet_backbone import (
+    InvertedResidualBlock,
+)
+from keras_hub.src.models.mobilenet.util import adjust_channels
+from keras_hub.src.models.mobilenetv5.mobilenetv5_attention import (
+    MobileAttention,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_blocks import CondConvResidual
+from keras_hub.src.models.mobilenetv5.mobilenetv5_blocks import EdgeResidual
+from keras_hub.src.models.mobilenetv5.mobilenetv5_blocks import (
+    UniversalInvertedResidual,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_utils import parse_ksize
+from keras_hub.src.models.mobilenetv5.mobilenetv5_utils import round_channels
+
+
+def decode_block_str(block_str):
+    assert isinstance(block_str, str)
+    ops = block_str.split("_")
+    block_type = ops[0]
+    ops = ops[1:]
+    options = {}
+    skip = None
+    for op in ops:
+        if op == "noskip":
+            skip = False
+        elif op == "skip":
+            skip = True
+        elif op.startswith("n"):
+            key = op[0]
+            v = op[1:]
+            options[key] = v if v else "relu"
+        else:
+            splits = re.split(r"(\d.*)", op)
+            if len(splits) >= 2:
+                key, value = splits[:2]
+                options[key] = value
+
+    act_layer = options.get("n", "gelu")
+    num_repeat = int(options["r"])
+
+    block_args = dict(
+        block_type=block_type,
+        out_chs=int(options["c"]),
+        stride=int(options["s"]),
+        act_layer=act_layer,
+    )
+
+    if block_type == "ir":
+        block_args.update(
+            dict(
+                dw_kernel_size=parse_ksize(options["k"]),
+                exp_ratio=float(options["e"]),
+                se_ratio=float(options.get("se", 0.0)),
+                noskip=skip is False,
+            )
+        )
+    elif block_type == "uir":
+        start_kernel_size = parse_ksize(options.get("a", "0"))
+        end_kernel_size = parse_ksize(options.get("p", "0"))
+        block_args.update(
+            dict(
+                dw_kernel_size_start=start_kernel_size,
+                dw_kernel_size_mid=parse_ksize(options["k"]),
+                dw_kernel_size_end=end_kernel_size,
+                exp_ratio=float(options["e"]),
+                se_ratio=float(options.get("se", 0.0)),
+                noskip=skip is False,
+            )
+        )
+    elif block_type == "er":
+        block_args.update(
+            dict(
+                exp_kernel_size=parse_ksize(options["k"]),
+                pw_kernel_size=1,
+                exp_ratio=float(options["e"]),
+                se_ratio=float(options.get("se", 0.0)),
+                noskip=skip is False,
+            )
+        )
+    elif block_type in ("mqa", "mha"):
+        key_dim_val = int(options.get("d", "64"))
+        block_args.update(
+            dict(
+                num_heads=int(options.get("h", "12")),
+                key_dim=key_dim_val,
+                value_dim=key_dim_val,
+                kv_stride=int(options.get("v", "1")),
+                use_cpe=bool(int(options.get("cpe", "0"))),
+            )
+        )
+    return block_args, num_repeat
+
+
+def decode_arch_def(arch_def):
+    arch_args = []
+    for _, block_strings in enumerate(arch_def):
+        stack_args = []
+        for block_str in block_strings:
+            ba, rep = decode_block_str(block_str)
+            stack_args.extend([deepcopy(ba) for _ in range(rep)])
+        arch_args.append(stack_args)
+    return arch_args
+
+
+def convert_arch_def_to_stackwise(arch_def):
+    decoded_args = decode_arch_def(arch_def)
+    stackwise_params = {
+        k: []
+        for k in [
+            "stackwise_block_types",
+            "stackwise_num_blocks",
+            "stackwise_num_filters",
+            "stackwise_strides",
+            "stackwise_act_layers",
+            "stackwise_exp_ratios",
+            "stackwise_se_ratios",
+            "stackwise_dw_kernel_sizes",
+            "stackwise_dw_start_kernel_sizes",
+            "stackwise_dw_end_kernel_sizes",
+            "stackwise_exp_kernel_sizes",
+            "stackwise_pw_kernel_sizes",
+            "stackwise_num_heads",
+            "stackwise_key_dims",
+            "stackwise_value_dims",
+            "stackwise_kv_strides",
+            "stackwise_use_cpe",
+        ]
+    }
+    for stack in decoded_args:
+        stackwise_params["stackwise_num_blocks"].append(len(stack))
+        current_stack_params = {
+            k: [] for k in stackwise_params if k != "stackwise_num_blocks"
+        }
+        for block in stack:
+            current_stack_params["stackwise_block_types"].append(
+                block.get("block_type")
+            )
+            current_stack_params["stackwise_num_filters"].append(
+                block.get("out_chs")
+            )
+            current_stack_params["stackwise_strides"].append(
+                block.get("stride")
+            )
+            current_stack_params["stackwise_act_layers"].append(
+                block.get("act_layer")
+            )
+            current_stack_params["stackwise_exp_ratios"].append(
+                block.get("exp_ratio", 0.0)
+            )
+            current_stack_params["stackwise_se_ratios"].append(
+                block.get("se_ratio", 0.0)
+            )
+            current_stack_params["stackwise_dw_kernel_sizes"].append(
+                block.get("dw_kernel_size", block.get("dw_kernel_size_mid", 0))
+            )
+            current_stack_params["stackwise_dw_start_kernel_sizes"].append(
+                block.get("dw_kernel_size_start", 0)
+            )
+            current_stack_params["stackwise_dw_end_kernel_sizes"].append(
+                block.get("dw_kernel_size_end", 0)
+            )
+            current_stack_params["stackwise_exp_kernel_sizes"].append(
+                block.get("exp_kernel_size", 0)
+            )
+            current_stack_params["stackwise_pw_kernel_sizes"].append(
+                block.get("pw_kernel_size", 0)
+            )
+            current_stack_params["stackwise_num_heads"].append(
+                block.get("num_heads", 0)
+            )
+            current_stack_params["stackwise_key_dims"].append(
+                block.get("key_dim", 0)
+            )
+            current_stack_params["stackwise_value_dims"].append(
+                block.get("value_dim", 0)
+            )
+            current_stack_params["stackwise_kv_strides"].append(
+                block.get("kv_stride", 0)
+            )
+            current_stack_params["stackwise_use_cpe"].append(
+                block.get("use_cpe", False)
+            )
+        for key, value in current_stack_params.items():
+            stackwise_params[key].append(value)
+    return stackwise_params
+
+
+class MobileNetV5Builder:
+    """Builds a MobileNetV5 model from a decoded architecture definition.
+
+    This class takes a decoded architecture definition and constructs a list of
+    network stages, where each stage is a list of blocks. It handles channel
+    rounding, stride management, and feature extraction points.
+
+    Args:
+        output_stride: int. The desired output stride of the network.
+        pad_type: str. The padding type for convolutions.
+        round_chs_fn: callable. A function to round the number of channels.
+        se_from_exp: bool. If `True`, SE channel reduction is based on the
+            expanded channels.
+        act_layer: str. The default activation function for blocks.
+        norm_layer: str. The default normalization layer for blocks.
+        aa_layer: keras.layers.Layer. An optional anti-aliasing layer.
+        se_layer: keras.layers.Layer. The Squeeze-and-Excitation layer to use.
+        drop_path_rate: float. The stochastic depth rate for the network.
+        layer_scale_init_value: float. The initial value for layer scale.
+        feature_location: str. Where to extract features from, either
+            `"bottleneck"`, `"expansion"`, or `""`.
+        data_format: str. The format of the input data, either
+            `"channels_last"` or `"channels_first"`.
+        channel_axis: int. The axis representing the channels in the input
+            tensor.
+    """
+
+    def __init__(
+        self,
+        output_stride=32,
+        pad_type="same",
+        round_chs_fn=round_channels,
+        se_from_exp=False,
+        act_layer="relu",
+        norm_layer="batch_norm",
+        aa_layer=None,
+        se_layer=None,
+        drop_path_rate=0.0,
+        layer_scale_init_value=None,
+        feature_location="",
+        data_format=None,
+        channel_axis=None,
+        dtype=None,
+    ):
+        self.output_stride = output_stride
+        self.pad_type = pad_type
+        self.data_format = data_format
+        self.channel_axis = channel_axis
+        self.round_chs_fn = round_chs_fn
+        self.se_from_exp = se_from_exp
+        self.act_layer = act_layer
+        self.norm_layer = norm_layer
+        self.aa_layer = aa_layer
+        self.se_layer = se_layer
+        self.drop_path_rate = drop_path_rate
+        self.layer_scale_init_value = layer_scale_init_value
+        self.dtype = dtype
+        if feature_location == "depthwise":
+            feature_location = "expansion"
+        self.feature_location = feature_location
+        assert feature_location in ("bottleneck", "expansion", "")
+        self.in_chs = None
+        self.features = []
+
+    def _make_block(self, ba, block_idx, block_count):
+        drop_path_rate = self.drop_path_rate * block_idx / block_count
+        bt = ba.pop("block_type")
+        ba["filters"] = self.round_chs_fn(ba.pop("out_chs"))
+        s2d = ba.get("s2d", 0)
+        if s2d > 0:
+            ba["filters"] *= 4
+        if "expansion_in_chs" in ba and ba["expansion_in_chs"]:
+            ba["expansion_in_chs"] = self.round_chs_fn(ba["expansion_in_chs"])
+        ba["pad_type"] = self.pad_type
+        ba["act_layer"] = (
+            ba.get("act_layer")
+            if ba.get("act_layer") is not None
+            else self.act_layer
+        )
+        assert ba["act_layer"] is not None
+        ba["norm_layer"] = self.norm_layer
+        ba["drop_path_rate"] = drop_path_rate
+        ba["data_format"] = self.data_format
+        ba["channel_axis"] = self.channel_axis
+        ba["dtype"] = self.dtype
+        if bt in ("ir", "er", "uir", "ds", "dsa"):
+            se_ratio = ba.pop("se_ratio", None)
+            if se_ratio and self.se_layer is not None:
+                if not self.se_from_exp:
+                    se_ratio /= ba.get("exp_ratio", 1.0)
+                if s2d == 1:
+                    se_ratio /= 4
+                ba["se_layer"] = lambda channels: self.se_layer(
+                    filters=channels,
+                    bottleneck_filters=adjust_channels(channels * se_ratio),
+                    squeeze_activation=ba["act_layer"],
+                    excite_activation="sigmoid",
+                    data_format=self.data_format,
+                    dtype=self.dtype,
+                )
+            else:
+                ba["se_layer"] = None
+        ba.pop("aa_layer", None)
+        if bt == "ir":
+            padding = 0
+            if ba["pad_type"].lower() in ("", "same"):
+                kernel_size = ba["dw_kernel_size"]
+                if isinstance(kernel_size, (list, tuple)):
+                    kernel_size = kernel_size[0]
+                padding = (kernel_size - 1) // 2
+            block = (
+                CondConvResidual(**ba)
+                if ba.get("num_experts", 0) > 0
+                else InvertedResidualBlock(
+                    expansion=ba["exp_ratio"],
+                    infilters=self.in_chs,
+                    filters=ba["filters"],
+                    kernel_size=ba["dw_kernel_size"],
+                    stride=ba["stride"],
+                    padding=padding,
+                    squeeze_excite_ratio=ba.pop("se_ratio", None),
+                    activation=ba["act_layer"],
+                )
+            )
+        elif bt == "ds" or bt == "dsa":
+            block = DepthwiseConvBlock(
+                infilters=self.in_chs,
+                filters=ba["filters"],
+                kernel_size=ba["dw_kernel_size"],
+                stride=ba["stride"],
+                squeeze_excite_ratio=ba.pop("se_ratio", None),
+                residual=not ba["noskip"],
+                dtype=self.dtype,
+            )
+        elif bt == "er":
+            block = EdgeResidual(**ba)
+        elif bt == "cn":
+            block = ConvBnActBlock(out_chs=ba.pop("filters"), **ba)
+        elif bt == "uir":
+            block = UniversalInvertedResidual(
+                **ba, layer_scale_init_value=self.layer_scale_init_value
+            )
+        elif bt == "mqa":
+            ba.pop("act_layer", None)
+            block = MobileAttention(
+                **ba,
+                use_multi_query=True,
+                layer_scale_init_value=self.layer_scale_init_value,
+            )
+        elif bt == "mha":
+            ba.pop("act_layer", None)
+            block = MobileAttention(
+                **ba, layer_scale_init_value=self.layer_scale_init_value
+            )
+        else:
+            raise ValueError(f"Unknown block type ({bt}) while building model.")
+        self.in_chs = ba["filters"]
+        return block
+
+    def __call__(self, in_chs, model_block_args):
+        self.in_chs = in_chs
+        total_block_count = sum([len(x) for x in model_block_args])
+        total_block_idx = 0
+        current_stride = 2
+        current_dilation = 1
+        stages = []
+        if model_block_args[0][0]["stride"] > 1:
+            feature_info = dict(
+                module="conv_stem",
+                num_chs=in_chs,
+                stage=0,
+                reduction=current_stride,
+            )
+            self.features.append(feature_info)
+        space2depth = 0
+        for stack_idx, stack_args in enumerate(model_block_args):
+            blocks = []
+            for block_idx, block_args in enumerate(stack_args):
+                last_block = block_idx + 1 == len(stack_args)
+                in_chs_for_current_block = self.in_chs
+                assert block_args["stride"] in (1, 2)
+                if block_idx >= 1:
+                    block_args["stride"] = 1
+                if not space2depth and block_args.pop("s2d", False):
+                    assert block_args["stride"] == 1
+                    space2depth = 1
+                if space2depth > 0:
+                    if space2depth == 2 and block_args["stride"] == 2:
+                        block_args["stride"] = 1
+                        block_args["exp_ratio"] /= 4
+                        space2depth = 0
+                    else:
+                        block_args["s2d"] = space2depth
+                next_dilation = current_dilation
+                if block_args["stride"] > 1:
+                    next_output_stride = current_stride * block_args["stride"]
+                    if next_output_stride > self.output_stride:
+                        next_dilation = current_dilation * block_args["stride"]
+                        block_args["stride"] = 1
+                    else:
+                        current_stride = next_output_stride
+                block_args["dilation"] = current_dilation
+                if next_dilation != current_dilation:
+                    current_dilation = next_dilation
+                block = self._make_block(
+                    block_args.copy(), total_block_idx, total_block_count
+                )
+                blocks.append(block)
+                if space2depth == 1:
+                    space2depth = 2
+                extract_features = False
+                if last_block:
+                    next_stack_idx = stack_idx + 1
+                    extract_features = (
+                        next_stack_idx >= len(model_block_args)
+                        or model_block_args[next_stack_idx][0]["stride"] > 1
+                    )
+                if extract_features:
+                    num_chs = 0
+                    module_name = f"blocks.{stack_idx}.{block_idx}"
+                    if self.feature_location == "expansion":
+                        bt = block_args.get("block_type")
+                        if bt in ["ir", "er", "uir"]:
+                            exp_ratio = block_args.get("exp_ratio", 1.0)
+                            num_chs = self.round_chs_fn(
+                                in_chs_for_current_block * exp_ratio
+                            )
+                        else:
+                            num_chs = in_chs_for_current_block
+                    else:
+                        num_chs = self.in_chs
+                        module_name = f"blocks.{stack_idx}"
+
+                    feature_info = dict(
+                        stage=stack_idx + 1,
+                        reduction=current_stride,
+                        num_chs=num_chs,
+                        module=module_name,
+                    )
+                    self.features.append(feature_info)
+                total_block_idx += 1
+            stages.append(blocks)
+        return stages
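To make the decoding pipeline above concrete, here is a minimal sketch of exercising it. The `arch_def` strings are illustrative examples in the timm-style format that `decode_block_str` parses (`<type>_r<repeats>_…_k<kernel>_s<stride>_e<exp_ratio>_c<channels>`), not values taken from any released preset:

```python
from keras_hub.src.models.mobilenetv5.mobilenetv5_builder import (
    convert_arch_def_to_stackwise,
    decode_block_str,
)

# "er_r1_k3_s2_e4_c24": edge residual, 1 repeat, kernel 3, stride 2,
# expansion ratio 4.0, 24 output channels.
block_args, num_repeat = decode_block_str("er_r1_k3_s2_e4_c24")
assert block_args["block_type"] == "er"
assert block_args["out_chs"] == 24 and block_args["stride"] == 2
assert num_repeat == 1

# Repeats are expanded via deepcopy, so the two-repeat "uir" stack below
# contributes two identical entries to every per-stack list.
params = convert_arch_def_to_stackwise(
    [["er_r1_k3_s2_e4_c24"], ["uir_r2_a3_k5_s2_e6_c48"]]
)
assert params["stackwise_block_types"] == [["er"], ["uir", "uir"]]
assert params["stackwise_num_blocks"] == [1, 2]
assert params["stackwise_num_filters"] == [[24], [48, 48]]
```

`MobileNetV5Builder` then consumes the per-block dicts produced by `decode_arch_def`, instantiating the concrete Keras block layers stage by stage via `__call__(in_chs, model_block_args)`.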
keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier.py
@@ -0,0 +1,157 @@
+import keras
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.image_classifier import ImageClassifier
+from keras_hub.src.models.mobilenetv5.mobilenetv5_backbone import (
+    MobileNetV5Backbone,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_image_classifier_preprocessor import (  # noqa: E501
+    MobileNetV5ImageClassifierPreprocessor,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_layers import ConvNormAct
+from keras_hub.src.models.mobilenetv5.mobilenetv5_utils import (
+    SelectAdaptivePool2d,
+)
+from keras_hub.src.models.task import Task
+
+
+@keras_hub_export("keras_hub.models.MobileNetV5ImageClassifier")
+class MobileNetV5ImageClassifier(ImageClassifier):
+    """An end-to-end MobileNetV5 model for image classification.
+
+    This model attaches a classification head to a `MobileNetV5Backbone`.
+    The head consists of a global pooling layer, an optional convolutional
+    head, a dropout layer, and a final dense classifier layer.
+
+    This model can optionally be configured with a `preprocessor` layer, in
+    which case it will automatically apply preprocessing to image inputs during
+    `fit()`, `predict()`, and `evaluate()`.
+
+    Args:
+        backbone: A `keras_hub.models.MobileNetV5Backbone` instance.
+        num_classes: int. The number of classes for the classification head.
+        preprocessor: A `keras_hub.models.ImageClassifierPreprocessor` or
+            `None`. If `None`, this model will not apply preprocessing.
+        head_hidden_size: int. The number of channels in the convolutional
+            head.
+        global_pool: str. The type of global pooling to use.
+        drop_rate: float. The dropout rate for the head.
+        head_dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to
+            use for the head computations and weights.
+
+    Example:
+    ```python
+    import keras
+    from keras_hub.models import MobileNetV5Backbone
+    from keras_hub.models import MobileNetV5ImageClassifier
+
+    # Randomly initialized task model with a custom config.
+    model_args = {
+        "stackwise_block_types": [["er"], ["uir", "uir"]],
+        "stackwise_num_blocks": [1, 2],
+        "stackwise_num_filters": [[24], [48, 48]],
+        "stackwise_strides": [[2], [2, 1]],
+        "stackwise_act_layers": [["relu"], ["relu", "relu"]],
+        "stackwise_exp_ratios": [[4.0], [6.0, 6.0]],
+        "stackwise_se_ratios": [[0.0], [0.0, 0.0]],
+        "stackwise_dw_kernel_sizes": [[0], [5, 5]],
+        "stackwise_dw_start_kernel_sizes": [[0], [0, 0]],
+        "stackwise_dw_end_kernel_sizes": [[0], [0, 0]],
+        "stackwise_exp_kernel_sizes": [[3], [0, 0]],
+        "stackwise_pw_kernel_sizes": [[1], [0, 0]],
+        "stackwise_num_heads": [[0], [0, 0]],
+        "stackwise_key_dims": [[0], [0, 0]],
+        "stackwise_value_dims": [[0], [0, 0]],
+        "stackwise_kv_strides": [[0], [0, 0]],
+        "stackwise_use_cpe": [[False], [False, False]],
+        "use_msfa": False,
+    }
+    backbone = MobileNetV5Backbone(**model_args)
+    model = MobileNetV5ImageClassifier(backbone, 1000)
+    images = keras.ops.ones((1, 224, 224, 3))
+    output = model.predict(images)
+    ```
+    """
+
+    backbone_cls = MobileNetV5Backbone
+    preprocessor_cls = MobileNetV5ImageClassifierPreprocessor
+
+    def __init__(
+        self,
+        backbone,
+        num_classes,
+        preprocessor=None,
+        head_hidden_size=2048,
+        global_pool="avg",
+        drop_rate=0.0,
+        head_dtype=None,
+        **kwargs,
+    ):
+        head_dtype = head_dtype or backbone.dtype_policy
+        data_format = getattr(backbone, "data_format", "channels_last")
+
+        # === Layers ===
+        self.backbone = backbone
+        self.preprocessor = preprocessor
+        if backbone.use_msfa:
+            self.global_pool = SelectAdaptivePool2d(
+                pool_type=global_pool, data_format=data_format, flatten=True
+            )
+            self.conv_head = None
+            self.flatten = None
+        else:
+            self.global_pool = SelectAdaptivePool2d(
+                pool_type=global_pool, data_format=data_format, flatten=False
+            )
+            self.conv_head = ConvNormAct(
+                out_chs=head_hidden_size,
+                kernel_size=1,
+                pad_type="same",
+                norm_layer=backbone.norm_layer,
+                act_layer=backbone.act_layer,
+                bias=False,
+                name="conv_head",
+                dtype=head_dtype,
+            )
+            self.flatten = keras.layers.Flatten(dtype=head_dtype)
+        self.dropout = (
+            keras.layers.Dropout(drop_rate, dtype=head_dtype)
+            if drop_rate > 0.0
+            else None
+        )
+        self.classifier = (
+            keras.layers.Dense(num_classes, dtype=head_dtype, name="classifier")
+            if num_classes > 0
+            else keras.layers.Activation("linear", name="identity_classifier")
+        )
+
+        # === Functional Model ===
+        inputs = self.backbone.input
+        x = self.backbone(inputs)
+        x = self.global_pool(x)
+        if self.conv_head is not None:
+            x = self.conv_head(x)
+        if self.flatten is not None:
+            x = self.flatten(x)
+        if self.dropout is not None:
+            x = self.dropout(x)
+        outputs = self.classifier(x)
+        Task.__init__(self, inputs=inputs, outputs=outputs, **kwargs)
+
+        # === Config ===
+        self.num_classes = num_classes
+        self.head_hidden_size = head_hidden_size
+        self.global_pool_type = global_pool
+        self.drop_rate = drop_rate
+
+    def get_config(self):
+        config = Task.get_config(self)
+        config.update(
+            {
+                "num_classes": self.num_classes,
+                "head_hidden_size": self.head_hidden_size,
+                "global_pool": self.global_pool_type,
+                "drop_rate": self.drop_rate,
+            }
+        )
+        return config
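The four head hyperparameters are stored on the task and re-emitted by `get_config`, so they survive a serialization round trip. A small sketch, assuming the `backbone` built in the docstring example above is in scope:

```python
model = MobileNetV5ImageClassifier(
    backbone,
    num_classes=10,
    head_hidden_size=512,
    drop_rate=0.2,
)
config = model.get_config()
assert config["num_classes"] == 10
assert config["head_hidden_size"] == 512
assert config["global_pool"] == "avg"  # the default pooling type
assert config["drop_rate"] == 0.2
```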
keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_preprocessor.py
@@ -0,0 +1,16 @@
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.image_classifier_preprocessor import (
+    ImageClassifierPreprocessor,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_backbone import (
+    MobileNetV5Backbone,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_image_converter import (
+    MobileNetV5ImageConverter,
+)
+
+
+@keras_hub_export("keras_hub.models.MobileNetV5ImageClassifierPreprocessor")
+class MobileNetV5ImageClassifierPreprocessor(ImageClassifierPreprocessor):
+    backbone_cls = MobileNetV5Backbone
+    image_converter_cls = MobileNetV5ImageConverter
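The preprocessor is a thin binding of the generic `ImageClassifierPreprocessor` machinery to the MobileNetV5 backbone and image converter classes. A minimal sketch of constructing one by hand; the resize and rescale values below are illustrative assumptions, not preset defaults:

```python
from keras_hub.src.models.mobilenetv5.mobilenetv5_image_classifier_preprocessor import (  # noqa: E501
    MobileNetV5ImageClassifierPreprocessor,
)
from keras_hub.src.models.mobilenetv5.mobilenetv5_image_converter import (
    MobileNetV5ImageConverter,
)

converter = MobileNetV5ImageConverter(
    image_size=(224, 224),  # resize target (illustrative)
    scale=1.0 / 255.0,      # rescale pixels to [0, 1] (illustrative)
)
preprocessor = MobileNetV5ImageClassifierPreprocessor(
    image_converter=converter
)
```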
keras_hub/src/models/mobilenetv5/mobilenetv5_image_converter.py
@@ -0,0 +1,10 @@
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
+from keras_hub.src.models.mobilenetv5.mobilenetv5_backbone import (
+    MobileNetV5Backbone,
+)
+
+
+@keras_hub_export("keras_hub.layers.MobileNetV5ImageConverter")
+class MobileNetV5ImageConverter(ImageConverter):
+    backbone_cls = MobileNetV5Backbone
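Likewise, the converter simply registers the shared `ImageConverter` against `MobileNetV5Backbone`, so preset loading resolves the correct converter class. Applied directly, it resizes and rescales raw images; again a sketch with illustrative values:

```python
import keras

converter = MobileNetV5ImageConverter(image_size=(256, 256), scale=1.0 / 255.0)
raw = keras.ops.ones((1, 512, 512, 3))  # raw images at any resolution
images = converter(raw)  # resized and rescaled to shape (1, 256, 256, 3)
```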