mttf 1.1.10__py3-none-any.whl → 1.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mttf might be problematic. Click here for more details.

@@ -0,0 +1,29 @@
1
from ..base import applications as _applications

# Re-export every public name of the base applications module so this
# package behaves as a drop-in superset of it.
for _name, _obj in _applications.__dict__.items():
    if not _name.startswith("_"):
        globals()[_name] = _obj
__doc__ = _applications.__doc__

# BUG FIX: the original imported 'MobileNetV3Block', which
# mobilenet_v3_split does not define (it defines the Small and Large
# variants), so importing this package raised ImportError.
from .mobilenet_v3_split import (
    MobileNetV3Input,
    MobileNetV3Parser,
    MobileNetV3SmallBlock,
    MobileNetV3LargeBlock,
    MobileNetV3Mixer,
    MobileNetV3Output,
    MobileNetV3Split,
)

from .mobilevit import create_mobilevit


__api__ = [
    "MobileNetV3Input",
    "MobileNetV3Parser",
    "MobileNetV3SmallBlock",
    "MobileNetV3LargeBlock",
    "MobileNetV3Mixer",
    "MobileNetV3Output",
    "MobileNetV3Split",
    "create_mobilevit",
]
@@ -0,0 +1,540 @@
1
+ # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ # pylint: disable=invalid-name
16
+ # pylint: disable=missing-function-docstring
17
+ """MobileNet v3 models split into 5 submodels.
18
+
19
+ The MobileNetV3 model is split into 5 parts:
20
+
21
+ - The input parser block that downsamples once (:func:`MobileNetV3Parser`).
22
+ - Block 0 to 3 that downample once for each block (:func:`MobileNetV3LargeBlock`
23
+ or :func:`MobileNetV3SmallBlock`). As of 2023/05/15, there's a possibility to have block 4
24
+ for MobileNetV3Large.
25
+ - The mixer block that turns the downsampled grid into a (1,1,feat_dim) batch
26
+ (:func:`MobileNetV3Mixer`).
27
+ - Optionally the output block that may or may not contain the clasification head
28
+ (:func:`MobileNetV3Output`).
29
+
30
+ Input arguments follow those of MobileNetV3. One can also use :func:`MobileNetV3Split` to create
31
+ a model of submodels that is theoretically equivalent to the original MobileNetV3 model. However,
32
+ no pre-trained weights exist.
33
+ """
34
+
35
+
36
+ from mt import tp, tfc
37
+
38
+ from .. import base
39
+
40
+
41
# Resolve the Keras implementation selected by ``base.keras_source`` and
# bind a uniform set of names (relu, hard_swish, _depth,
# _inverted_res_block, backend, models, layers, data_utils, layer_utils)
# regardless of whether the active Keras is 'tf_keras', standalone
# 'keras', or 'tensorflow.keras'.
#
# BUG FIX: as published, this section was syntactically invalid — each
# fallback had a dangling ``try:`` with no matching handler and the
# ``elif``/``else`` arms followed import lines directly — and the final
# error message referenced an undefined local name ``keras_source``.
if base.keras_source == "tf_keras":
    from tf_keras.applications.mobilenet_v3 import (
        relu,
        hard_swish,
        _depth,
        _inverted_res_block,
    )
    from tf_keras import backend, models, layers
    from tensorflow.python.keras.utils import data_utils, layer_utils
elif base.keras_source == "keras":
    from keras.applications.mobilenet_v3 import (
        relu,
        hard_swish,
        _depth,
        _inverted_res_block,
    )
    from keras import backend, models

    # Newer Keras releases removed VersionAwareLayers; fall back to the
    # plain layers module when it is unavailable.
    try:
        from keras.layers import VersionAwareLayers

        layers = VersionAwareLayers()
    except ImportError:
        from keras import layers
    from keras.utils import data_utils, layer_utils
elif base.keras_source == "tensorflow.keras":
    from tensorflow.keras.src.applications.mobilenet_v3 import (
        relu,
        hard_swish,
        _depth,
        _inverted_res_block,
    )
    from tensorflow.python.keras import backend, models

    try:
        from tensorflow.keras.layers import VersionAwareLayers

        layers = VersionAwareLayers()
    except ImportError:
        from tensorflow.keras import layers
    from tensorflow.python.keras.utils import data_utils, layer_utils
else:
    raise ImportError(
        f"Unknown value '{base.keras_source}' for variable 'keras_source'."
    )

from tensorflow.python.platform import tf_logging as logging
86
+
87
+
88
def MobileNetV3Input(
    input_shape=None,
):
    """Prepares a MobileNetV3 input layer.

    Parameters
    ----------
    input_shape : tuple, optional
        shape of one input image, excluding the batch axis. Defaults to
        (None, None, 3): 3-channel images of arbitrary spatial size.

    Returns
    -------
    tensor
        the freshly created Keras input layer

    Raises
    ------
    mt.tfc.ModelSyntaxError
        if both spatial dimensions are known and either one is below 32
    """

    # Fall back to 3-channel images of arbitrary spatial size.
    if input_shape is None:
        input_shape = (None, None, 3)

    channels_last = backend.image_data_format() == "channels_last"
    row_axis, col_axis = (0, 1) if channels_last else (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    # Reject images known to be too small for the backbone's downsamplings.
    if rows and cols and (rows < 32 or cols < 32):
        raise tfc.ModelSyntaxError(
            f"Input size must be at least 32x32; got `input_shape={input_shape}`"
        )

    return layers.Input(shape=input_shape)
110
+
111
+
112
def MobileNetV3Parser(
    img_input,
    model_type: str = "Large",  # only 'Small' or 'Large' are accepted
    minimalistic=False,
):
    """Prepares a MobileNetV3 parser block.

    The parser is the stem of the network: it rescales pixel values to
    [-1, 1] and applies one strided 3x3 convolution, halving the spatial
    resolution once.

    Parameters
    ----------
    img_input : tensor
        the input layer (see :func:`MobileNetV3Input`)
    model_type : {'Small', 'Large'}
        used only to name the returned model
    minimalistic : bool
        if True, uses relu instead of hard_swish

    Returns
    -------
    tensorflow.keras.Model
        the stem as a model named 'MobileNetV3<model_type>Parser'
    """

    bn_axis = 1 if backend.image_data_format() == "channels_first" else -1
    act = relu if minimalistic else hard_swish

    # Stem: rescale, strided conv, batch norm, activation.
    x = layers.Rescaling(scale=1.0 / 127.5, offset=-1.0)(img_input)
    x = layers.Conv2D(
        16, kernel_size=3, strides=(2, 2), padding="same", use_bias=False, name="Conv"
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis, epsilon=1e-3, momentum=0.999, name="Conv/BatchNorm"
    )(x)
    x = act(x)

    return models.Model(img_input, x, name=f"MobileNetV3{model_type}Parser")
140
+
141
+
142
def MobileNetV3SmallBlock(
    block_id: int,  # only 0 to 3 are accepted here
    input_tensor,  # input tensor for the block
    alpha=1.0,
    minimalistic=False,
):
    """Prepares a MobileNetV3Small downsampling block.

    Wraps the run of inverted-residual stages of the stock MobileNetV3Small
    backbone that begins with one stride-2 stage, as its own Keras model,
    so each block downsamples the feature grid exactly once.

    Parameters
    ----------
    block_id : int
        which part of the backbone to build; only 0 to 3 are accepted here
        (any value other than 0, 1, 2 falls through to the last block)
    input_tensor : tensor
        input tensor for the block
    alpha : float
        width multiplier applied to every stage's channel count
    minimalistic : bool
        if True, uses 3x3 kernels, relu, and no squeeze-and-excite

    Returns
    -------
    tensorflow.keras.Model
        the block as a model named 'MobileNetV3SmallBlock<block_id>'
    """

    def depth(d):
        # Round the alpha-scaled channel count per the MobileNet depth rule.
        return _depth(d * alpha)

    if minimalistic:
        kernel = 3
        activation = relu
        se_ratio = None
    else:
        kernel = 5
        activation = hard_swish
        se_ratio = 0.25

    # Expansion ratios, channel counts, strides and stage ids below mirror
    # the stack_fn of Keras' MobileNetV3Small; the first stage of each
    # branch is the stride-2 (downsampling) one.
    x = input_tensor
    if block_id == 0:
        x = _inverted_res_block(x, 1, depth(16), 3, 2, se_ratio, relu, 0)
    elif block_id == 1:
        x = _inverted_res_block(x, 72.0 / 16, depth(24), 3, 2, None, relu, 1)
        x = _inverted_res_block(x, 88.0 / 24, depth(24), 3, 1, None, relu, 2)
    elif block_id == 2:
        x = _inverted_res_block(x, 4, depth(40), kernel, 2, se_ratio, activation, 3)
        x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 4)
        x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 5)
        x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 6)
        x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 7)
    else:
        x = _inverted_res_block(x, 6, depth(96), kernel, 2, se_ratio, activation, 8)
        x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation, 9)
        x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation, 10)

    # Create model.
    model = models.Model(input_tensor, x, name=f"MobileNetV3SmallBlock{block_id}")

    return model
183
+
184
+
185
def MobileNetV3LargeBlock(
    block_id: int,  # only 0 to 4 are accepted here. 4 is only available as of 2023/05/15
    input_tensor,  # input tensor for the block
    alpha=1.0,
    minimalistic=False,
):
    """Prepares a MobileNetV3Large downsampling block.

    Wraps the run of inverted-residual stages of the stock MobileNetV3Large
    backbone that begins with one stride-2 stage, as its own Keras model,
    so each block downsamples the feature grid exactly once.

    Parameters
    ----------
    block_id : int
        which part of the backbone to build; only 0 to 4 are accepted here
        (any value other than 0-3 falls through to the extra block 4,
        available as of 2023/05/15)
    input_tensor : tensor
        input tensor for the block
    alpha : float
        width multiplier applied to every stage's channel count
    minimalistic : bool
        if True, uses 3x3 kernels, relu, and no squeeze-and-excite

    Returns
    -------
    tensorflow.keras.Model
        the block as a model named 'MobileNetV3LargeBlock<block_id>'
    """

    def depth(d):
        # Round the alpha-scaled channel count per the MobileNet depth rule.
        return _depth(d * alpha)

    if minimalistic:
        kernel = 3
        activation = relu
        se_ratio = None
    else:
        kernel = 5
        activation = hard_swish
        se_ratio = 0.25

    # Stages for blocks 0-3 mirror the stack_fn of Keras' MobileNetV3Large;
    # the final branch (block 4, stage ids 15-17) is an extension beyond the
    # stock architecture, so no pre-trained weights cover it.
    x = input_tensor
    if block_id == 0:
        x = _inverted_res_block(x, 1, depth(16), 3, 1, None, relu, 0)
        x = _inverted_res_block(x, 4, depth(24), 3, 2, None, relu, 1)
        x = _inverted_res_block(x, 3, depth(24), 3, 1, None, relu, 2)
    elif block_id == 1:
        x = _inverted_res_block(x, 3, depth(40), kernel, 2, se_ratio, relu, 3)
        x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 4)
        x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 5)
    elif block_id == 2:
        x = _inverted_res_block(x, 6, depth(80), 3, 2, None, activation, 6)
        x = _inverted_res_block(x, 2.5, depth(80), 3, 1, None, activation, 7)
        x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 8)
        x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 9)
        x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 10)
        x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 11)
    elif block_id == 3:
        x = _inverted_res_block(x, 6, depth(160), kernel, 2, se_ratio, activation, 12)
        x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio, activation, 13)
        x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio, activation, 14)
    else:
        x = _inverted_res_block(x, 6, depth(320), kernel, 2, se_ratio, activation, 15)
        x = _inverted_res_block(x, 6, depth(320), kernel, 1, se_ratio, activation, 16)
        x = _inverted_res_block(x, 6, depth(320), kernel, 1, se_ratio, activation, 17)

    # Create model.
    model = models.Model(input_tensor, x, name=f"MobileNetV3LargeBlock{block_id}")

    return model
234
+
235
+
236
def MobileNetV3Mixer(
    input_tensor,
    params: tfc.MobileNetV3MixerParams,
    last_point_ch,
    alpha=1.0,
    model_type: str = "Large",  # only 'Small' or 'Large' are accepted
    minimalistic=False,
):
    """Prepares a MobileNetV3 mixer block.

    The mixer collapses the backbone's feature grid into a feature vector
    using one of three strategies selected by ``params.variant``:
    'mobilenet' (the stock MobileNetV3 head), 'maxpool' (a single global
    max pooling), or 'mhapool' (a cascade of multi-head-attention
    poolings).

    Parameters
    ----------
    input_tensor : tensor
        output tensor of the last backbone block
    params : mt.tfc.MobileNetV3MixerParams
        parameters defining the mixer block
    last_point_ch : int
        channel count of the final pointwise conv ('mobilenet' variant only)
    alpha : float
        width multiplier; values > 1.0 scale last_point_ch up accordingly
    model_type : {'Small', 'Large'}
        used only to name the returned model
    minimalistic : bool
        if True, the 'mobilenet' variant uses relu instead of hard_swish

    Returns
    -------
    tensorflow.keras.Model
        the mixer submodel, named 'MobileNetV3<model_type>Mixer'

    Raises
    ------
    mt.tfc.ModelSyntaxError
        on an unknown variant or invalid 'mhapool' parameters
    """

    x = input_tensor
    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1

    if params.variant == "mobilenet":
        # Unused locals 'kernel' and 'se_ratio' of the original were removed;
        # only the activation choice matters for the head.
        if minimalistic:
            activation = relu
        else:
            activation = hard_swish

        last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6)

        # if the width multiplier is greater than 1 we
        # increase the number of output channels
        if alpha > 1.0:
            last_point_ch = _depth(last_point_ch * alpha)
        x = layers.Conv2D(
            last_conv_ch, kernel_size=1, padding="same", use_bias=False, name="Conv_1"
        )(x)
        x = layers.BatchNormalization(
            axis=channel_axis, epsilon=1e-3, momentum=0.999, name="Conv_1/BatchNorm"
        )(x)
        x = activation(x)
        x = layers.GlobalAveragePooling2D()(x)
        if channel_axis == 1:
            x = layers.Reshape((last_conv_ch, 1, 1))(x)
        else:
            x = layers.Reshape((1, 1, last_conv_ch))(x)
        x = layers.Conv2D(
            last_point_ch, kernel_size=1, padding="same", use_bias=True, name="Conv_2"
        )(x)
        x = activation(x)
        # BUG FIX: 'outputs' was never assigned in this branch, making the
        # models.Model(...) call below fail with NameError.
        outputs = [x]
    elif params.variant == "maxpool":
        # BUG FIX: the original called layers.GlobalMaxPool2D(x), passing the
        # tensor as a constructor argument instead of instantiating the layer
        # and applying it; 'outputs' was also never assigned in this branch.
        x = layers.GlobalMaxPool2D()(x)
        outputs = [x]
    elif params.variant == "mhapool":
        if backend.image_data_format() == "channels_first":
            raise tfc.ModelSyntaxError(
                "Mixer variant 'mhapool' requires channels_last image data format."
            )

        mhapool_params = params.mhapool_cascade_params
        if not isinstance(mhapool_params, tfc.MHAPool2DCascadeParams):
            raise tfc.ModelSyntaxError(
                "Parameter 'params.mhapool_cascade_params' is not of type "
                "mt.tfc.MHAPool2DCascadeParams. Got: {}.".format(type(mhapool_params))
            )

        from ..keras_layers import MHAPool2D

        n_heads = mhapool_params.n_heads
        k = 0
        outputs = []
        # Repeatedly pool until the grid is reduced to (1, 1).
        while True:
            h = x.shape[1]
            w = x.shape[2]

            if h <= 1 and w <= 1:
                break

            c = x.shape[3]
            key_dim = (c + n_heads - 1) // n_heads
            value_dim = int(key_dim * mhapool_params.expansion_factor)
            k += 1
            block_name = f"MHAPool2DCascade_block{k}"
            if k > mhapool_params.max_num_pooling_layers:  # GlobalMaxPool2D
                x = layers.GlobalMaxPooling2D(
                    keepdims=True, name=block_name + "/GlobalMaxPool"
                )(x)
            else:  # MHAPool2D
                x = layers.LayerNormalization()(x)
                # The last pooling (grid already <= 2x2) may use a
                # different activation than the intermediate ones.
                if h <= 2 and w <= 2:
                    activation = mhapool_params.final_activation
                else:
                    activation = mhapool_params.activation
                x = MHAPool2D(
                    n_heads,
                    key_dim,
                    value_dim=value_dim,
                    pooling=mhapool_params.pooling,
                    dropout=mhapool_params.dropout,
                    name=block_name + "/MHAPool",
                )(x)

            if mhapool_params.output_all:
                outputs.append(x)
            else:
                outputs = [x]
    else:
        raise tfc.ModelSyntaxError(
            "Unknown mixer variant: '{}'.".format(params.variant)
        )

    # Create model.
    model = models.Model(
        input_tensor, outputs, name="MobileNetV3{}Mixer".format(model_type)
    )

    return model
348
+
349
+
350
def MobileNetV3Output(
    input_tensor,
    model_type: str = "Large",  # only 'Small' or 'Large' are accepted
    include_top=True,
    classes=1000,
    pooling=None,
    dropout_rate=0.2,
    classifier_activation="softmax",
):
    """Prepares a MobileNetV3 output block.

    Builds either the classification head (when ``include_top`` is True) or
    a global pooling layer (when ``pooling`` is 'avg' or 'max'). When
    neither applies there is nothing to build and None is returned.

    Parameters
    ----------
    input_tensor : tensor
        output tensor of the mixer block
    model_type : {'Small', 'Large'}
        used only to name the returned model
    include_top : bool
        whether to build the classification head
    classes : int
        number of classes of the classification head
    pooling : {None, 'avg', 'max'}
        pooling mode used when ``include_top`` is False
    dropout_rate : float
        dropout fraction applied before the logits (head only)
    classifier_activation : str or callable
        activation of the final predictions layer

    Returns
    -------
    tensorflow.keras.Model or None
        the output submodel, or None when there is nothing to build
    """

    x = input_tensor
    if include_top:
        # Head: dropout -> 1x1-conv logits -> flatten -> activation.
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate)(x)
        x = layers.Conv2D(classes, kernel_size=1, padding="same", name="Logits")(x)
        x = layers.Flatten()(x)
        x = layers.Activation(activation=classifier_activation, name="Predictions")(x)
    elif pooling == "avg":
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
    elif pooling == "max":
        x = layers.GlobalMaxPooling2D(name="max_pool")(x)
    else:
        return None

    return models.Model(input_tensor, x, name=f"MobileNetV3{model_type}Output")
380
+
381
+
382
def MobileNetV3Split(
    input_shape=None,
    alpha: float = 1.0,
    model_type: str = "Large",
    max_n_blocks: int = 6,
    minimalistic: bool = False,
    mixer_params: tp.Optional[tfc.MobileNetV3MixerParams] = None,
    include_top: bool = True,
    pooling=None,
    classes: int = 1000,
    dropout_rate: float = 0.2,
    classifier_activation="softmax",
    output_all: bool = False,
    name: tp.Optional[str] = None,
):
    """Prepares a model of submodels which is equivalent to a MobileNetV3 model.

    Parameters
    ----------
    input_shape : tuple
        Optional shape tuple, to be specified if you would like to use a model with an input image
        resolution that is not (224, 224, 3). It should have exactly 3 inputs channels
        (224, 224, 3). You can also omit this option if you would like to infer input_shape from an
        input_tensor. If you choose to include both input_tensor and input_shape then input_shape
        will be used if they match, if the shapes do not match then we will throw an error. E.g.
        `(160, 160, 3)` would be one valid value.
    alpha : float
        controls the width of the network. This is known as the depth multiplier in the MobileNetV3
        paper, but the name is kept for consistency with MobileNetV1 in Keras.
        - If `alpha` < 1.0, proportionally decreases the number
          of filters in each layer.
        - If `alpha` > 1.0, proportionally increases the number
          of filters in each layer.
        - If `alpha` = 1, default number of filters from the paper
          are used at each layer.
    model_type : {'Small', 'Large'}
        whether it is the small variant or the large variant
    max_n_blocks : int
        the maximum number of blocks in the backbone. It is further constrained by the actual
        maximum number of blocks that the variant can implement.
    minimalistic : bool
        In addition to large and small models this module also contains so-called minimalistic
        models, these models have the same per-layer dimensions characteristic as MobilenetV3
        however, they do not utilize any of the advanced blocks (squeeze-and-excite units,
        hard-swish, and 5x5 convolutions). While these models are less efficient on CPU, they
        are much more performant on GPU/DSP.
    mixer_params : mt.tfc.MobileNetV3MixerParams, optional
        parameters for defining the mixer block
    include_top : bool, default True
        whether to include the fully-connected layer at the top of the network. Only valid if
        `mixer_params` is not null.
    pooling : str, optional
        Optional pooling mode for feature extraction when `include_top` is False and
        `mixer_params` is not null.
        - `None` means that the output of the model will be the 4D tensor output of the last
          convolutional block.
        - `avg` means that global average pooling will be applied to the output of the last
          convolutional block, and thus the output of the model will be a 2D tensor.
        - `max` means that global max pooling will be applied.
    classes : int, optional
        Optional number of classes to classify images into, only to be specified if `mixer_params`
        is not null and `include_top` is True.
    dropout_rate : float
        fraction of the input units to drop on the last layer. Only to be specified if
        `mixer_params` is not null and `include_top` is True.
    classifier_activation : object
        A `str` or callable. The activation function to use on the "top" layer. Ignored unless
        `mixer_params` is not null and `include_top` is True. Set `classifier_activation=None` to
        return the logits of the "top" layer. When loading pretrained weights,
        `classifier_activation` can only be `None` or `"softmax"`.
    output_all : bool
        If True, the model returns the output tensor of every submodel other than the input layer.
        Otherwise, it returns the output tensor of the last submodel.
    name : str, optional
        model name, if any. Default to 'MobileNetV3LargeSplit' or 'MobileNetV3SmallSplit'.

    Returns
    -------
    tensorflow.keras.Model
        the output MobileNetV3 model split into 5 submodels
    """

    # Input layer + stem (one downsampling).
    input_layer = MobileNetV3Input(input_shape=input_shape)
    input_block = MobileNetV3Parser(
        input_layer,
        model_type=model_type,
        minimalistic=minimalistic,
    )
    x = input_block(input_layer)
    outputs = [x]

    # Backbone: one submodel per downsampling block, capped by max_n_blocks.
    num_blocks = 5 if model_type == "Large" else 4
    if num_blocks > max_n_blocks:
        num_blocks = max_n_blocks
    for i in range(num_blocks):
        if model_type == "Large":
            block = MobileNetV3LargeBlock(i, x, alpha=alpha, minimalistic=minimalistic)
        else:
            block = MobileNetV3SmallBlock(i, x, alpha=alpha, minimalistic=minimalistic)
        x = block(x)
        if output_all:
            outputs.append(x)
        else:
            outputs = [x]

    # Optional mixer: collapses the grid to a feature vector.
    if mixer_params is not None:
        if not isinstance(mixer_params, tfc.MobileNetV3MixerParams):
            raise tfc.ModelSyntaxError(
                "Argument 'mixer_params' is not an instance of "
                "mt.tfc.MobileNetV3MixerParams. Got: {}.".format(type(mixer_params))
            )

        if model_type == "Large":
            last_point_ch = 1280
        else:
            last_point_ch = 1024
        mixer_block = MobileNetV3Mixer(
            x,
            mixer_params,
            last_point_ch,
            alpha=alpha,
            model_type=model_type,
            minimalistic=minimalistic,
        )
        # The mixer may return a single tensor or a list of tensors
        # (e.g. 'mhapool' with output_all set in its cascade params).
        x = mixer_block(x)
        if output_all:
            if isinstance(x, (list, tuple)):
                outputs.extend(x)
            else:
                outputs.append(x)
        else:
            if isinstance(x, (list, tuple)):
                outputs = [x[-1]]
            else:
                outputs = [x]

        # Optional output block (classification head or global pooling);
        # MobileNetV3Output returns None when there is nothing to build.
        output_block = MobileNetV3Output(
            x,
            model_type=model_type,
            include_top=include_top,
            classes=classes,
            pooling=pooling,
            dropout_rate=dropout_rate,
            classifier_activation=classifier_activation,
        )
        if output_block is not None:
            x = output_block(x)
            if output_all:
                outputs.append(x)
            else:
                outputs = [x]

    # Create model.
    # BUG FIX: the default name was 'MobilenetV3...Split' (lowercase 'n'),
    # contradicting the docstring above and every other model name in this
    # module ('MobileNetV3...Parser', '...Mixer', '...Output').
    if name is None:
        name = f"MobileNetV3{model_type}Split"
    model = models.Model(input_layer, outputs, name=name)

    return model
@@ -0,0 +1,291 @@
1
+ # pylint: disable=invalid-name
2
+ # pylint: disable=missing-function-docstring
3
+ """MobileViT model.
4
+
5
+ Most of the code here has been ripped and updated off from the following
6
+ `Keras tutorial <https://keras.io/examples/vision/mobilevit/>`_. Please refer
7
+ to the `MobileViT ICLR2022 paper <https://arxiv.org/abs/2110.02178>`_ for more details.
8
+
9
+ The paper authors' code is `here <https://github.com/apple/ml-cvnets>`_.
10
+ """
11
+
12
+
13
+ from mt import tp, tfc, tf
14
+
15
+ from .mobilenet_v3_split import (
16
+ MobileNetV3Input,
17
+ _inverted_res_block,
18
+ backend,
19
+ models,
20
+ layers,
21
+ )
22
+
23
+
24
def conv_block(x, filters=16, kernel_size=3, strides=2):
    """Applies a same-padded Conv2D with swish activation to ``x``."""
    return layers.Conv2D(
        filters,
        kernel_size,
        strides=strides,
        activation=tf.nn.swish,
        padding="same",
    )(x)
29
+
30
+
31
+ # Reference: https://git.io/JKgtC
32
+
33
+
34
def inverted_residual_block(
    x, expanded_channels, output_channels, strides=1, block_id=0
):
    """Applies an MV2-style inverted residual block via Keras' private helper.

    Thin wrapper translating MobileViT-style arguments (absolute expanded
    channel count) into the expansion-ratio form expected by
    ``_inverted_res_block`` from ``keras.applications.mobilenet_v3``.

    Parameters
    ----------
    x : tensor
        input tensor
    expanded_channels : int
        absolute channel count after expansion; divided by the current
        channel count to obtain the expansion ratio
    output_channels : int
        channel count of the block's output
    strides : int
        stride of the depthwise conv (2 downsamples)
    block_id : int
        unique id used for layer naming; must be nonzero, since id 0
        triggers a special no-expansion path in the Keras helper

    Returns
    -------
    tensor
        output tensor of the block
    """
    if block_id == 0:
        raise NotImplementedError(
            "Zero block id for _inverted_res_block() is not implemented in MobileViT."
        )

    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1
    infilters = backend.int_shape(x)[channel_axis]

    # NOTE(review): the integer division below assumes expanded_channels is
    # a multiple of the current channel count — confirm with callers.
    m = _inverted_res_block(
        x,
        expanded_channels // infilters,  # expansion
        output_channels,  # filters
        3,  # kernel_size
        strides,  # stride
        0,  # se_ratio: falsy, so no squeeze-and-excite is applied
        tf.nn.swish,  # activation
        block_id,
    )

    return m
57
+
58
+
59
+ # Reference:
60
+ # https://keras.io/examples/vision/image_classification_with_vision_transformer/
61
+
62
+
63
def mlp(x, hidden_units, dropout_rate):
    """Feed-forward stack: a Dense(swish) + Dropout pair per entry of ``hidden_units``."""
    for n_units in hidden_units:
        dense = layers.Dense(n_units, activation=tf.nn.swish)
        x = layers.Dropout(dropout_rate)(dense(x))
    return x
68
+
69
+
70
def transformer_block(x, transformer_layers, projection_dim, num_heads=2):
    """Stacks ``transformer_layers`` pre-norm Transformer encoder blocks on ``x``."""
    # The MLP restores the input width, so the feature dim is loop-invariant.
    feat_dim = x.shape[-1]
    for _ in range(transformer_layers):
        # Multi-head self-attention with a residual connection.
        normed = layers.LayerNormalization(epsilon=1e-6)(x)
        attended = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(normed, normed)
        residual = layers.Add()([attended, x])

        # Position-wise feed-forward with a residual connection.
        normed2 = layers.LayerNormalization(epsilon=1e-6)(residual)
        transformed = mlp(
            normed2,
            hidden_units=[feat_dim * 2, feat_dim],
            dropout_rate=0.1,
        )
        x = layers.Add()([transformed, residual])

    return x
92
+
93
+
94
def mobilevit_block(x, num_blocks, projection_dim, strides=1):
    """Applies one MobileViT block: local convs, Transformers over patches, fusion.

    Unfolds the feature map into non-overlapping 2x2 cells, runs Transformer
    layers across them for global context, folds the result back into a grid,
    and fuses it with the input features by concatenation + convolution.

    Parameters
    ----------
    x : tensor
        input feature map of shape (B, H, W, C); H and W must be divisible
        by the cell size (2)
    num_blocks : int
        number of Transformer layers applied to the unfolded patches
    projection_dim : int
        channel width used by the local projection and the Transformers
    strides : int
        stride passed to the conv blocks (callers use the default 1, which
        keeps the spatial size; larger values would shrink it)

    Returns
    -------
    tensor
        the fused local+global feature map

    Raises
    ------
    mt.tfc.ModelSyntaxError
        if the spatial size of ``x`` is not divisible by the cell size
    """
    cell_size = 2  # 2x2 for the Transformer block

    # Local projection with convolutions.
    local_features = conv_block(x, filters=projection_dim, strides=strides)
    local_features = conv_block(
        local_features, filters=projection_dim, kernel_size=1, strides=strides
    )

    # The fold/unfold reshapes below require exact divisibility.
    if x.shape[1] % cell_size != 0:
        raise tfc.ModelSyntaxError(
            f"Input tensor must have height divisible by {cell_size}. Got {x.shape}."
        )

    if x.shape[2] % cell_size != 0:
        raise tfc.ModelSyntaxError(
            f"Input tensor must have width divisible by {cell_size}. Got {x.shape}."
        )

    # Unfold into patches and then pass through Transformers.
    z = local_features  # (B,H,W,C)
    z = layers.Reshape(
        (
            z.shape[1] // cell_size,
            cell_size,
            z.shape[2] // cell_size,
            cell_size,
            projection_dim,
        )
    )(
        z
    )  # (B,H/P,P,W/P,P,C)
    z = tf.transpose(z, perm=[0, 2, 4, 1, 3, 5])  # (B,P,P,H/P,W/P,C)
    non_overlapping_patches = layers.Reshape(
        (cell_size * cell_size, z.shape[3] * z.shape[4], projection_dim)
    )(
        z
    )  # (B,P*P,H*W/(P*P),C)
    global_features = transformer_block(
        non_overlapping_patches, num_blocks, projection_dim
    )

    # Fold into conv-like feature-maps (inverse of the unfold above).
    z = layers.Reshape(
        (
            cell_size,
            cell_size,
            x.shape[1] // cell_size,
            x.shape[2] // cell_size,
            projection_dim,
        )
    )(
        global_features
    )  # (B,P,P,H/P,W/P,C)
    z = tf.transpose(z, perm=[0, 3, 1, 4, 2, 5])  # (B,H/P,P,W/P,P,C)
    folded_feature_map = layers.Reshape((x.shape[1], x.shape[2], projection_dim))(z)

    # Apply point-wise conv -> concatenate with the input features.
    folded_feature_map = conv_block(
        folded_feature_map, filters=x.shape[-1], kernel_size=1, strides=strides
    )
    local_global_features = layers.Concatenate(axis=-1)([x, folded_feature_map])

    # Fuse the local and global features using a convolution layer.
    local_global_features = conv_block(
        local_global_features, filters=projection_dim, strides=strides
    )

    return local_global_features
163
+
164
+
165
def create_mobilevit(
    input_shape=None,
    model_type: str = "XXS",
    output_all: bool = False,
    name: tp.Optional[str] = None,
):
    """Prepares a MobileViT model (docstring previously misstated MobileNetV3).

    Parameters
    ----------
    input_shape : tuple
        Optional shape tuple, to be specified if you would like to use a model with an input image
        resolution that is not (224, 224, 3). It should have exactly 3 inputs channels
        (224, 224, 3). You can also omit this option if you would like to infer input_shape from an
        input_tensor. If you choose to include both input_tensor and input_shape then input_shape
        will be used if they match, if the shapes do not match then we will throw an error. E.g.
        `(160, 160, 3)` would be one valid value.
    model_type : {'XXS', 'XS', 'S'}
        one of the 3 variants introduced in the paper
    output_all : bool
        If True, the model returns the output tensor of every block before down-sampling, other
        than the input layer. Otherwise, it returns the output tensor of the last block.
    name : str, optional
        model name, if any. Default to 'MobileViT<model_type>'.

    Returns
    -------
    tensorflow.keras.Model
        the output MobileViT model
    """

    # Raises ValueError for any model_type outside the 3 variants; the
    # resulting index selects per-variant widths from the tables below.
    model_type_id = ["XXS", "XS", "S"].index(model_type)

    expansion_factor = 2 if model_type_id == 0 else 4

    inputs = MobileNetV3Input(input_shape=input_shape)
    x = layers.Rescaling(scale=1.0 / 255)(inputs)

    # Initial conv-stem -> MV2 block.
    x = conv_block(x, filters=16)
    x = inverted_residual_block(
        x,
        expanded_channels=16 * expansion_factor,
        output_channels=16 if model_type_id == 0 else 32,
        block_id=1,
    )
    outputs = [x]

    # Downsampling with MV2 block.
    output_channels = [24, 48, 64][model_type_id]
    x = inverted_residual_block(
        x,
        expanded_channels=16 * expansion_factor,
        output_channels=output_channels,
        strides=2,
        block_id=2,
    )
    x = inverted_residual_block(
        x,
        expanded_channels=24 * expansion_factor,
        output_channels=output_channels,
        block_id=3,
    )
    x = inverted_residual_block(
        x,
        expanded_channels=24 * expansion_factor,
        output_channels=output_channels,
        block_id=4,
    )
    if output_all:
        outputs.append(x)
    else:
        outputs = [x]

    # First MV2 -> MobileViT block.
    output_channels = [48, 64, 96][model_type_id]
    projection_dim = [64, 96, 144][model_type_id]
    x = inverted_residual_block(
        x,
        expanded_channels=48 * expansion_factor,
        output_channels=output_channels,
        strides=2,
        block_id=5,
    )
    x = mobilevit_block(x, num_blocks=2, projection_dim=projection_dim)
    if output_all:
        outputs.append(x)
    else:
        outputs = [x]

    # Second MV2 -> MobileViT block.
    output_channels = [64, 80, 128][model_type_id]
    projection_dim = [80, 120, 192][model_type_id]
    x = inverted_residual_block(
        x,
        expanded_channels=64 * expansion_factor,
        output_channels=output_channels,
        strides=2,
        block_id=6,
    )
    x = mobilevit_block(x, num_blocks=4, projection_dim=projection_dim)
    if output_all:
        outputs.append(x)
    else:
        outputs = [x]

    # Third MV2 -> MobileViT block, then a final 1x1 conv widening the features.
    output_channels = [80, 96, 160][model_type_id]
    projection_dim = [96, 144, 240][model_type_id]
    x = inverted_residual_block(
        x,
        expanded_channels=80 * expansion_factor,
        output_channels=output_channels,
        strides=2,
        block_id=7,
    )
    x = mobilevit_block(x, num_blocks=3, projection_dim=projection_dim)
    filters = [320, 384, 640][model_type_id]
    x = conv_block(x, filters=filters, kernel_size=1, strides=1)
    if output_all:
        outputs.append(x)
    else:
        outputs = [x]

    if name is None:
        name = f"MobileViT{model_type}"
    return models.Model(inputs, outputs, name=name)
@@ -4,7 +4,7 @@ for _x, _y in _layers.__dict__.items():
4
4
  if _x.startswith("_"):
5
5
  continue
6
6
  globals()[_x] = _y
7
- globals.__doc__ = _layers.__doc__
7
+ __doc__ = _layers.__doc__
8
8
 
9
9
  from .identical import *
10
10
  from .floor import *
mt/tf/version.py CHANGED
@@ -1,5 +1,5 @@
1
1
  MAJOR_VERSION = 1
2
2
  MINOR_VERSION = 1
3
- PATCH_VERSION = 10
3
+ PATCH_VERSION = 12
4
4
  version = '{}.{}.{}'.format(MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION)
5
5
  __all__ = ['MAJOR_VERSION', 'MINOR_VERSION', 'PATCH_VERSION', 'version']
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mttf
3
- Version: 1.1.10
3
+ Version: 1.1.12
4
4
  Summary: A package to detect and monkey-patch TensorFlow and Keras, for Minh-Tri Pham
5
5
  Home-page: https://github.com/inteplus/mttf
6
6
  Author: ['Minh-Tri Pham']
@@ -1,7 +1,10 @@
1
1
  mt/keras/__init__.py,sha256=oM2xsSZTuGSUvFJSpntk8r7bzGcTytNvn-Apv_SBFh4,70
2
+ mt/keras/applications/__init__.py,sha256=eTTWZQU9u0z7W0eDKo3GaVJvVCh2gDtjD8R7S-8ZWH0,593
3
+ mt/keras/applications/mobilenet_v3_split.py,sha256=kpFEpE-93QYIt25ZbbGoXuhVzB-GyaCDJnXFCxd0-iU,19747
4
+ mt/keras/applications/mobilevit.py,sha256=v-XzmFSwWAjN_nsp2ys0QINtoqcYD-qvNrSbKHkreQs,8914
2
5
  mt/keras/base/__init__.py,sha256=vIvuf3gjbpXC-rGpXLfIApLRUV6w1GUzVK8YJOgWLyk,327
3
6
  mt/keras/base/base.py,sha256=_B2sSUMlHOtGSAqQD1p5YD0raEDL4W0Bh3uKD6BXOJM,807
4
- mt/keras/layers/__init__.py,sha256=pqRphJpkmB1y59f17yPKEnOoy-_2tp5mPAD4fBY3pKQ,758
7
+ mt/keras/layers/__init__.py,sha256=7TMhZig3bHtyDpiwnoXcwgAxw6e5xkAy664Z6qLQUrY,750
5
8
  mt/keras/layers/counter.py,sha256=J3__IXbaa7zp72a5P8FFi0bfftTHwa1xzzCwxCIU2gc,856
6
9
  mt/keras/layers/floor.py,sha256=4mSpmTrhM7VqTK85POkjC3OhaTZUNUF9knO7gTbSGtc,512
7
10
  mt/keras/layers/identical.py,sha256=AIqC36PxU9sXyF9rZuQ-5ObjnIjBiSIMHIb5MwqVdmY,361
@@ -14,7 +17,7 @@ mt/tf/__init__.py,sha256=M8xiJNdrAUJZgiZTOQOdfkehjO-CYzGpoxh5HVGBkms,338
14
17
  mt/tf/init.py,sha256=BUpw33uyA_DmeJjrN2fX4MIs8MynKxkwgc2oTGTqssU,1294
15
18
  mt/tf/mttf_version.py,sha256=ha53i-H9pE-crufFttUECgXHwPvam07zMKzApUts1Gs,206
16
19
  mt/tf/utils.py,sha256=Copl5VM0PpuFUchK-AcBuGO6QitDwHcEs4FruZb2GAI,2460
17
- mt/tf/version.py,sha256=xBDDlvCAF-wcYw_LU24Q5In5Tf5HJhO5hhybChnGc4A,207
20
+ mt/tf/version.py,sha256=NG7gRVnIXelT8AS-NtYLCjdeQ9cag8SH1NfKOpmwVq0,207
18
21
  mt/tf/keras_applications/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
22
  mt/tf/keras_applications/mobilenet_v3_split.py,sha256=1oPB3EX3k3c7iju9Ksuw9xyv32-mOKPs3uy2Mk5tLd8,19716
20
23
  mt/tf/keras_applications/mobilevit.py,sha256=VsKB_U-f9jwUEjpd0eq-YXL4rDsuAbKQ0yIzkbMfLzw,9949
@@ -23,14 +26,14 @@ mt/tfc/__init__.py,sha256=XFXAbmEi5uTAuZj797cBqw9ZBnVy6ptxP0TTzVauMNk,8446
23
26
  mt/tfg/__init__.py,sha256=6Ly2QImAyQTsg_ZszuAuK_L2n56v89Cix9yYmMVk0CM,304
24
27
  mt/tfp/__init__.py,sha256=AQkGCkmDRwswEt3qoOSpxe-fZekx78sHHBs2ZVz33gc,383
25
28
  mt/tfp/real_nvp.py,sha256=U9EmkXGqFcvtS2yeh5_RgbKlVKKlGFGklAb7Voyazz4,4440
26
- mttf-1.1.10.data/scripts/dmt_pipi.sh,sha256=NNsj4P332unHMqU4mAFjU9PQvxQ8TK5XQ42LC29IZY8,510
27
- mttf-1.1.10.data/scripts/dmt_twineu.sh,sha256=KZhcYwuCW0c36tWcOgCe7uxJmS08rz-J6YNY76Exy4M,193
28
- mttf-1.1.10.data/scripts/pipi.sh,sha256=kdo96bdaKq2QIa52Z4XFSiGPcbDm09SAU9cju6I2Lxo,289
29
- mttf-1.1.10.data/scripts/wml_nexus.py,sha256=kW0ju8_kdXc4jOjhdzKiMsFuO1MNpHmu87skrhu9SEg,1492
30
- mttf-1.1.10.data/scripts/wml_pipi.sh,sha256=CuidIcbuxyXSBNQqYRhCcSC8QbBaSGnQX0KAIFaIvKA,499
31
- mttf-1.1.10.data/scripts/wml_twineu.sh,sha256=av1JLN765oOWC5LPkv2eSWIVof26y60601tMGkuYdb8,180
32
- mttf-1.1.10.dist-info/licenses/LICENSE,sha256=e_JtcszdGZ2ZGfjcymTGrcxFj_9XPicZOVtnsrPvruk,1070
33
- mttf-1.1.10.dist-info/METADATA,sha256=TflB9A0LQvdZSg0o03NUnUjERgNXsQbuq0eM3zSF9tc,568
34
- mttf-1.1.10.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
35
- mttf-1.1.10.dist-info/top_level.txt,sha256=WcqGFu9cV7iMZg09iam8eNxUvGpLSKKF2Iubf6SJVOo,3
36
- mttf-1.1.10.dist-info/RECORD,,
29
+ mttf-1.1.12.data/scripts/dmt_pipi.sh,sha256=NNsj4P332unHMqU4mAFjU9PQvxQ8TK5XQ42LC29IZY8,510
30
+ mttf-1.1.12.data/scripts/dmt_twineu.sh,sha256=KZhcYwuCW0c36tWcOgCe7uxJmS08rz-J6YNY76Exy4M,193
31
+ mttf-1.1.12.data/scripts/pipi.sh,sha256=kdo96bdaKq2QIa52Z4XFSiGPcbDm09SAU9cju6I2Lxo,289
32
+ mttf-1.1.12.data/scripts/wml_nexus.py,sha256=kW0ju8_kdXc4jOjhdzKiMsFuO1MNpHmu87skrhu9SEg,1492
33
+ mttf-1.1.12.data/scripts/wml_pipi.sh,sha256=CuidIcbuxyXSBNQqYRhCcSC8QbBaSGnQX0KAIFaIvKA,499
34
+ mttf-1.1.12.data/scripts/wml_twineu.sh,sha256=av1JLN765oOWC5LPkv2eSWIVof26y60601tMGkuYdb8,180
35
+ mttf-1.1.12.dist-info/licenses/LICENSE,sha256=e_JtcszdGZ2ZGfjcymTGrcxFj_9XPicZOVtnsrPvruk,1070
36
+ mttf-1.1.12.dist-info/METADATA,sha256=wqmEBPpwlrdEh_Asq2eXpphu-ZzFv2b4G1-drGywf84,568
37
+ mttf-1.1.12.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
38
+ mttf-1.1.12.dist-info/top_level.txt,sha256=WcqGFu9cV7iMZg09iam8eNxUvGpLSKKF2Iubf6SJVOo,3
39
+ mttf-1.1.12.dist-info/RECORD,,
File without changes
File without changes