keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.16.0.dev2024092017__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/__init__.py +0 -6
- keras_hub/api/__init__.py +2 -0
- keras_hub/api/bounding_box/__init__.py +36 -0
- keras_hub/api/layers/__init__.py +14 -0
- keras_hub/api/models/__init__.py +97 -48
- keras_hub/api/tokenizers/__init__.py +30 -0
- keras_hub/api/utils/__init__.py +22 -0
- keras_hub/src/api_export.py +15 -9
- keras_hub/src/bounding_box/__init__.py +13 -0
- keras_hub/src/bounding_box/converters.py +529 -0
- keras_hub/src/bounding_box/formats.py +162 -0
- keras_hub/src/bounding_box/iou.py +263 -0
- keras_hub/src/bounding_box/to_dense.py +95 -0
- keras_hub/src/bounding_box/to_ragged.py +99 -0
- keras_hub/src/bounding_box/utils.py +194 -0
- keras_hub/src/bounding_box/validate_format.py +99 -0
- keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
- keras_hub/src/layers/preprocessing/image_converter.py +130 -0
- keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
- keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
- keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
- keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
- keras_hub/src/layers/preprocessing/random_swap.py +33 -31
- keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
- keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
- keras_hub/src/models/albert/__init__.py +1 -2
- keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
- keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +34 -10
- keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
- keras_hub/src/models/albert/albert_tokenizer.py +17 -36
- keras_hub/src/models/backbone.py +12 -34
- keras_hub/src/models/bart/__init__.py +1 -2
- keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
- keras_hub/src/models/bart/bart_tokenizer.py +12 -39
- keras_hub/src/models/bert/__init__.py +1 -5
- keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
- keras_hub/src/models/bert/bert_presets.py +1 -4
- keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +19 -12
- keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
- keras_hub/src/models/bert/bert_tokenizer.py +17 -35
- keras_hub/src/models/bloom/__init__.py +1 -2
- keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
- keras_hub/src/models/causal_lm.py +10 -29
- keras_hub/src/models/causal_lm_preprocessor.py +195 -0
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
- keras_hub/src/models/deberta_v3/__init__.py +1 -4
- keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
- keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +16 -11
- keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
- keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
- keras_hub/src/models/densenet/densenet_backbone.py +46 -22
- keras_hub/src/models/distil_bert/__init__.py +1 -4
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
- keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +17 -12
- keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
- keras_hub/src/models/efficientnet/__init__.py +13 -0
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
- keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
- keras_hub/src/models/efficientnet/mbconv.py +238 -0
- keras_hub/src/models/electra/__init__.py +1 -2
- keras_hub/src/models/electra/electra_tokenizer.py +17 -32
- keras_hub/src/models/f_net/__init__.py +1 -2
- keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
- keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +17 -10
- keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
- keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
- keras_hub/src/models/falcon/__init__.py +1 -2
- keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
- keras_hub/src/models/gemma/__init__.py +1 -2
- keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
- keras_hub/src/models/gemma/gemma_decoder_block.py +1 -1
- keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
- keras_hub/src/models/gpt2/__init__.py +1 -2
- keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/gpt2/gpt2_preprocessor.py +12 -90
- keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
- keras_hub/src/models/image_classifier.py +0 -5
- keras_hub/src/models/image_classifier_preprocessor.py +83 -0
- keras_hub/src/models/llama/__init__.py +1 -2
- keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
- keras_hub/src/models/llama/llama_tokenizer.py +12 -25
- keras_hub/src/models/llama3/__init__.py +1 -2
- keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
- keras_hub/src/models/masked_lm.py +0 -2
- keras_hub/src/models/masked_lm_preprocessor.py +156 -0
- keras_hub/src/models/mistral/__init__.py +1 -2
- keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
- keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
- keras_hub/src/models/mobilenet/__init__.py +13 -0
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
- keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
- keras_hub/src/models/opt/__init__.py +1 -2
- keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
- keras_hub/src/models/opt/opt_tokenizer.py +12 -41
- keras_hub/src/models/pali_gemma/__init__.py +1 -4
- keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
- keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
- keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +11 -3
- keras_hub/src/models/phi3/__init__.py +1 -2
- keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
- keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
- keras_hub/src/models/preprocessor.py +72 -83
- keras_hub/src/models/resnet/__init__.py +6 -0
- keras_hub/src/models/resnet/resnet_backbone.py +390 -42
- keras_hub/src/models/resnet/resnet_image_classifier.py +33 -6
- keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
- keras_hub/src/models/{llama3/llama3_preprocessor.py → resnet/resnet_image_converter.py} +7 -5
- keras_hub/src/models/resnet/resnet_presets.py +95 -0
- keras_hub/src/models/retinanet/__init__.py +13 -0
- keras_hub/src/models/retinanet/anchor_generator.py +175 -0
- keras_hub/src/models/retinanet/box_matcher.py +259 -0
- keras_hub/src/models/retinanet/non_max_supression.py +578 -0
- keras_hub/src/models/roberta/__init__.py +1 -2
- keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
- keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +16 -11
- keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
- keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
- keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
- keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
- keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
- keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
- keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
- keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
- keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
- keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
- keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
- keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
- keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
- keras_hub/src/models/t5/__init__.py +1 -2
- keras_hub/src/models/t5/t5_tokenizer.py +13 -23
- keras_hub/src/models/task.py +71 -116
- keras_hub/src/models/{classifier.py → text_classifier.py} +19 -13
- keras_hub/src/models/text_classifier_preprocessor.py +138 -0
- keras_hub/src/models/whisper/__init__.py +1 -2
- keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
- keras_hub/src/models/whisper/whisper_backbone.py +0 -3
- keras_hub/src/models/whisper/whisper_presets.py +10 -10
- keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
- keras_hub/src/models/xlm_roberta/__init__.py +1 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
- keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +16 -11
- keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
- keras_hub/src/tests/test_case.py +46 -0
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +30 -17
- keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +20 -7
- keras_hub/src/tokenizers/tokenizer.py +67 -32
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
- keras_hub/src/tokenizers/word_piece_tokenizer.py +34 -47
- keras_hub/src/utils/imagenet/__init__.py +13 -0
- keras_hub/src/utils/imagenet/imagenet_utils.py +1067 -0
- keras_hub/src/utils/keras_utils.py +0 -50
- keras_hub/src/utils/preset_utils.py +230 -68
- keras_hub/src/utils/tensor_utils.py +187 -69
- keras_hub/src/utils/timm/convert_resnet.py +19 -16
- keras_hub/src/utils/timm/preset_loader.py +66 -0
- keras_hub/src/utils/transformers/convert_albert.py +193 -0
- keras_hub/src/utils/transformers/convert_bart.py +373 -0
- keras_hub/src/utils/transformers/convert_bert.py +7 -17
- keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
- keras_hub/src/utils/transformers/convert_gemma.py +5 -19
- keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
- keras_hub/src/utils/transformers/convert_llama3.py +7 -18
- keras_hub/src/utils/transformers/convert_mistral.py +129 -0
- keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
- keras_hub/src/utils/transformers/preset_loader.py +77 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
- keras_hub/src/version_utils.py +1 -1
- keras_hub_nightly-0.16.0.dev2024092017.dist-info/METADATA +202 -0
- keras_hub_nightly-0.16.0.dev2024092017.dist-info/RECORD +334 -0
- {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/WHEEL +1 -1
- keras_hub/src/models/bart/bart_preprocessor.py +0 -276
- keras_hub/src/models/bloom/bloom_preprocessor.py +0 -185
- keras_hub/src/models/electra/electra_preprocessor.py +0 -154
- keras_hub/src/models/falcon/falcon_preprocessor.py +0 -187
- keras_hub/src/models/gemma/gemma_preprocessor.py +0 -191
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +0 -145
- keras_hub/src/models/llama/llama_preprocessor.py +0 -189
- keras_hub/src/models/mistral/mistral_preprocessor.py +0 -190
- keras_hub/src/models/opt/opt_preprocessor.py +0 -188
- keras_hub/src/models/phi3/phi3_preprocessor.py +0 -190
- keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
- keras_hub/src/utils/timm/convert.py +0 -37
- keras_hub/src/utils/transformers/convert.py +0 -101
- keras_hub_nightly-0.15.0.dev20240823171555.dist-info/METADATA +0 -34
- keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
- {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/top_level.txt +0 -0
@@ -13,12 +13,18 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
from keras_hub.src.api_export import keras_hub_export
|
16
|
+
from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone
|
16
17
|
from keras_hub.src.tokenizers.sentence_piece_tokenizer import (
|
17
18
|
SentencePieceTokenizer,
|
18
19
|
)
|
19
20
|
|
20
21
|
|
21
|
-
@keras_hub_export(
|
22
|
+
@keras_hub_export(
|
23
|
+
[
|
24
|
+
"keras_hub.tokenizers.MistralTokenizer",
|
25
|
+
"keras_hub.models.MistralTokenizer",
|
26
|
+
]
|
27
|
+
)
|
22
28
|
class MistralTokenizer(SentencePieceTokenizer):
|
23
29
|
"""Mistral tokenizer layer based on SentencePiece.
|
24
30
|
|
@@ -28,10 +34,6 @@ class MistralTokenizer(SentencePieceTokenizer):
|
|
28
34
|
Mistral models and provides a `from_preset()` method to automatically
|
29
35
|
download a matching vocabulary for a Mistral preset.
|
30
36
|
|
31
|
-
This tokenizer does not provide truncation or padding of inputs. It can be
|
32
|
-
combined with a `keras_hub.models.MistralPreprocessor` layer for input
|
33
|
-
packing.
|
34
|
-
|
35
37
|
If input is a batch of strings (rank > 0), the layer will output a
|
36
38
|
`tf.RaggedTensor` where the last dimension of the output is ragged.
|
37
39
|
|
@@ -60,23 +62,10 @@ class MistralTokenizer(SentencePieceTokenizer):
|
|
60
62
|
```
|
61
63
|
"""
|
62
64
|
|
65
|
+
backbone_cls = MistralBackbone
|
66
|
+
|
63
67
|
def __init__(self, proto, **kwargs):
|
64
|
-
self.start_token = "<s>"
|
65
|
-
self.end_token = "</s>"
|
68
|
+
self._add_special_token("<s>", "start_token")
|
69
|
+
self._add_special_token("</s>", "end_token")
|
70
|
+
self.pad_token_id = 0
|
66
71
|
super().__init__(proto=proto, **kwargs)
|
67
|
-
|
68
|
-
def set_proto(self, proto):
|
69
|
-
super().set_proto(proto)
|
70
|
-
if proto is not None:
|
71
|
-
for token in [self.start_token, self.end_token]:
|
72
|
-
if token not in self.get_vocabulary():
|
73
|
-
raise ValueError(
|
74
|
-
f"Cannot find token `'{token}'` in the provided "
|
75
|
-
f"`vocabulary`. Please provide `'{token}'` in your "
|
76
|
-
"`vocabulary` or use a pretrained `vocabulary` name."
|
77
|
-
)
|
78
|
-
self.start_token_id = self.token_to_id(self.start_token)
|
79
|
-
self.end_token_id = self.token_to_id(self.end_token)
|
80
|
-
else:
|
81
|
-
self.start_token_id = None
|
82
|
-
self.end_token_id = None
|
@@ -37,7 +37,7 @@ class MiTBackbone(FeaturePyramidBackbone):
|
|
37
37
|
patch_sizes,
|
38
38
|
strides,
|
39
39
|
include_rescaling=True,
|
40
|
-
image_shape=(224, 224, 3),
|
40
|
+
image_shape=(None, None, 3),
|
41
41
|
hidden_dims=None,
|
42
42
|
**kwargs,
|
43
43
|
):
|
@@ -63,7 +63,7 @@ class MiTBackbone(FeaturePyramidBackbone):
|
|
63
63
|
include_rescaling: bool, whether to rescale the inputs. If set
|
64
64
|
to `True`, inputs will be passed through a `Rescaling(1/255.0)`
|
65
65
|
layer. Defaults to `True`.
|
66
|
-
image_shape: optional shape tuple, defaults to (224, 224, 3).
|
66
|
+
image_shape: optional shape tuple, defaults to (None, None, 3).
|
67
67
|
hidden_dims: the embedding dims per hierarchical layer, used as
|
68
68
|
the levels of the feature pyramid.
|
69
69
|
patch_sizes: list of integers, the patch_size to apply for each layer.
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Copyright 2024 The KerasHub Authors
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
@@ -0,0 +1,530 @@
|
|
1
|
+
# Copyright 2024 The KerasHub Authors
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
import keras
|
15
|
+
from keras import ops
|
16
|
+
|
17
|
+
from keras_hub.src.api_export import keras_hub_export
|
18
|
+
from keras_hub.src.models.backbone import Backbone
|
19
|
+
|
20
|
+
BN_EPSILON = 1e-3
|
21
|
+
BN_MOMENTUM = 0.999
|
22
|
+
|
23
|
+
|
24
|
+
@keras_hub_export("keras_hub.models.MobileNetBackbone")
|
25
|
+
class MobileNetBackbone(Backbone):
|
26
|
+
"""Instantiates the MobileNet architecture.
|
27
|
+
|
28
|
+
MobileNet is a lightweight convolutional neural network (CNN)
|
29
|
+
optimized for mobile and edge devices, striking a balance between
|
30
|
+
accuracy and efficiency. By employing depthwise separable convolutions
|
31
|
+
and techniques like Squeeze-and-Excitation (SE) blocks,
|
32
|
+
MobileNet models are highly suitable for real-time applications on
|
33
|
+
resource-constrained devices.
|
34
|
+
|
35
|
+
References:
|
36
|
+
- [MobileNets: Efficient Convolutional Neural Networks
|
37
|
+
for Mobile Vision Applications](
|
38
|
+
https://arxiv.org/abs/1704.04861)
|
39
|
+
- [MobileNetV2: Inverted Residuals and Linear Bottlenecks](
|
40
|
+
https://arxiv.org/abs/1801.04381) (CVPR 2018)
|
41
|
+
- [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf)
|
42
|
+
(ICCV 2019)
|
43
|
+
|
44
|
+
Args:
|
45
|
+
stackwise_expansion: list of ints or floats, the expansion ratio for
|
46
|
+
each inverted residual block in the model.
|
47
|
+
stackwise_num_filters: list of ints, number of filters for each inverted
|
48
|
+
residual block in the model.
|
49
|
+
stackwise_kernel_size: list of ints, kernel size for each inverted
|
50
|
+
residual block in the model.
|
51
|
+
stackwise_num_strides: list of ints, stride length for each inverted
|
52
|
+
residual block in the model.
|
53
|
+
stackwise_se_ratio: se ratio for each inverted residual block in the
|
54
|
+
model. 0 if dont want to add Squeeze and Excite layer.
|
55
|
+
stackwise_activation: list of activation functions, for each inverted
|
56
|
+
residual block in the model.
|
57
|
+
include_rescaling: bool, whether to rescale the inputs. If set to True,
|
58
|
+
inputs will be passed through a `Rescaling(scale=1 / 255)`
|
59
|
+
layer.
|
60
|
+
image_shape: optional shape tuple, defaults to (224, 224, 3).
|
61
|
+
depth_multiplier: float, controls the width of the network.
|
62
|
+
- If `depth_multiplier` < 1.0, proportionally decreases the number
|
63
|
+
of filters in each layer.
|
64
|
+
- If `depth_multiplier` > 1.0, proportionally increases the number
|
65
|
+
of filters in each layer.
|
66
|
+
- If `depth_multiplier` = 1, default number of filters from the paper
|
67
|
+
are used at each layer.
|
68
|
+
input_num_filters: number of filters in first convolution layer
|
69
|
+
output_num_filters: specifies whether to add conv and batch_norm in the end,
|
70
|
+
if set to None, it will not add these layers in the end.
|
71
|
+
'None' for MobileNetV1
|
72
|
+
input_activation: activation function to be used in the input layer
|
73
|
+
'hard_swish' for MobileNetV3,
|
74
|
+
'relu6' for MobileNetV1 and MobileNetV2
|
75
|
+
output_activation: activation function to be used in the output layer
|
76
|
+
'hard_swish' for MobileNetV3,
|
77
|
+
'relu6' for MobileNetV1 and MobileNetV2
|
78
|
+
inverted_res_block: whether to use inverted residual blocks or not,
|
79
|
+
'False' for MobileNetV1,
|
80
|
+
'True' for MobileNetV2 and MobileNetV3
|
81
|
+
|
82
|
+
|
83
|
+
Example:
|
84
|
+
```python
|
85
|
+
input_data = tf.ones(shape=(8, 224, 224, 3))
|
86
|
+
|
87
|
+
# Randomly initialized backbone with a custom config
|
88
|
+
model = MobileNetBackbone(
|
89
|
+
stackwise_expansion=[1, 4, 6],
|
90
|
+
stackwise_num_filters=[4, 8, 16],
|
91
|
+
stackwise_kernel_size=[3, 3, 5],
|
92
|
+
stackwise_num_strides=[2, 2, 1],
|
93
|
+
stackwise_se_ratio=[0.25, None, 0.25],
|
94
|
+
stackwise_activation=["relu", "relu6", "hard_swish"],
|
95
|
+
include_rescaling=False,
|
96
|
+
output_num_filters=1280,
|
97
|
+
input_activation='hard_swish',
|
98
|
+
output_activation='hard_swish',
|
99
|
+
inverted_res_block=True,
|
100
|
+
|
101
|
+
)
|
102
|
+
output = model(input_data)
|
103
|
+
```
|
104
|
+
"""
|
105
|
+
|
106
|
+
def __init__(
|
107
|
+
self,
|
108
|
+
stackwise_expansion,
|
109
|
+
stackwise_num_filters,
|
110
|
+
stackwise_kernel_size,
|
111
|
+
stackwise_num_strides,
|
112
|
+
stackwise_se_ratio,
|
113
|
+
stackwise_activation,
|
114
|
+
include_rescaling,
|
115
|
+
output_num_filters,
|
116
|
+
inverted_res_block,
|
117
|
+
image_shape=(224, 224, 3),
|
118
|
+
input_activation="hard_swish",
|
119
|
+
output_activation="hard_swish",
|
120
|
+
depth_multiplier=1.0,
|
121
|
+
input_num_filters=16,
|
122
|
+
**kwargs,
|
123
|
+
):
|
124
|
+
# === Functional Model ===
|
125
|
+
channel_axis = (
|
126
|
+
-1 if keras.config.image_data_format() == "channels_last" else 1
|
127
|
+
)
|
128
|
+
|
129
|
+
inputs = keras.layers.Input(shape=image_shape)
|
130
|
+
x = inputs
|
131
|
+
|
132
|
+
if include_rescaling:
|
133
|
+
x = keras.layers.Rescaling(scale=1 / 255)(x)
|
134
|
+
|
135
|
+
input_num_filters = adjust_channels(input_num_filters)
|
136
|
+
x = keras.layers.Conv2D(
|
137
|
+
input_num_filters,
|
138
|
+
kernel_size=3,
|
139
|
+
strides=(2, 2),
|
140
|
+
padding="same",
|
141
|
+
data_format=keras.config.image_data_format(),
|
142
|
+
use_bias=False,
|
143
|
+
name="input_conv",
|
144
|
+
)(x)
|
145
|
+
x = keras.layers.BatchNormalization(
|
146
|
+
axis=channel_axis,
|
147
|
+
epsilon=BN_EPSILON,
|
148
|
+
momentum=BN_MOMENTUM,
|
149
|
+
name="input_batch_norm",
|
150
|
+
)(x)
|
151
|
+
x = keras.layers.Activation(input_activation)(x)
|
152
|
+
|
153
|
+
for stack_index in range(len(stackwise_num_filters)):
|
154
|
+
filters = adjust_channels(
|
155
|
+
(stackwise_num_filters[stack_index]) * depth_multiplier
|
156
|
+
)
|
157
|
+
|
158
|
+
if inverted_res_block:
|
159
|
+
x = apply_inverted_res_block(
|
160
|
+
x,
|
161
|
+
expansion=stackwise_expansion[stack_index],
|
162
|
+
filters=filters,
|
163
|
+
kernel_size=stackwise_kernel_size[stack_index],
|
164
|
+
stride=stackwise_num_strides[stack_index],
|
165
|
+
se_ratio=(stackwise_se_ratio[stack_index]),
|
166
|
+
activation=stackwise_activation[stack_index],
|
167
|
+
expansion_index=stack_index,
|
168
|
+
)
|
169
|
+
else:
|
170
|
+
x = apply_depthwise_conv_block(
|
171
|
+
x,
|
172
|
+
filters=filters,
|
173
|
+
kernel_size=3,
|
174
|
+
stride=stackwise_num_strides[stack_index],
|
175
|
+
depth_multiplier=depth_multiplier,
|
176
|
+
block_id=stack_index,
|
177
|
+
)
|
178
|
+
|
179
|
+
if output_num_filters is not None:
|
180
|
+
last_conv_ch = adjust_channels(x.shape[channel_axis] * 6)
|
181
|
+
|
182
|
+
x = keras.layers.Conv2D(
|
183
|
+
last_conv_ch,
|
184
|
+
kernel_size=1,
|
185
|
+
padding="same",
|
186
|
+
data_format=keras.config.image_data_format(),
|
187
|
+
use_bias=False,
|
188
|
+
name="output_conv",
|
189
|
+
)(x)
|
190
|
+
x = keras.layers.BatchNormalization(
|
191
|
+
axis=channel_axis,
|
192
|
+
epsilon=BN_EPSILON,
|
193
|
+
momentum=BN_MOMENTUM,
|
194
|
+
name="output_batch_norm",
|
195
|
+
)(x)
|
196
|
+
x = keras.layers.Activation(output_activation)(x)
|
197
|
+
|
198
|
+
super().__init__(inputs=inputs, outputs=x, **kwargs)
|
199
|
+
|
200
|
+
# === Config ===
|
201
|
+
self.stackwise_expansion = stackwise_expansion
|
202
|
+
self.stackwise_num_filters = stackwise_num_filters
|
203
|
+
self.stackwise_kernel_size = stackwise_kernel_size
|
204
|
+
self.stackwise_num_strides = stackwise_num_strides
|
205
|
+
self.stackwise_se_ratio = stackwise_se_ratio
|
206
|
+
self.stackwise_activation = stackwise_activation
|
207
|
+
self.include_rescaling = include_rescaling
|
208
|
+
self.depth_multiplier = depth_multiplier
|
209
|
+
self.input_num_filters = input_num_filters
|
210
|
+
self.output_num_filters = output_num_filters
|
211
|
+
self.input_activation = keras.activations.get(input_activation)
|
212
|
+
self.output_activation = keras.activations.get(output_activation)
|
213
|
+
self.inverted_res_block = inverted_res_block
|
214
|
+
self.image_shape = image_shape
|
215
|
+
|
216
|
+
def get_config(self):
|
217
|
+
config = super().get_config()
|
218
|
+
config.update(
|
219
|
+
{
|
220
|
+
"stackwise_expansion": self.stackwise_expansion,
|
221
|
+
"stackwise_num_filters": self.stackwise_num_filters,
|
222
|
+
"stackwise_kernel_size": self.stackwise_kernel_size,
|
223
|
+
"stackwise_num_strides": self.stackwise_num_strides,
|
224
|
+
"stackwise_se_ratio": self.stackwise_se_ratio,
|
225
|
+
"stackwise_activation": self.stackwise_activation,
|
226
|
+
"include_rescaling": self.include_rescaling,
|
227
|
+
"image_shape": self.image_shape,
|
228
|
+
"depth_multiplier": self.depth_multiplier,
|
229
|
+
"input_num_filters": self.input_num_filters,
|
230
|
+
"output_num_filters": self.output_num_filters,
|
231
|
+
"input_activation": keras.activations.serialize(
|
232
|
+
activation=self.input_activation
|
233
|
+
),
|
234
|
+
"output_activation": keras.activations.serialize(
|
235
|
+
activation=self.output_activation
|
236
|
+
),
|
237
|
+
"inverted_res_block": self.inverted_res_block,
|
238
|
+
}
|
239
|
+
)
|
240
|
+
return config
|
241
|
+
|
242
|
+
|
243
|
+
def adjust_channels(x, divisor=8, min_value=None):
|
244
|
+
"""Ensure that all layers have a channel number divisible by the `divisor`.
|
245
|
+
|
246
|
+
Args:
|
247
|
+
x: integer, input value.
|
248
|
+
divisor: integer, the value by which a channel number should be
|
249
|
+
divisible, defaults to 8.
|
250
|
+
min_value: float, optional minimum value for the new tensor. If None,
|
251
|
+
defaults to value of divisor.
|
252
|
+
|
253
|
+
Returns:
|
254
|
+
the updated input scalar.
|
255
|
+
"""
|
256
|
+
|
257
|
+
if min_value is None:
|
258
|
+
min_value = divisor
|
259
|
+
|
260
|
+
new_x = max(min_value, int(x + divisor / 2) // divisor * divisor)
|
261
|
+
|
262
|
+
# make sure that round down does not go down by more than 10%.
|
263
|
+
if new_x < 0.9 * x:
|
264
|
+
new_x += divisor
|
265
|
+
return new_x
|
266
|
+
|
267
|
+
|
268
|
+
def apply_inverted_res_block(
|
269
|
+
x,
|
270
|
+
expansion,
|
271
|
+
filters,
|
272
|
+
kernel_size,
|
273
|
+
stride,
|
274
|
+
se_ratio,
|
275
|
+
activation,
|
276
|
+
expansion_index,
|
277
|
+
):
|
278
|
+
"""An Inverted Residual Block.
|
279
|
+
|
280
|
+
Args:
|
281
|
+
x: input tensor.
|
282
|
+
expansion: integer, the expansion ratio, multiplied with infilters to
|
283
|
+
get the minimum value passed to adjust_channels.
|
284
|
+
filters: integer, number of filters for convolution layer.
|
285
|
+
kernel_size: integer, the kernel size for DepthWise Convolutions.
|
286
|
+
stride: integer, the stride length for DepthWise Convolutions.
|
287
|
+
se_ratio: float, ratio for bottleneck filters. Number of bottleneck
|
288
|
+
filters = filters * se_ratio.
|
289
|
+
activation: the activation layer to use.
|
290
|
+
expansion_index: integer, a unique identification if you want to use
|
291
|
+
expanded convolutions. If greater than 0, an additional Conv+BN
|
292
|
+
layer is added after the expanded convolutional layer.
|
293
|
+
|
294
|
+
Returns:
|
295
|
+
the updated input tensor.
|
296
|
+
"""
|
297
|
+
channel_axis = (
|
298
|
+
-1 if keras.config.image_data_format() == "channels_last" else 1
|
299
|
+
)
|
300
|
+
activation = keras.activations.get(activation)
|
301
|
+
shortcut = x
|
302
|
+
prefix = "expanded_conv_"
|
303
|
+
infilters = x.shape[channel_axis]
|
304
|
+
|
305
|
+
if expansion_index > 0:
|
306
|
+
prefix = f"expanded_conv_{expansion_index}_"
|
307
|
+
|
308
|
+
x = keras.layers.Conv2D(
|
309
|
+
adjust_channels(infilters * expansion),
|
310
|
+
kernel_size=1,
|
311
|
+
padding="same",
|
312
|
+
data_format=keras.config.image_data_format(),
|
313
|
+
use_bias=False,
|
314
|
+
name=prefix + "expand",
|
315
|
+
)(x)
|
316
|
+
x = keras.layers.BatchNormalization(
|
317
|
+
axis=channel_axis,
|
318
|
+
epsilon=BN_EPSILON,
|
319
|
+
momentum=BN_MOMENTUM,
|
320
|
+
name=prefix + "expand_BatchNorm",
|
321
|
+
)(x)
|
322
|
+
x = keras.layers.Activation(activation=activation)(x)
|
323
|
+
|
324
|
+
if stride == 2:
|
325
|
+
x = keras.layers.ZeroPadding2D(
|
326
|
+
padding=correct_pad_downsample(x, kernel_size),
|
327
|
+
name=prefix + "depthwise_pad",
|
328
|
+
)(x)
|
329
|
+
|
330
|
+
x = keras.layers.DepthwiseConv2D(
|
331
|
+
kernel_size,
|
332
|
+
strides=stride,
|
333
|
+
padding="same" if stride == 1 else "valid",
|
334
|
+
data_format=keras.config.image_data_format(),
|
335
|
+
use_bias=False,
|
336
|
+
name=prefix + "depthwise",
|
337
|
+
)(x)
|
338
|
+
x = keras.layers.BatchNormalization(
|
339
|
+
axis=channel_axis,
|
340
|
+
epsilon=BN_EPSILON,
|
341
|
+
momentum=BN_MOMENTUM,
|
342
|
+
name=prefix + "depthwise_BatchNorm",
|
343
|
+
)(x)
|
344
|
+
x = keras.layers.Activation(activation=activation)(x)
|
345
|
+
|
346
|
+
if se_ratio:
|
347
|
+
se_filters = adjust_channels(infilters * expansion)
|
348
|
+
x = SqueezeAndExcite2D(
|
349
|
+
input=x,
|
350
|
+
filters=se_filters,
|
351
|
+
bottleneck_filters=adjust_channels(se_filters * se_ratio),
|
352
|
+
squeeze_activation="relu",
|
353
|
+
excite_activation=keras.activations.hard_sigmoid,
|
354
|
+
)
|
355
|
+
|
356
|
+
x = keras.layers.Conv2D(
|
357
|
+
filters,
|
358
|
+
kernel_size=1,
|
359
|
+
padding="same",
|
360
|
+
data_format=keras.config.image_data_format(),
|
361
|
+
use_bias=False,
|
362
|
+
name=prefix + "project",
|
363
|
+
)(x)
|
364
|
+
x = keras.layers.BatchNormalization(
|
365
|
+
axis=channel_axis,
|
366
|
+
epsilon=BN_EPSILON,
|
367
|
+
momentum=BN_MOMENTUM,
|
368
|
+
name=prefix + "project_BatchNorm",
|
369
|
+
)(x)
|
370
|
+
|
371
|
+
if stride == 1 and infilters == filters:
|
372
|
+
x = keras.layers.Add(name=prefix + "Add")([shortcut, x])
|
373
|
+
|
374
|
+
return x
|
375
|
+
|
376
|
+
|
377
|
+
def apply_depthwise_conv_block(
    x,
    filters,
    kernel_size=3,
    depth_multiplier=1,
    stride=1,
    block_id=1,
):
    """Apply a depthwise-separable convolution block to `x`.

    The block chains, in order: an optional explicit zero padding (only
    when `stride == 2`), a bias-free depthwise convolution, batch
    normalization, ReLU6, a bias-free 1x1 (pointwise) convolution, batch
    normalization and a final ReLU6.

    Args:
        x: Input tensor. 4D, `(batch, rows, cols, channels)` in
            "channels_last" or `(batch, channels, rows, cols)` in
            "channels_first".
        filters: Integer, number of output filters of the pointwise
            convolution.
        kernel_size: Kernel size of the depthwise convolution. Also used
            to compute the explicit padding when `stride == 2`.
        depth_multiplier: Forwarded unchanged to
            `keras.layers.DepthwiseConv2D` as its `depth_multiplier`.
        stride: Stride of the depthwise convolution. With `stride == 1`
            the convolution uses "same" padding; any other value uses
            "valid" padding, and `stride == 2` additionally zero-pads the
            input first.
        block_id: Integer suffix used to build the layer names of this
            block.

    Returns:
        Output tensor of the block.
    """
    data_format = keras.config.image_data_format()
    if data_format == "channels_last":
        channel_axis = -1
    else:
        channel_axis = 1

    # Strided blocks pad explicitly and then convolve without padding.
    if stride == 2:
        pad_layer = keras.layers.ZeroPadding2D(
            padding=correct_pad_downsample(x, kernel_size),
            name="conv_pad_%d" % block_id,
        )
        x = pad_layer(x)

    if stride == 1:
        depthwise_padding = "same"
    else:
        depthwise_padding = "valid"

    # Depthwise stage: conv -> batch norm -> ReLU6.
    depthwise = keras.layers.DepthwiseConv2D(
        kernel_size,
        strides=stride,
        padding=depthwise_padding,
        data_format=data_format,
        depth_multiplier=depth_multiplier,
        use_bias=False,
        name="depthwise_%d" % block_id,
    )
    x = depthwise(x)
    depthwise_norm = keras.layers.BatchNormalization(
        axis=channel_axis,
        epsilon=BN_EPSILON,
        momentum=BN_MOMENTUM,
        name="depthwise_BatchNorm_%d" % block_id,
    )
    x = depthwise_norm(x)
    x = keras.layers.ReLU(6.0)(x)

    # Pointwise stage: 1x1 conv -> batch norm -> ReLU6.
    pointwise = keras.layers.Conv2D(
        filters,
        kernel_size=1,
        padding="same",
        data_format=data_format,
        use_bias=False,
        name="conv_%d" % block_id,
    )
    x = pointwise(x)
    pointwise_norm = keras.layers.BatchNormalization(
        axis=channel_axis,
        epsilon=BN_EPSILON,
        momentum=BN_MOMENTUM,
        name="BatchNorm_%d" % block_id,
    )
    x = pointwise_norm(x)
    x = keras.layers.ReLU(6.0)(x)
    return x
|
456
|
+
|
457
|
+
|
458
|
+
def SqueezeAndExcite2D(
    input,
    filters,
    bottleneck_filters=None,
    squeeze_activation="relu",
    excite_activation="sigmoid",
):
    """Re-weight a feature map with a squeeze-and-excitation gate.

    `input` is reduced with global average pooling (`keepdims=True`),
    passed through a 1x1 `Conv2D` with `bottleneck_filters` filters (the
    squeeze step) and then a 1x1 `Conv2D` with `filters` filters (the
    excite step). The result is multiplied with `input`.

    Args:
        input: Feature map tensor to re-weight.
        filters: Number of filters of the excite convolution.
            NOTE(review): presumably has to match the channel count of
            `input` so the final multiply lines up; confirm with callers.
        bottleneck_filters: (Optional) Number of filters of the squeeze
            convolution. Any falsy value (e.g. `None` or `0`) falls back
            to `filters // 4`.
        squeeze_activation: (Optional) Activation passed to the squeeze
            convolution. Defaults to `"relu"`.
        excite_activation: (Optional) Activation passed to the excite
            convolution. Defaults to `"sigmoid"`.

    Returns:
        The product of the excite output and `input`.
    """
    data_format = keras.config.image_data_format()
    bottleneck_filters = bottleneck_filters or filters // 4

    pooled = keras.layers.GlobalAveragePooling2D(keepdims=True)(input)

    squeeze_conv = keras.layers.Conv2D(
        filters=bottleneck_filters,
        kernel_size=(1, 1),
        data_format=data_format,
        activation=squeeze_activation,
    )
    squeezed = squeeze_conv(pooled)

    excite_conv = keras.layers.Conv2D(
        filters=filters,
        kernel_size=(1, 1),
        data_format=data_format,
        activation=excite_activation,
    )
    gate = excite_conv(squeezed)

    return ops.multiply(gate, input)
|
506
|
+
|
507
|
+
|
508
|
+
def correct_pad_downsample(inputs, kernel_size, data_format=None):
    """Returns a tuple for zero-padding for 2D convolution with downsampling.

    For each spatial dimension the padding is `kernel // 2` after and
    `kernel // 2` before, with the "before" amount reduced by one when
    that dimension has even size. When either spatial dimension is not
    statically known, both are treated as even.

    Args:
        inputs: Input tensor; only its `shape` attribute is read.
        kernel_size: An integer or tuple/list of 2 integers.
        data_format: (Optional) `"channels_last"` or `"channels_first"`.
            Selects which axes of `inputs.shape` are the spatial ones.
            Defaults to `keras.config.image_data_format()`.

    Returns:
        A tuple `((pad_top, pad_bottom), (pad_left, pad_right))`.
    """
    if data_format is None:
        data_format = keras.config.image_data_format()
    # Spatial axes are (2, 3) for `(batch, channels, rows, cols)` and
    # (1, 2) for `(batch, rows, cols, channels)`.
    img_dim = 2 if data_format == "channels_first" else 1
    input_size = inputs.shape[img_dim : (img_dim + 2)]
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)
    # Fall back when *any* spatial dim is unknown, so a known height with
    # an unknown width no longer fails on `None % 2`.
    if any(dim is None for dim in input_size):
        adjust = (1, 1)
    else:
        adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
    correct = (kernel_size[0] // 2, kernel_size[1] // 2)
    return (
        (correct[0] - adjust[0], correct[0]),
        (correct[1] - adjust[1], correct[1]),
    )
|