keras-hub-nightly 0.15.0.dev20240823171555-py3-none-any.whl → 0.16.0.dev2024092017-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. keras_hub/__init__.py +0 -6
  2. keras_hub/api/__init__.py +2 -0
  3. keras_hub/api/bounding_box/__init__.py +36 -0
  4. keras_hub/api/layers/__init__.py +14 -0
  5. keras_hub/api/models/__init__.py +97 -48
  6. keras_hub/api/tokenizers/__init__.py +30 -0
  7. keras_hub/api/utils/__init__.py +22 -0
  8. keras_hub/src/api_export.py +15 -9
  9. keras_hub/src/bounding_box/__init__.py +13 -0
  10. keras_hub/src/bounding_box/converters.py +529 -0
  11. keras_hub/src/bounding_box/formats.py +162 -0
  12. keras_hub/src/bounding_box/iou.py +263 -0
  13. keras_hub/src/bounding_box/to_dense.py +95 -0
  14. keras_hub/src/bounding_box/to_ragged.py +99 -0
  15. keras_hub/src/bounding_box/utils.py +194 -0
  16. keras_hub/src/bounding_box/validate_format.py +99 -0
  17. keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
  18. keras_hub/src/layers/preprocessing/image_converter.py +130 -0
  19. keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
  20. keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
  21. keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
  22. keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
  23. keras_hub/src/layers/preprocessing/random_swap.py +33 -31
  24. keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
  25. keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
  26. keras_hub/src/models/albert/__init__.py +1 -2
  27. keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
  28. keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +34 -10
  29. keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
  30. keras_hub/src/models/albert/albert_tokenizer.py +17 -36
  31. keras_hub/src/models/backbone.py +12 -34
  32. keras_hub/src/models/bart/__init__.py +1 -2
  33. keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
  34. keras_hub/src/models/bart/bart_tokenizer.py +12 -39
  35. keras_hub/src/models/bert/__init__.py +1 -5
  36. keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
  37. keras_hub/src/models/bert/bert_presets.py +1 -4
  38. keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +19 -12
  39. keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
  40. keras_hub/src/models/bert/bert_tokenizer.py +17 -35
  41. keras_hub/src/models/bloom/__init__.py +1 -2
  42. keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
  43. keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
  44. keras_hub/src/models/causal_lm.py +10 -29
  45. keras_hub/src/models/causal_lm_preprocessor.py +195 -0
  46. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
  47. keras_hub/src/models/deberta_v3/__init__.py +1 -4
  48. keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
  49. keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +16 -11
  50. keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
  51. keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
  52. keras_hub/src/models/densenet/densenet_backbone.py +46 -22
  53. keras_hub/src/models/distil_bert/__init__.py +1 -4
  54. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
  55. keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +17 -12
  56. keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
  57. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
  58. keras_hub/src/models/efficientnet/__init__.py +13 -0
  59. keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
  60. keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
  61. keras_hub/src/models/efficientnet/mbconv.py +238 -0
  62. keras_hub/src/models/electra/__init__.py +1 -2
  63. keras_hub/src/models/electra/electra_tokenizer.py +17 -32
  64. keras_hub/src/models/f_net/__init__.py +1 -2
  65. keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
  66. keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +17 -10
  67. keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
  68. keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
  69. keras_hub/src/models/falcon/__init__.py +1 -2
  70. keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
  71. keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
  72. keras_hub/src/models/gemma/__init__.py +1 -2
  73. keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
  74. keras_hub/src/models/gemma/gemma_decoder_block.py +1 -1
  75. keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
  76. keras_hub/src/models/gpt2/__init__.py +1 -2
  77. keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
  78. keras_hub/src/models/gpt2/gpt2_preprocessor.py +12 -90
  79. keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
  80. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
  81. keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
  82. keras_hub/src/models/image_classifier.py +0 -5
  83. keras_hub/src/models/image_classifier_preprocessor.py +83 -0
  84. keras_hub/src/models/llama/__init__.py +1 -2
  85. keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
  86. keras_hub/src/models/llama/llama_tokenizer.py +12 -25
  87. keras_hub/src/models/llama3/__init__.py +1 -2
  88. keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
  89. keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
  90. keras_hub/src/models/masked_lm.py +0 -2
  91. keras_hub/src/models/masked_lm_preprocessor.py +156 -0
  92. keras_hub/src/models/mistral/__init__.py +1 -2
  93. keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
  94. keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
  95. keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
  96. keras_hub/src/models/mobilenet/__init__.py +13 -0
  97. keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
  98. keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
  99. keras_hub/src/models/opt/__init__.py +1 -2
  100. keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
  101. keras_hub/src/models/opt/opt_tokenizer.py +12 -41
  102. keras_hub/src/models/pali_gemma/__init__.py +1 -4
  103. keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
  104. keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
  105. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
  106. keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +11 -3
  107. keras_hub/src/models/phi3/__init__.py +1 -2
  108. keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
  109. keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
  110. keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
  111. keras_hub/src/models/preprocessor.py +72 -83
  112. keras_hub/src/models/resnet/__init__.py +6 -0
  113. keras_hub/src/models/resnet/resnet_backbone.py +390 -42
  114. keras_hub/src/models/resnet/resnet_image_classifier.py +33 -6
  115. keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
  116. keras_hub/src/models/{llama3/llama3_preprocessor.py → resnet/resnet_image_converter.py} +7 -5
  117. keras_hub/src/models/resnet/resnet_presets.py +95 -0
  118. keras_hub/src/models/retinanet/__init__.py +13 -0
  119. keras_hub/src/models/retinanet/anchor_generator.py +175 -0
  120. keras_hub/src/models/retinanet/box_matcher.py +259 -0
  121. keras_hub/src/models/retinanet/non_max_supression.py +578 -0
  122. keras_hub/src/models/roberta/__init__.py +1 -2
  123. keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
  124. keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +16 -11
  125. keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
  126. keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
  127. keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
  128. keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
  129. keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
  130. keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
  131. keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
  132. keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
  133. keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
  134. keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
  135. keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
  136. keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
  137. keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
  138. keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
  139. keras_hub/src/models/t5/__init__.py +1 -2
  140. keras_hub/src/models/t5/t5_tokenizer.py +13 -23
  141. keras_hub/src/models/task.py +71 -116
  142. keras_hub/src/models/{classifier.py → text_classifier.py} +19 -13
  143. keras_hub/src/models/text_classifier_preprocessor.py +138 -0
  144. keras_hub/src/models/whisper/__init__.py +1 -2
  145. keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
  146. keras_hub/src/models/whisper/whisper_backbone.py +0 -3
  147. keras_hub/src/models/whisper/whisper_presets.py +10 -10
  148. keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
  149. keras_hub/src/models/xlm_roberta/__init__.py +1 -4
  150. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
  151. keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +16 -11
  152. keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
  153. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
  154. keras_hub/src/tests/test_case.py +46 -0
  155. keras_hub/src/tokenizers/byte_pair_tokenizer.py +30 -17
  156. keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
  157. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +20 -7
  158. keras_hub/src/tokenizers/tokenizer.py +67 -32
  159. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
  160. keras_hub/src/tokenizers/word_piece_tokenizer.py +34 -47
  161. keras_hub/src/utils/imagenet/__init__.py +13 -0
  162. keras_hub/src/utils/imagenet/imagenet_utils.py +1067 -0
  163. keras_hub/src/utils/keras_utils.py +0 -50
  164. keras_hub/src/utils/preset_utils.py +230 -68
  165. keras_hub/src/utils/tensor_utils.py +187 -69
  166. keras_hub/src/utils/timm/convert_resnet.py +19 -16
  167. keras_hub/src/utils/timm/preset_loader.py +66 -0
  168. keras_hub/src/utils/transformers/convert_albert.py +193 -0
  169. keras_hub/src/utils/transformers/convert_bart.py +373 -0
  170. keras_hub/src/utils/transformers/convert_bert.py +7 -17
  171. keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
  172. keras_hub/src/utils/transformers/convert_gemma.py +5 -19
  173. keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
  174. keras_hub/src/utils/transformers/convert_llama3.py +7 -18
  175. keras_hub/src/utils/transformers/convert_mistral.py +129 -0
  176. keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
  177. keras_hub/src/utils/transformers/preset_loader.py +77 -0
  178. keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
  179. keras_hub/src/version_utils.py +1 -1
  180. keras_hub_nightly-0.16.0.dev2024092017.dist-info/METADATA +202 -0
  181. keras_hub_nightly-0.16.0.dev2024092017.dist-info/RECORD +334 -0
  182. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/WHEEL +1 -1
  183. keras_hub/src/models/bart/bart_preprocessor.py +0 -276
  184. keras_hub/src/models/bloom/bloom_preprocessor.py +0 -185
  185. keras_hub/src/models/electra/electra_preprocessor.py +0 -154
  186. keras_hub/src/models/falcon/falcon_preprocessor.py +0 -187
  187. keras_hub/src/models/gemma/gemma_preprocessor.py +0 -191
  188. keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +0 -145
  189. keras_hub/src/models/llama/llama_preprocessor.py +0 -189
  190. keras_hub/src/models/mistral/mistral_preprocessor.py +0 -190
  191. keras_hub/src/models/opt/opt_preprocessor.py +0 -188
  192. keras_hub/src/models/phi3/phi3_preprocessor.py +0 -190
  193. keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
  194. keras_hub/src/utils/timm/convert.py +0 -37
  195. keras_hub/src/utils/transformers/convert.py +0 -101
  196. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/METADATA +0 -34
  197. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
  198. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/top_level.txt +0 -0
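Many of the renames in the list above follow one pattern: task classes move from `*Classifier` to `*TextClassifier`, and the per-model `*_preprocessor.py` modules are replaced by task-specific preprocessors (causal LM, masked LM, text classification, seq2seq). As a rough illustration of the renamed API, here is a minimal sketch; the preset name and argument values are examples and are not part of this diff:

```python
import keras_hub

# Formerly `keras_hub.models.BertClassifier`; renamed to `BertTextClassifier`
# in this release. Preset name and `num_classes` are illustrative.
classifier = keras_hub.models.BertTextClassifier.from_preset(
    "bert_base_en",
    num_classes=2,
)
predictions = classifier.predict(["What an amazing movie!"])
```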
keras_hub/src/models/efficientnet/fusedmbconv.py
@@ -0,0 +1,229 @@
+ # Copyright 2024 The KerasHub Authors
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import keras
+
+ BN_AXIS = 3
+
+ CONV_KERNEL_INITIALIZER = {
+     "class_name": "VarianceScaling",
+     "config": {
+         "scale": 2.0,
+         "mode": "fan_out",
+         "distribution": "truncated_normal",
+     },
+ }
+
+
+ class FusedMBConvBlock(keras.layers.Layer):
+     """Implementation of the FusedMBConv block
+
+     Also known as a Fused Mobile Inverted Residual Bottleneck block from:
+     [EfficientNet-EdgeTPU: Creating Accelerator-Optimized Neural Networks with AutoML]
+     (https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html)
+     [EfficientNetV2: Smaller Models and Faster Training]
+     (https://arxiv.org/abs/2104.00298v3).
+
+     FusedMBConv blocks are based on MBConv blocks, and replace the depthwise and
+     1x1 output convolution blocks with a single 3x3 convolution block, fusing
+     them together - hence the name "FusedMBConv". Alongside MBConv blocks, they
+     can be used in mobile-oriented and efficient architectures, and are present
+     in architectures EfficientNet.
+
+     FusedMBConv blocks follow a narrow-wide-narrow structure - expanding a 1x1
+     convolution, performing Squeeze-Excitation and then applying a 3x3
+     convolution, which is a more efficient operation than conventional
+     wide-narrow-wide structures.
+
+     As they're frequently used for models to be deployed to edge devices,
+     they're implemented as a layer for ease of use and re-use.
+
+     Args:
+         input_filters: int, the number of input filters
+         output_filters: int, the number of output filters
+         expand_ratio: default 1, the ratio by which input_filters are multiplied
+             to expand the structure in the middle expansion phase
+         kernel_size: default 3, the kernel_size to apply to the expansion phase
+             convolutions
+         strides: default 1, the strides to apply to the expansion phase
+             convolutions
+         se_ratio: default 0.0, The filters used in the Squeeze-Excitation phase,
+             and are chosen as the maximum between 1 and input_filters*se_ratio
+         batch_norm_momentum: default 0.9, the BatchNormalization momentum
+         activation: default "swish", the activation function used between
+             convolution operations
+         dropout: float, the optional dropout rate to apply before the output
+             convolution, defaults to 0.2
+
+     Returns:
+         A tensor representing a feature map, passed through the FusedMBConv
+         block
+
+     Note:
+         Not intended to be used outside of the EfficientNet architecture.
+     """
+
+     def __init__(
+         self,
+         input_filters,
+         output_filters,
+         expand_ratio=1,
+         kernel_size=3,
+         strides=1,
+         se_ratio=0.0,
+         batch_norm_momentum=0.9,
+         activation="swish",
+         dropout=0.2,
+         **kwargs
+     ):
+         super().__init__(**kwargs)
+         self.input_filters = input_filters
+         self.output_filters = output_filters
+         self.expand_ratio = expand_ratio
+         self.kernel_size = kernel_size
+         self.strides = strides
+         self.se_ratio = se_ratio
+         self.batch_norm_momentum = batch_norm_momentum
+         self.activation = activation
+         self.dropout = dropout
+         self.filters = self.input_filters * self.expand_ratio
+         self.filters_se = max(1, int(input_filters * se_ratio))
+
+         self.conv1 = keras.layers.Conv2D(
+             filters=self.filters,
+             kernel_size=kernel_size,
+             strides=strides,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             padding="same",
+             data_format="channels_last",
+             use_bias=False,
+             name=self.name + "expand_conv",
+         )
+         self.bn1 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "expand_bn",
+         )
+         self.act = keras.layers.Activation(
+             self.activation, name=self.name + "expand_activation"
+         )
+
+         self.bn2 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "bn",
+         )
+
+         self.se_conv1 = keras.layers.Conv2D(
+             self.filters_se,
+             1,
+             padding="same",
+             activation=self.activation,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             name=self.name + "se_reduce",
+         )
+
+         self.se_conv2 = keras.layers.Conv2D(
+             self.filters,
+             1,
+             padding="same",
+             activation="sigmoid",
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             name=self.name + "se_expand",
+         )
+
+         self.output_conv = keras.layers.Conv2D(
+             filters=self.output_filters,
+             kernel_size=1 if expand_ratio != 1 else kernel_size,
+             strides=1,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             padding="same",
+             data_format="channels_last",
+             use_bias=False,
+             name=self.name + "project_conv",
+         )
+
+         self.bn3 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "project_bn",
+         )
+
+         if self.dropout:
+             self.dropout_layer = keras.layers.Dropout(
+                 self.dropout,
+                 noise_shape=(None, 1, 1, 1),
+                 name=self.name + "drop",
+             )
+
+     def build(self, input_shape):
+         if self.name is None:
+             self.name = keras.backend.get_uid("block0")
+
+     def call(self, inputs):
+         # Expansion phase
+         if self.expand_ratio != 1:
+             x = self.conv1(inputs)
+             x = self.bn1(x)
+             x = self.act(x)
+         else:
+             x = inputs
+
+         # Squeeze and excite
+         if 0 < self.se_ratio <= 1:
+             se = keras.layers.GlobalAveragePooling2D(
+                 name=self.name + "se_squeeze"
+             )(x)
+             if BN_AXIS == 1:
+                 se_shape = (self.filters, 1, 1)
+             else:
+                 se_shape = (1, 1, self.filters)
+
+             se = keras.layers.Reshape(se_shape, name=self.name + "se_reshape")(
+                 se
+             )
+
+             se = self.se_conv1(se)
+             se = self.se_conv2(se)
+
+             x = keras.layers.multiply([x, se], name=self.name + "se_excite")
+
+         # Output phase:
+         x = self.output_conv(x)
+         x = self.bn3(x)
+         if self.expand_ratio == 1:
+             x = self.act(x)
+
+         # Residual:
+         if self.strides == 1 and self.input_filters == self.output_filters:
+             if self.dropout:
+                 x = self.dropout_layer(x)
+             x = keras.layers.Add(name=self.name + "add")([x, inputs])
+         return x
+
+     def get_config(self):
+         config = {
+             "input_filters": self.input_filters,
+             "output_filters": self.output_filters,
+             "expand_ratio": self.expand_ratio,
+             "kernel_size": self.kernel_size,
+             "strides": self.strides,
+             "se_ratio": self.se_ratio,
+             "batch_norm_momentum": self.batch_norm_momentum,
+             "activation": self.activation,
+             "dropout": self.dropout,
+         }
+
+         base_config = super().get_config()
+         return dict(list(base_config.items()) + list(config.items()))
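The diff itself does not show the block in use. As a rough illustration of how the new layer is wired together, here is a minimal sketch; the import path is the internal module from the file list (not a public API), and the filter and shape values are made up:

```python
import numpy as np

# Internal module path from the file list above; not a documented public API.
from keras_hub.src.models.efficientnet.fusedmbconv import FusedMBConvBlock

# One block: fused 3x3 expansion, squeeze-excitation, then a 1x1 projection.
block = FusedMBConvBlock(
    input_filters=32,
    output_filters=32,  # matching filters + strides=1 enables the residual add
    expand_ratio=4,
    se_ratio=0.25,
)

x = np.random.rand(1, 64, 64, 32).astype("float32")  # channels_last input
y = block(x)
print(y.shape)  # expected (1, 64, 64, 32): spatial size kept, channels projected
```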
keras_hub/src/models/efficientnet/mbconv.py
@@ -0,0 +1,238 @@
+ # Copyright 2024 The KerasHub Authors
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import keras
+
+ BN_AXIS = 3
+
+ CONV_KERNEL_INITIALIZER = {
+     "class_name": "VarianceScaling",
+     "config": {
+         "scale": 2.0,
+         "mode": "fan_out",
+         "distribution": "truncated_normal",
+     },
+ }
+
+
+ class MBConvBlock(keras.layers.Layer):
+     def __init__(
+         self,
+         input_filters,
+         output_filters,
+         expand_ratio=1,
+         kernel_size=3,
+         strides=1,
+         se_ratio=0.0,
+         batch_norm_momentum=0.9,
+         activation="swish",
+         dropout=0.2,
+         **kwargs
+     ):
+         """Implementation of the MBConv block
+
+         Also known as a Mobile Inverted Residual Bottleneck block from:
+         [MobileNetV2: Inverted Residuals and Linear Bottlenecks]
+         (https://arxiv.org/abs/1801.04381v4).
+
+         MBConv blocks are common blocks used in mobile-oriented and efficient
+         architectures, present in architectures such as MobileNet, EfficientNet,
+         MaxViT, etc.
+
+         MBConv blocks follow a narrow-wide-narrow structure - expanding a 1x1
+         convolution, applying depthwise convolution, and narrowing back to a 1x1
+         convolution, which is a more efficient operation than conventional
+         wide-narrow-wide structures.
+
+         As they're frequently used for models to be deployed to edge devices,
+         they're implemented as a layer for ease of use and re-use.
+
+         Args:
+             input_filters: int, the number of input filters
+             output_filters: int, the optional number of output filters after
+                 Squeeze-Excitation
+             expand_ratio: default 1, the ratio by which input_filters are
+                 multiplied to expand the structure in the middle expansion phase
+             kernel_size: default 3, the kernel_size to apply to the expansion
+                 phase convolutions
+             strides: default 1, the strides to apply to the expansion phase
+                 convolutions
+             se_ratio: default 0.0, Squeeze-Excitation happens before depthwise
+                 convolution and before output convolution only if the se_ratio
+                 is above 0. The filters used in this phase are chosen as the
+                 maximum between 1 and input_filters*se_ratio
+             batch_norm_momentum: default 0.9, the BatchNormalization momentum
+             activation: default "swish", the activation function used between
+                 convolution operations
+             dropout: float, the optional dropout rate to apply before the output
+                 convolution, defaults to 0.2
+
+         Returns:
+             A tensor representing a feature map, passed through the MBConv
+             block
+
+
+         Note:
+             Not intended to be used outside of the EfficientNet architecture.
+         """
+
+         super().__init__(**kwargs)
+         self.input_filters = input_filters
+         self.output_filters = output_filters
+         self.expand_ratio = expand_ratio
+         self.kernel_size = kernel_size
+         self.strides = strides
+         self.se_ratio = se_ratio
+         self.batch_norm_momentum = batch_norm_momentum
+         self.activation = activation
+         self.dropout = dropout
+         self.filters = self.input_filters * self.expand_ratio
+         self.filters_se = max(1, int(input_filters * se_ratio))
+
+         self.conv1 = keras.layers.Conv2D(
+             filters=self.filters,
+             kernel_size=1,
+             strides=1,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             padding="same",
+             data_format="channels_last",
+             use_bias=False,
+             name=self.name + "expand_conv",
+         )
+         self.bn1 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "expand_bn",
+         )
+         self.act = keras.layers.Activation(
+             self.activation, name=self.name + "activation"
+         )
+         self.depthwise = keras.layers.DepthwiseConv2D(
+             kernel_size=self.kernel_size,
+             strides=self.strides,
+             depthwise_initializer=CONV_KERNEL_INITIALIZER,
+             padding="same",
+             data_format="channels_last",
+             use_bias=False,
+             name=self.name + "dwconv2",
+         )
+
+         self.bn2 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "bn",
+         )
+
+         self.se_conv1 = keras.layers.Conv2D(
+             self.filters_se,
+             1,
+             padding="same",
+             activation=self.activation,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             name=self.name + "se_reduce",
+         )
+
+         self.se_conv2 = keras.layers.Conv2D(
+             self.filters,
+             1,
+             padding="same",
+             activation="sigmoid",
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             name=self.name + "se_expand",
+         )
+
+         self.output_conv = keras.layers.Conv2D(
+             filters=self.output_filters,
+             kernel_size=1 if expand_ratio != 1 else kernel_size,
+             strides=1,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             padding="same",
+             data_format="channels_last",
+             use_bias=False,
+             name=self.name + "project_conv",
+         )
+
+         self.bn3 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "project_bn",
+         )
+
+         if self.dropout:
+             self.dropout_layer = keras.layers.Dropout(
+                 self.dropout,
+                 noise_shape=(None, 1, 1, 1),
+                 name=self.name + "drop",
+             )
+
+     def build(self, input_shape):
+         if self.name is None:
+             self.name = keras.backend.get_uid("block0")
+
+     def call(self, inputs):
+         # Expansion phase
+         if self.expand_ratio != 1:
+             x = self.conv1(inputs)
+             x = self.bn1(x)
+             x = self.act(x)
+         else:
+             x = inputs
+
+         # Depthwise conv
+         x = self.depthwise(x)
+         x = self.bn2(x)
+         x = self.act(x)
+
+         # Squeeze and excite
+         if 0 < self.se_ratio <= 1:
+             se = keras.layers.GlobalAveragePooling2D(
+                 name=self.name + "se_squeeze"
+             )(x)
+             if BN_AXIS == 1:
+                 se_shape = (self.filters, 1, 1)
+             else:
+                 se_shape = (1, 1, self.filters)
+             se = keras.layers.Reshape(se_shape, name=self.name + "se_reshape")(
+                 se
+             )
+
+             se = self.se_conv1(se)
+             se = self.se_conv2(se)
+
+             x = keras.layers.multiply([x, se], name=self.name + "se_excite")
+
+         # Output phase
+         x = self.output_conv(x)
+         x = self.bn3(x)
+
+         if self.strides == 1 and self.input_filters == self.output_filters:
+             if self.dropout:
+                 x = self.dropout_layer(x)
+             x = keras.layers.Add(name=self.name + "add")([x, inputs])
+         return x
+
+     def get_config(self):
+         config = {
+             "input_filters": self.input_filters,
+             "output_filters": self.output_filters,
+             "expand_ratio": self.expand_ratio,
+             "kernel_size": self.kernel_size,
+             "strides": self.strides,
+             "se_ratio": self.se_ratio,
+             "batch_norm_momentum": self.batch_norm_momentum,
+             "activation": self.activation,
+             "dropout": self.dropout,
+         }
+         base_config = super().get_config()
+         return dict(list(base_config.items()) + list(config.items()))
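Since `get_config()` simply re-exposes the constructor arguments, the block can be rebuilt from its config. A minimal sketch, again using the internal module path from the file list and illustrative filter values:

```python
import numpy as np

# Internal module path from the file list above; not a documented public API.
from keras_hub.src.models.efficientnet.mbconv import MBConvBlock

block = MBConvBlock(
    input_filters=16,
    output_filters=16,
    expand_ratio=6,
    se_ratio=0.25,
)

x = np.random.rand(1, 32, 32, 16).astype("float32")
y = block(x)  # 1x1 expansion -> depthwise conv -> SE -> 1x1 projection (+ residual)
print(y.shape)  # expected (1, 32, 32, 16)

# Round-trip through the config: all constructor args are serialized.
clone = MBConvBlock.from_config(block.get_config())
```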
keras_hub/src/models/electra/__init__.py
@@ -14,7 +14,6 @@

  from keras_hub.src.models.electra.electra_backbone import ElectraBackbone
  from keras_hub.src.models.electra.electra_presets import backbone_presets
- from keras_hub.src.models.electra.electra_tokenizer import ElectraTokenizer
  from keras_hub.src.utils.preset_utils import register_presets

- register_presets(backbone_presets, (ElectraBackbone, ElectraTokenizer))
+ register_presets(backbone_presets, ElectraBackbone)
keras_hub/src/models/electra/electra_tokenizer.py
@@ -13,10 +13,16 @@
  # limitations under the License.

  from keras_hub.src.api_export import keras_hub_export
+ from keras_hub.src.models.electra.electra_backbone import ElectraBackbone
  from keras_hub.src.tokenizers.word_piece_tokenizer import WordPieceTokenizer


- @keras_hub_export("keras_hub.models.ElectraTokenizer")
+ @keras_hub_export(
+     [
+         "keras_hub.tokenizers.ElectraTokenizer",
+         "keras_hub.models.ElectraTokenizer",
+     ]
+ )
  class ElectraTokenizer(WordPieceTokenizer):
      """A ELECTRA tokenizer using WordPiece subword segmentation.

@@ -60,45 +66,24 @@ class ElectraTokenizer(WordPieceTokenizer):
      ```
      """

+     backbone_cls = ElectraBackbone
+
      def __init__(
          self,
          vocabulary,
          lowercase=False,
-         special_tokens_in_strings=False,
          **kwargs,
      ):
-         self.cls_token = "[CLS]"
-         self.sep_token = "[SEP]"
-         self.pad_token = "[PAD]"
-         self.mask_token = "[MASK]"
+         self._add_special_token("[CLS]", "cls_token")
+         self._add_special_token("[SEP]", "sep_token")
+         self._add_special_token("[PAD]", "pad_token")
+         self._add_special_token("[MASK]", "mask_token")
+         # Also add `tokenizer.start_token` and `tokenizer.end_token` for
+         # compatibility with other tokenizers.
+         self._add_special_token("[CLS]", "start_token")
+         self._add_special_token("[SEP]", "end_token")
          super().__init__(
              vocabulary=vocabulary,
              lowercase=lowercase,
-             special_tokens=[
-                 self.cls_token,
-                 self.sep_token,
-                 self.pad_token,
-                 self.mask_token,
-             ],
-             special_tokens_in_strings=special_tokens_in_strings,
              **kwargs,
          )
-
-     def set_vocabulary(self, vocabulary):
-         super().set_vocabulary(vocabulary)
-
-         if vocabulary is not None:
-             self.cls_token_id = self.token_to_id(self.cls_token)
-             self.sep_token_id = self.token_to_id(self.sep_token)
-             self.pad_token_id = self.token_to_id(self.pad_token)
-             self.mask_token_id = self.token_to_id(self.mask_token)
-         else:
-             self.cls_token_id = None
-             self.sep_token_id = None
-             self.pad_token_id = None
-             self.mask_token_id = None
-
-     def get_config(self):
-         config = super().get_config()
-         del config["special_tokens"]  # Not configurable; set in __init__.
-         return config
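For context, the tokenizer is now exported under both `keras_hub.tokenizers` and `keras_hub.models`, and its special tokens are registered through the shared `_add_special_token` helper, which also provides `start_token`/`end_token` aliases. A minimal sketch of the resulting usage; the toy vocabulary is made up and real presets ship their own assets:

```python
import keras_hub

# Toy vocabulary for illustration only.
vocab = ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]", "the", "quick", "brown", "fox"]

# Both export paths added by this change resolve to the same class.
tokenizer = keras_hub.tokenizers.ElectraTokenizer(vocabulary=vocab, lowercase=True)

# `start_token`/`end_token` aliases are registered alongside the usual
# `cls_token`/`sep_token` names for cross-tokenizer compatibility.
print(tokenizer.start_token, tokenizer.end_token)  # expected: [CLS] [SEP]
print(tokenizer("the quick brown fox"))
```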
keras_hub/src/models/f_net/__init__.py
@@ -14,7 +14,6 @@

  from keras_hub.src.models.f_net.f_net_backbone import FNetBackbone
  from keras_hub.src.models.f_net.f_net_presets import backbone_presets
- from keras_hub.src.models.f_net.f_net_tokenizer import FNetTokenizer
  from keras_hub.src.utils.preset_utils import register_presets

- register_presets(backbone_presets, (FNetBackbone, FNetTokenizer))
+ register_presets(backbone_presets, FNetBackbone)
keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py
@@ -13,17 +13,16 @@
  # limitations under the License.

  import keras
- from absl import logging

  from keras_hub.src.api_export import keras_hub_export
- from keras_hub.src.layers.preprocessing.masked_lm_mask_generator import (
-     MaskedLMMaskGenerator,
- )
- from keras_hub.src.models.f_net.f_net_preprocessor import FNetPreprocessor
+ from keras_hub.src.models.f_net.f_net_backbone import FNetBackbone
+ from keras_hub.src.models.f_net.f_net_tokenizer import FNetTokenizer
+ from keras_hub.src.models.masked_lm_preprocessor import MaskedLMPreprocessor
+ from keras_hub.src.utils.tensor_utils import preprocessing_function


  @keras_hub_export("keras_hub.models.FNetMaskedLMPreprocessor")
- class FNetMaskedLMPreprocessor(FNetPreprocessor):
+ class FNetMaskedLMPreprocessor(MaskedLMPreprocessor):
      """FNet preprocessing for the masked language modeling task.

      This preprocessing layer will prepare inputs for a masked language modeling
@@ -119,78 +118,13 @@ class FNetMaskedLMPreprocessor(FNetPreprocessor):
      ```
      """

-     def __init__(
-         self,
-         tokenizer,
-         sequence_length=512,
-         truncate="round_robin",
-         mask_selection_rate=0.15,
-         mask_selection_length=96,
-         mask_token_rate=0.8,
-         random_token_rate=0.1,
-         **kwargs,
-     ):
-         super().__init__(
-             tokenizer,
-             sequence_length=sequence_length,
-             truncate=truncate,
-             **kwargs,
-         )
-         self.mask_selection_rate = mask_selection_rate
-         self.mask_selection_length = mask_selection_length
-         self.mask_token_rate = mask_token_rate
-         self.random_token_rate = random_token_rate
-         self.masker = None
-
-     def build(self, input_shape):
-         super().build(input_shape)
-         # Defer masker creation to `build()` so that we can be sure tokenizer
-         # assets have loaded when restoring a saved model.
-         self.masker = MaskedLMMaskGenerator(
-             mask_selection_rate=self.mask_selection_rate,
-             mask_selection_length=self.mask_selection_length,
-             mask_token_rate=self.mask_token_rate,
-             random_token_rate=self.random_token_rate,
-             vocabulary_size=self.tokenizer.vocabulary_size(),
-             mask_token_id=self.tokenizer.mask_token_id,
-             unselectable_token_ids=[
-                 self.tokenizer.cls_token_id,
-                 self.tokenizer.sep_token_id,
-                 self.tokenizer.pad_token_id,
-             ],
-         )
-
-     def get_config(self):
-         config = super().get_config()
-         config.update(
-             {
-                 "mask_selection_rate": self.mask_selection_rate,
-                 "mask_selection_length": self.mask_selection_length,
-                 "mask_token_rate": self.mask_token_rate,
-                 "random_token_rate": self.random_token_rate,
-             }
-         )
-         return config
+     backbone_cls = FNetBackbone
+     tokenizer_cls = FNetTokenizer

+     @preprocessing_function
      def call(self, x, y=None, sample_weight=None):
-         if y is not None or sample_weight is not None:
-             logging.warning(
-                 f"{self.__class__.__name__} generates `y` and `sample_weight` "
-                 "based on your input data, but your data already contains `y` "
-                 "or `sample_weight`. Your `y` and `sample_weight` will be "
-                 "ignored."
-             )
-         x = super().call(x)
-         token_ids, segment_ids = (
-             x["token_ids"],
-             x["segment_ids"],
-         )
-         masker_outputs = self.masker(token_ids)
-         x = {
-             "token_ids": masker_outputs["token_ids"],
-             "segment_ids": segment_ids,
-             "mask_positions": masker_outputs["mask_positions"],
-         }
-         y = masker_outputs["mask_ids"]
-         sample_weight = masker_outputs["mask_weights"]
+         output = super().call(x, y=y, sample_weight=sample_weight)
+         x, y, sample_weight = keras.utils.unpack_x_y_sample_weight(output)
+         # FNet has not padding mask.
+         del x["padding_mask"]
          return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
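After this change the FNet masked LM preprocessor only declares its `backbone_cls`/`tokenizer_cls` and overrides `call` to drop the padding mask; the masking logic lives in the shared `MaskedLMPreprocessor`. A minimal usage sketch, assuming an FNet preset with tokenizer assets is available (the preset name is shown for illustration); the output key names follow the diff above:

```python
import keras_hub

# Preset name shown for illustration.
preprocessor = keras_hub.models.FNetMaskedLMPreprocessor.from_preset("f_net_base_en")

x, y, sample_weight = preprocessor(["The quick brown fox jumped."])
# The shared MaskedLMPreprocessor produces token_ids, segment_ids,
# mask_positions and a padding mask; the FNet override deletes
# "padding_mask" because the FNet backbone does not accept one.
print(sorted(x.keys()))  # expected: ['mask_positions', 'segment_ids', 'token_ids']
```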