keras-hub-nightly 0.15.0.dev20240823171555-py3-none-any.whl → 0.15.0.dev20240911134614-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. keras_hub/api/__init__.py +1 -0
  2. keras_hub/api/bounding_box/__init__.py +36 -0
  3. keras_hub/api/layers/__init__.py +14 -0
  4. keras_hub/api/models/__init__.py +75 -31
  5. keras_hub/api/tokenizers/__init__.py +30 -0
  6. keras_hub/src/bounding_box/__init__.py +13 -0
  7. keras_hub/src/bounding_box/converters.py +529 -0
  8. keras_hub/src/bounding_box/formats.py +162 -0
  9. keras_hub/src/bounding_box/iou.py +263 -0
  10. keras_hub/src/bounding_box/to_dense.py +95 -0
  11. keras_hub/src/bounding_box/to_ragged.py +99 -0
  12. keras_hub/src/bounding_box/utils.py +194 -0
  13. keras_hub/src/bounding_box/validate_format.py +99 -0
  14. keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
  15. keras_hub/src/layers/preprocessing/image_converter.py +130 -0
  16. keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
  17. keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
  18. keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
  19. keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
  20. keras_hub/src/layers/preprocessing/random_swap.py +33 -31
  21. keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
  22. keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
  23. keras_hub/src/models/albert/__init__.py +1 -2
  24. keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
  25. keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +29 -10
  26. keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
  27. keras_hub/src/models/albert/albert_tokenizer.py +17 -36
  28. keras_hub/src/models/backbone.py +12 -34
  29. keras_hub/src/models/bart/__init__.py +1 -2
  30. keras_hub/src/models/bart/bart_preprocessor.py +6 -18
  31. keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
  32. keras_hub/src/models/bart/bart_tokenizer.py +12 -39
  33. keras_hub/src/models/bert/__init__.py +1 -5
  34. keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
  35. keras_hub/src/models/bert/bert_presets.py +1 -4
  36. keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +12 -10
  37. keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
  38. keras_hub/src/models/bert/bert_tokenizer.py +17 -35
  39. keras_hub/src/models/bloom/__init__.py +1 -2
  40. keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
  41. keras_hub/src/models/bloom/bloom_preprocessor.py +5 -12
  42. keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
  43. keras_hub/src/models/causal_lm.py +10 -29
  44. keras_hub/src/models/causal_lm_preprocessor.py +195 -0
  45. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
  46. keras_hub/src/models/deberta_v3/__init__.py +1 -4
  47. keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
  48. keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +11 -11
  49. keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
  50. keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
  51. keras_hub/src/models/densenet/densenet_backbone.py +46 -22
  52. keras_hub/src/models/distil_bert/__init__.py +1 -4
  53. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
  54. keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +12 -12
  55. keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
  56. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
  57. keras_hub/src/models/efficientnet/__init__.py +13 -0
  58. keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
  59. keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
  60. keras_hub/src/models/efficientnet/mbconv.py +238 -0
  61. keras_hub/src/models/electra/__init__.py +1 -2
  62. keras_hub/src/models/electra/electra_preprocessor.py +6 -5
  63. keras_hub/src/models/electra/electra_tokenizer.py +17 -32
  64. keras_hub/src/models/f_net/__init__.py +1 -2
  65. keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
  66. keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +10 -8
  67. keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
  68. keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
  69. keras_hub/src/models/falcon/__init__.py +1 -2
  70. keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
  71. keras_hub/src/models/falcon/falcon_preprocessor.py +5 -12
  72. keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
  73. keras_hub/src/models/gemma/__init__.py +1 -2
  74. keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
  75. keras_hub/src/models/gemma/gemma_preprocessor.py +5 -12
  76. keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
  77. keras_hub/src/models/gpt2/__init__.py +1 -2
  78. keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
  79. keras_hub/src/models/gpt2/gpt2_preprocessor.py +5 -12
  80. keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
  81. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
  82. keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +5 -12
  83. keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
  84. keras_hub/src/models/image_classifier.py +0 -5
  85. keras_hub/src/models/image_classifier_preprocessor.py +83 -0
  86. keras_hub/src/models/llama/__init__.py +1 -2
  87. keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
  88. keras_hub/src/models/llama/llama_preprocessor.py +5 -12
  89. keras_hub/src/models/llama/llama_tokenizer.py +12 -25
  90. keras_hub/src/models/llama3/__init__.py +1 -2
  91. keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
  92. keras_hub/src/models/llama3/llama3_preprocessor.py +2 -0
  93. keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
  94. keras_hub/src/models/masked_lm.py +0 -2
  95. keras_hub/src/models/masked_lm_preprocessor.py +156 -0
  96. keras_hub/src/models/mistral/__init__.py +1 -2
  97. keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
  98. keras_hub/src/models/mistral/mistral_preprocessor.py +5 -12
  99. keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
  100. keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
  101. keras_hub/src/models/mobilenet/__init__.py +13 -0
  102. keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
  103. keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
  104. keras_hub/src/models/opt/__init__.py +1 -2
  105. keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
  106. keras_hub/src/models/opt/opt_preprocessor.py +5 -12
  107. keras_hub/src/models/opt/opt_tokenizer.py +12 -41
  108. keras_hub/src/models/pali_gemma/__init__.py +1 -4
  109. keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
  110. keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
  111. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
  112. keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +10 -2
  113. keras_hub/src/models/phi3/__init__.py +1 -2
  114. keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
  115. keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
  116. keras_hub/src/models/phi3/phi3_preprocessor.py +5 -12
  117. keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
  118. keras_hub/src/models/preprocessor.py +76 -83
  119. keras_hub/src/models/resnet/__init__.py +6 -0
  120. keras_hub/src/models/resnet/resnet_backbone.py +387 -26
  121. keras_hub/src/models/resnet/resnet_image_classifier.py +7 -3
  122. keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
  123. keras_hub/src/models/resnet/resnet_image_converter.py +23 -0
  124. keras_hub/src/models/resnet/resnet_presets.py +95 -0
  125. keras_hub/src/models/roberta/__init__.py +1 -2
  126. keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
  127. keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +11 -11
  128. keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
  129. keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
  130. keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
  131. keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
  132. keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
  133. keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
  134. keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
  135. keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
  136. keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
  137. keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
  138. keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
  139. keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
  140. keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
  141. keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
  142. keras_hub/src/models/t5/__init__.py +1 -2
  143. keras_hub/src/models/t5/t5_tokenizer.py +13 -23
  144. keras_hub/src/models/task.py +71 -116
  145. keras_hub/src/models/{classifier.py → text_classifier.py} +8 -13
  146. keras_hub/src/models/text_classifier_preprocessor.py +138 -0
  147. keras_hub/src/models/whisper/__init__.py +1 -2
  148. keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
  149. keras_hub/src/models/whisper/whisper_backbone.py +0 -3
  150. keras_hub/src/models/whisper/whisper_presets.py +10 -10
  151. keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
  152. keras_hub/src/models/xlm_roberta/__init__.py +1 -4
  153. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
  154. keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +11 -11
  155. keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
  156. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
  157. keras_hub/src/tests/test_case.py +25 -0
  158. keras_hub/src/tokenizers/byte_pair_tokenizer.py +29 -17
  159. keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
  160. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +19 -7
  161. keras_hub/src/tokenizers/tokenizer.py +67 -32
  162. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
  163. keras_hub/src/tokenizers/word_piece_tokenizer.py +33 -47
  164. keras_hub/src/utils/keras_utils.py +0 -50
  165. keras_hub/src/utils/preset_utils.py +238 -67
  166. keras_hub/src/utils/tensor_utils.py +187 -69
  167. keras_hub/src/utils/timm/convert_resnet.py +20 -16
  168. keras_hub/src/utils/timm/preset_loader.py +67 -0
  169. keras_hub/src/utils/transformers/convert_albert.py +193 -0
  170. keras_hub/src/utils/transformers/convert_bart.py +373 -0
  171. keras_hub/src/utils/transformers/convert_bert.py +7 -17
  172. keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
  173. keras_hub/src/utils/transformers/convert_gemma.py +5 -19
  174. keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
  175. keras_hub/src/utils/transformers/convert_llama3.py +7 -18
  176. keras_hub/src/utils/transformers/convert_mistral.py +129 -0
  177. keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
  178. keras_hub/src/utils/transformers/preset_loader.py +77 -0
  179. keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
  180. keras_hub/src/version_utils.py +1 -1
  181. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/METADATA +1 -2
  182. keras_hub_nightly-0.15.0.dev20240911134614.dist-info/RECORD +338 -0
  183. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/WHEEL +1 -1
  184. keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
  185. keras_hub/src/utils/timm/convert.py +0 -37
  186. keras_hub/src/utils/transformers/convert.py +0 -101
  187. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
  188. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/top_level.txt +0 -0
keras_hub/src/models/efficientnet/fusedmbconv.py
@@ -0,0 +1,229 @@
+ # Copyright 2024 The KerasHub Authors
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import keras
+
+ BN_AXIS = 3
+
+ CONV_KERNEL_INITIALIZER = {
+     "class_name": "VarianceScaling",
+     "config": {
+         "scale": 2.0,
+         "mode": "fan_out",
+         "distribution": "truncated_normal",
+     },
+ }
+
+
+ class FusedMBConvBlock(keras.layers.Layer):
+     """Implementation of the FusedMBConv block
+
+     Also known as a Fused Mobile Inverted Residual Bottleneck block from:
+     [EfficientNet-EdgeTPU: Creating Accelerator-Optimized Neural Networks with AutoML]
+     (https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html)
+     [EfficientNetV2: Smaller Models and Faster Training]
+     (https://arxiv.org/abs/2104.00298v3).
+
+     FusedMBConv blocks are based on MBConv blocks, and replace the depthwise and
+     1x1 output convolution blocks with a single 3x3 convolution block, fusing
+     them together - hence the name "FusedMBConv". Alongside MBConv blocks, they
+     can be used in mobile-oriented and efficient architectures, and are present
+     in architectures EfficientNet.
+
+     FusedMBConv blocks follow a narrow-wide-narrow structure - expanding a 1x1
+     convolution, performing Squeeze-Excitation and then applying a 3x3
+     convolution, which is a more efficient operation than conventional
+     wide-narrow-wide structures.
+
+     As they're frequently used for models to be deployed to edge devices,
+     they're implemented as a layer for ease of use and re-use.
+
+     Args:
+         input_filters: int, the number of input filters
+         output_filters: int, the number of output filters
+         expand_ratio: default 1, the ratio by which input_filters are multiplied
+             to expand the structure in the middle expansion phase
+         kernel_size: default 3, the kernel_size to apply to the expansion phase
+             convolutions
+         strides: default 1, the strides to apply to the expansion phase
+             convolutions
+         se_ratio: default 0.0, The filters used in the Squeeze-Excitation phase,
+             and are chosen as the maximum between 1 and input_filters*se_ratio
+         batch_norm_momentum: default 0.9, the BatchNormalization momentum
+         activation: default "swish", the activation function used between
+             convolution operations
+         dropout: float, the optional dropout rate to apply before the output
+             convolution, defaults to 0.2
+
+     Returns:
+         A tensor representing a feature map, passed through the FusedMBConv
+         block
+
+     Note:
+         Not intended to be used outside of the EfficientNet architecture.
+     """
+
+     def __init__(
+         self,
+         input_filters,
+         output_filters,
+         expand_ratio=1,
+         kernel_size=3,
+         strides=1,
+         se_ratio=0.0,
+         batch_norm_momentum=0.9,
+         activation="swish",
+         dropout=0.2,
+         **kwargs
+     ):
+         super().__init__(**kwargs)
+         self.input_filters = input_filters
+         self.output_filters = output_filters
+         self.expand_ratio = expand_ratio
+         self.kernel_size = kernel_size
+         self.strides = strides
+         self.se_ratio = se_ratio
+         self.batch_norm_momentum = batch_norm_momentum
+         self.activation = activation
+         self.dropout = dropout
+         self.filters = self.input_filters * self.expand_ratio
+         self.filters_se = max(1, int(input_filters * se_ratio))
+
+         self.conv1 = keras.layers.Conv2D(
+             filters=self.filters,
+             kernel_size=kernel_size,
+             strides=strides,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             padding="same",
+             data_format="channels_last",
+             use_bias=False,
+             name=self.name + "expand_conv",
+         )
+         self.bn1 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "expand_bn",
+         )
+         self.act = keras.layers.Activation(
+             self.activation, name=self.name + "expand_activation"
+         )
+
+         self.bn2 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "bn",
+         )
+
+         self.se_conv1 = keras.layers.Conv2D(
+             self.filters_se,
+             1,
+             padding="same",
+             activation=self.activation,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             name=self.name + "se_reduce",
+         )
+
+         self.se_conv2 = keras.layers.Conv2D(
+             self.filters,
+             1,
+             padding="same",
+             activation="sigmoid",
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             name=self.name + "se_expand",
+         )
+
+         self.output_conv = keras.layers.Conv2D(
+             filters=self.output_filters,
+             kernel_size=1 if expand_ratio != 1 else kernel_size,
+             strides=1,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             padding="same",
+             data_format="channels_last",
+             use_bias=False,
+             name=self.name + "project_conv",
+         )
+
+         self.bn3 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "project_bn",
+         )
+
+         if self.dropout:
+             self.dropout_layer = keras.layers.Dropout(
+                 self.dropout,
+                 noise_shape=(None, 1, 1, 1),
+                 name=self.name + "drop",
+             )
+
+     def build(self, input_shape):
+         if self.name is None:
+             self.name = keras.backend.get_uid("block0")
+
+     def call(self, inputs):
+         # Expansion phase
+         if self.expand_ratio != 1:
+             x = self.conv1(inputs)
+             x = self.bn1(x)
+             x = self.act(x)
+         else:
+             x = inputs
+
+         # Squeeze and excite
+         if 0 < self.se_ratio <= 1:
+             se = keras.layers.GlobalAveragePooling2D(
+                 name=self.name + "se_squeeze"
+             )(x)
+             if BN_AXIS == 1:
+                 se_shape = (self.filters, 1, 1)
+             else:
+                 se_shape = (1, 1, self.filters)
+
+             se = keras.layers.Reshape(se_shape, name=self.name + "se_reshape")(
+                 se
+             )
+
+             se = self.se_conv1(se)
+             se = self.se_conv2(se)
+
+             x = keras.layers.multiply([x, se], name=self.name + "se_excite")
+
+         # Output phase:
+         x = self.output_conv(x)
+         x = self.bn3(x)
+         if self.expand_ratio == 1:
+             x = self.act(x)
+
+         # Residual:
+         if self.strides == 1 and self.input_filters == self.output_filters:
+             if self.dropout:
+                 x = self.dropout_layer(x)
+             x = keras.layers.Add(name=self.name + "add")([x, inputs])
+         return x
+
+     def get_config(self):
+         config = {
+             "input_filters": self.input_filters,
+             "output_filters": self.output_filters,
+             "expand_ratio": self.expand_ratio,
+             "kernel_size": self.kernel_size,
+             "strides": self.strides,
+             "se_ratio": self.se_ratio,
+             "batch_norm_momentum": self.batch_norm_momentum,
+             "activation": self.activation,
+             "dropout": self.dropout,
+         }
+
+         base_config = super().get_config()
+         return dict(list(base_config.items()) + list(config.items()))
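Not part of the published diff: a minimal sketch of exercising the new FusedMBConvBlock on its own, assuming the module path shown in the file list above. The filter counts, spatial size, and expansion ratio below are illustrative only; inside the library the block is configured per stage by the EfficientNet backbone.

```python
import numpy as np

from keras_hub.src.models.efficientnet.fusedmbconv import FusedMBConvBlock

# Illustrative configuration (not taken from any preset).
block = FusedMBConvBlock(
    input_filters=32,
    output_filters=32,
    expand_ratio=4,
    se_ratio=0.25,
)
# channels_last input, matching BN_AXIS = 3 in the module.
features = np.random.rand(1, 64, 64, 32).astype("float32")
outputs = block(features)
print(outputs.shape)  # (1, 64, 64, 32); strides=1 and equal filters keep the residual add
```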
keras_hub/src/models/efficientnet/mbconv.py
@@ -0,0 +1,238 @@
+ # Copyright 2024 The KerasHub Authors
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import keras
+
+ BN_AXIS = 3
+
+ CONV_KERNEL_INITIALIZER = {
+     "class_name": "VarianceScaling",
+     "config": {
+         "scale": 2.0,
+         "mode": "fan_out",
+         "distribution": "truncated_normal",
+     },
+ }
+
+
+ class MBConvBlock(keras.layers.Layer):
+     def __init__(
+         self,
+         input_filters,
+         output_filters,
+         expand_ratio=1,
+         kernel_size=3,
+         strides=1,
+         se_ratio=0.0,
+         batch_norm_momentum=0.9,
+         activation="swish",
+         dropout=0.2,
+         **kwargs
+     ):
+         """Implementation of the MBConv block
+
+         Also known as a Mobile Inverted Residual Bottleneck block from:
+         [MobileNetV2: Inverted Residuals and Linear Bottlenecks]
+         (https://arxiv.org/abs/1801.04381v4).
+
+         MBConv blocks are common blocks used in mobile-oriented and efficient
+         architectures, present in architectures such as MobileNet, EfficientNet,
+         MaxViT, etc.
+
+         MBConv blocks follow a narrow-wide-narrow structure - expanding a 1x1
+         convolution, applying depthwise convolution, and narrowing back to a 1x1
+         convolution, which is a more efficient operation than conventional
+         wide-narrow-wide structures.
+
+         As they're frequently used for models to be deployed to edge devices,
+         they're implemented as a layer for ease of use and re-use.
+
+         Args:
+             input_filters: int, the number of input filters
+             output_filters: int, the optional number of output filters after
+                 Squeeze-Excitation
+             expand_ratio: default 1, the ratio by which input_filters are
+                 multiplied to expand the structure in the middle expansion phase
+             kernel_size: default 3, the kernel_size to apply to the expansion
+                 phase convolutions
+             strides: default 1, the strides to apply to the expansion phase
+                 convolutions
+             se_ratio: default 0.0, Squeeze-Excitation happens before depthwise
+                 convolution and before output convolution only if the se_ratio
+                 is above 0. The filters used in this phase are chosen as the
+                 maximum between 1 and input_filters*se_ratio
+             batch_norm_momentum: default 0.9, the BatchNormalization momentum
+             activation: default "swish", the activation function used between
+                 convolution operations
+             dropout: float, the optional dropout rate to apply before the output
+                 convolution, defaults to 0.2
+
+         Returns:
+             A tensor representing a feature map, passed through the MBConv
+             block
+
+
+         Note:
+             Not intended to be used outside of the EfficientNet architecture.
+         """
+
+         super().__init__(**kwargs)
+         self.input_filters = input_filters
+         self.output_filters = output_filters
+         self.expand_ratio = expand_ratio
+         self.kernel_size = kernel_size
+         self.strides = strides
+         self.se_ratio = se_ratio
+         self.batch_norm_momentum = batch_norm_momentum
+         self.activation = activation
+         self.dropout = dropout
+         self.filters = self.input_filters * self.expand_ratio
+         self.filters_se = max(1, int(input_filters * se_ratio))
+
+         self.conv1 = keras.layers.Conv2D(
+             filters=self.filters,
+             kernel_size=1,
+             strides=1,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             padding="same",
+             data_format="channels_last",
+             use_bias=False,
+             name=self.name + "expand_conv",
+         )
+         self.bn1 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "expand_bn",
+         )
+         self.act = keras.layers.Activation(
+             self.activation, name=self.name + "activation"
+         )
+         self.depthwise = keras.layers.DepthwiseConv2D(
+             kernel_size=self.kernel_size,
+             strides=self.strides,
+             depthwise_initializer=CONV_KERNEL_INITIALIZER,
+             padding="same",
+             data_format="channels_last",
+             use_bias=False,
+             name=self.name + "dwconv2",
+         )
+
+         self.bn2 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "bn",
+         )
+
+         self.se_conv1 = keras.layers.Conv2D(
+             self.filters_se,
+             1,
+             padding="same",
+             activation=self.activation,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             name=self.name + "se_reduce",
+         )
+
+         self.se_conv2 = keras.layers.Conv2D(
+             self.filters,
+             1,
+             padding="same",
+             activation="sigmoid",
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             name=self.name + "se_expand",
+         )
+
+         self.output_conv = keras.layers.Conv2D(
+             filters=self.output_filters,
+             kernel_size=1 if expand_ratio != 1 else kernel_size,
+             strides=1,
+             kernel_initializer=CONV_KERNEL_INITIALIZER,
+             padding="same",
+             data_format="channels_last",
+             use_bias=False,
+             name=self.name + "project_conv",
+         )
+
+         self.bn3 = keras.layers.BatchNormalization(
+             axis=BN_AXIS,
+             momentum=self.batch_norm_momentum,
+             name=self.name + "project_bn",
+         )
+
+         if self.dropout:
+             self.dropout_layer = keras.layers.Dropout(
+                 self.dropout,
+                 noise_shape=(None, 1, 1, 1),
+                 name=self.name + "drop",
+             )
+
+     def build(self, input_shape):
+         if self.name is None:
+             self.name = keras.backend.get_uid("block0")
+
+     def call(self, inputs):
+         # Expansion phase
+         if self.expand_ratio != 1:
+             x = self.conv1(inputs)
+             x = self.bn1(x)
+             x = self.act(x)
+         else:
+             x = inputs
+
+         # Depthwise conv
+         x = self.depthwise(x)
+         x = self.bn2(x)
+         x = self.act(x)
+
+         # Squeeze and excite
+         if 0 < self.se_ratio <= 1:
+             se = keras.layers.GlobalAveragePooling2D(
+                 name=self.name + "se_squeeze"
+             )(x)
+             if BN_AXIS == 1:
+                 se_shape = (self.filters, 1, 1)
+             else:
+                 se_shape = (1, 1, self.filters)
+             se = keras.layers.Reshape(se_shape, name=self.name + "se_reshape")(
+                 se
+             )
+
+             se = self.se_conv1(se)
+             se = self.se_conv2(se)
+
+             x = keras.layers.multiply([x, se], name=self.name + "se_excite")
+
+         # Output phase
+         x = self.output_conv(x)
+         x = self.bn3(x)
+
+         if self.strides == 1 and self.input_filters == self.output_filters:
+             if self.dropout:
+                 x = self.dropout_layer(x)
+             x = keras.layers.Add(name=self.name + "add")([x, inputs])
+         return x
+
+     def get_config(self):
+         config = {
+             "input_filters": self.input_filters,
+             "output_filters": self.output_filters,
+             "expand_ratio": self.expand_ratio,
+             "kernel_size": self.kernel_size,
+             "strides": self.strides,
+             "se_ratio": self.se_ratio,
+             "batch_norm_momentum": self.batch_norm_momentum,
+             "activation": self.activation,
+             "dropout": self.dropout,
+         }
+         base_config = super().get_config()
+         return dict(list(base_config.items()) + list(config.items()))
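Again not part of the diff: the same kind of hedged sketch for MBConvBlock, here with a strided configuration so the residual branch is skipped. All numbers below are illustrative.

```python
import numpy as np

from keras_hub.src.models.efficientnet.mbconv import MBConvBlock

# Illustrative configuration: 1x1 expansion, strided 3x3 depthwise conv,
# squeeze-excitation, then 1x1 projection.
block = MBConvBlock(
    input_filters=32,
    output_filters=64,
    expand_ratio=6,
    strides=2,
    se_ratio=0.25,
)
features = np.random.rand(1, 56, 56, 32).astype("float32")  # channels_last
outputs = block(features)
print(outputs.shape)  # (1, 28, 28, 64); no residual add since strides != 1 and filters differ
```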
keras_hub/src/models/electra/__init__.py
@@ -14,7 +14,6 @@

  from keras_hub.src.models.electra.electra_backbone import ElectraBackbone
  from keras_hub.src.models.electra.electra_presets import backbone_presets
- from keras_hub.src.models.electra.electra_tokenizer import ElectraTokenizer
  from keras_hub.src.utils.preset_utils import register_presets

- register_presets(backbone_presets, (ElectraBackbone, ElectraTokenizer))
+ register_presets(backbone_presets, ElectraBackbone)
keras_hub/src/models/electra/electra_preprocessor.py
@@ -18,11 +18,10 @@ from keras_hub.src.api_export import keras_hub_export
  from keras_hub.src.layers.preprocessing.multi_segment_packer import (
      MultiSegmentPacker,
  )
+ from keras_hub.src.models.electra.electra_backbone import ElectraBackbone
  from keras_hub.src.models.electra.electra_tokenizer import ElectraTokenizer
  from keras_hub.src.models.preprocessor import Preprocessor
- from keras_hub.src.utils.keras_utils import (
-     convert_inputs_to_list_of_tensor_segments,
- )
+ from keras_hub.src.utils.tensor_utils import preprocessing_function


  @keras_hub_export("keras_hub.models.ElectraPreprocessor")
@@ -113,6 +112,7 @@ class ElectraPreprocessor(Preprocessor):
      ```
      """

+     backbone_cls = ElectraBackbone
      tokenizer_cls = ElectraTokenizer

      def __init__(
@@ -142,9 +142,10 @@ class ElectraPreprocessor(Preprocessor):
          )
          return config

+     @preprocessing_function
      def call(self, x, y=None, sample_weight=None):
-         x = convert_inputs_to_list_of_tensor_segments(x)
-         x = [self.tokenizer(segment) for segment in x]
+         x = x if isinstance(x, tuple) else (x,)
+         x = tuple(self.tokenizer(segment) for segment in x)
          token_ids, segment_ids = self.packer(x)
          x = {
              "token_ids": token_ids,
keras_hub/src/models/electra/electra_tokenizer.py
@@ -13,10 +13,16 @@
  # limitations under the License.

  from keras_hub.src.api_export import keras_hub_export
+ from keras_hub.src.models.electra.electra_backbone import ElectraBackbone
  from keras_hub.src.tokenizers.word_piece_tokenizer import WordPieceTokenizer


- @keras_hub_export("keras_hub.models.ElectraTokenizer")
+ @keras_hub_export(
+     [
+         "keras_hub.tokenizers.ElectraTokenizer",
+         "keras_hub.models.ElectraTokenizer",
+     ]
+ )
  class ElectraTokenizer(WordPieceTokenizer):
      """A ELECTRA tokenizer using WordPiece subword segmentation.

@@ -60,45 +66,24 @@ class ElectraTokenizer(WordPieceTokenizer):
      ```
      """

+     backbone_cls = ElectraBackbone
+
      def __init__(
          self,
          vocabulary,
          lowercase=False,
-         special_tokens_in_strings=False,
          **kwargs,
      ):
-         self.cls_token = "[CLS]"
-         self.sep_token = "[SEP]"
-         self.pad_token = "[PAD]"
-         self.mask_token = "[MASK]"
+         self._add_special_token("[CLS]", "cls_token")
+         self._add_special_token("[SEP]", "sep_token")
+         self._add_special_token("[PAD]", "pad_token")
+         self._add_special_token("[MASK]", "mask_token")
+         # Also add `tokenizer.start_token` and `tokenizer.end_token` for
+         # compatibility with other tokenizers.
+         self._add_special_token("[CLS]", "start_token")
+         self._add_special_token("[SEP]", "end_token")
          super().__init__(
              vocabulary=vocabulary,
              lowercase=lowercase,
-             special_tokens=[
-                 self.cls_token,
-                 self.sep_token,
-                 self.pad_token,
-                 self.mask_token,
-             ],
-             special_tokens_in_strings=special_tokens_in_strings,
              **kwargs,
          )
-
-     def set_vocabulary(self, vocabulary):
-         super().set_vocabulary(vocabulary)
-
-         if vocabulary is not None:
-             self.cls_token_id = self.token_to_id(self.cls_token)
-             self.sep_token_id = self.token_to_id(self.sep_token)
-             self.pad_token_id = self.token_to_id(self.pad_token)
-             self.mask_token_id = self.token_to_id(self.mask_token)
-         else:
-             self.cls_token_id = None
-             self.sep_token_id = None
-             self.pad_token_id = None
-             self.mask_token_id = None
-
-     def get_config(self):
-         config = super().get_config()
-         del config["special_tokens"] # Not configurable; set in __init__.
-         return config
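Taken together, the ElectraPreprocessor and ElectraTokenizer changes move special-token bookkeeping into `_add_special_token` (which also exposes `start_token`/`end_token` aliases) and normalize multi-segment inputs to tuples. A minimal usage sketch, not from the diff, assuming a hypothetical toy vocabulary; a real workflow would load a vocabulary from a preset or file.

```python
import keras_hub

# Hypothetical toy vocabulary; "[UNK]" is required as the WordPiece OOV token.
vocab = ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]", "the", "quick", "brown", "fox"]

# New export path from this diff, alongside keras_hub.models.ElectraTokenizer.
tokenizer = keras_hub.tokenizers.ElectraTokenizer(vocabulary=vocab)
print(tokenizer.start_token, tokenizer.end_token)  # "[CLS]" "[SEP]"

preprocessor = keras_hub.models.ElectraPreprocessor(tokenizer, sequence_length=8)
features = preprocessor("the quick brown fox")       # single segment
features = preprocessor(("the quick", "brown fox"))  # segment pair, passed as a tuple
```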
keras_hub/src/models/f_net/__init__.py
@@ -14,7 +14,6 @@

  from keras_hub.src.models.f_net.f_net_backbone import FNetBackbone
  from keras_hub.src.models.f_net.f_net_presets import backbone_presets
- from keras_hub.src.models.f_net.f_net_tokenizer import FNetTokenizer
  from keras_hub.src.utils.preset_utils import register_presets

- register_presets(backbone_presets, (FNetBackbone, FNetTokenizer))
+ register_presets(backbone_presets, FNetBackbone)