keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.15.0.dev20240911134614__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. keras_hub/api/__init__.py +1 -0
  2. keras_hub/api/bounding_box/__init__.py +36 -0
  3. keras_hub/api/layers/__init__.py +14 -0
  4. keras_hub/api/models/__init__.py +75 -31
  5. keras_hub/api/tokenizers/__init__.py +30 -0
  6. keras_hub/src/bounding_box/__init__.py +13 -0
  7. keras_hub/src/bounding_box/converters.py +529 -0
  8. keras_hub/src/bounding_box/formats.py +162 -0
  9. keras_hub/src/bounding_box/iou.py +263 -0
  10. keras_hub/src/bounding_box/to_dense.py +95 -0
  11. keras_hub/src/bounding_box/to_ragged.py +99 -0
  12. keras_hub/src/bounding_box/utils.py +194 -0
  13. keras_hub/src/bounding_box/validate_format.py +99 -0
  14. keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
  15. keras_hub/src/layers/preprocessing/image_converter.py +130 -0
  16. keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
  17. keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
  18. keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
  19. keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
  20. keras_hub/src/layers/preprocessing/random_swap.py +33 -31
  21. keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
  22. keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
  23. keras_hub/src/models/albert/__init__.py +1 -2
  24. keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
  25. keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +29 -10
  26. keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
  27. keras_hub/src/models/albert/albert_tokenizer.py +17 -36
  28. keras_hub/src/models/backbone.py +12 -34
  29. keras_hub/src/models/bart/__init__.py +1 -2
  30. keras_hub/src/models/bart/bart_preprocessor.py +6 -18
  31. keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
  32. keras_hub/src/models/bart/bart_tokenizer.py +12 -39
  33. keras_hub/src/models/bert/__init__.py +1 -5
  34. keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
  35. keras_hub/src/models/bert/bert_presets.py +1 -4
  36. keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +12 -10
  37. keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
  38. keras_hub/src/models/bert/bert_tokenizer.py +17 -35
  39. keras_hub/src/models/bloom/__init__.py +1 -2
  40. keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
  41. keras_hub/src/models/bloom/bloom_preprocessor.py +5 -12
  42. keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
  43. keras_hub/src/models/causal_lm.py +10 -29
  44. keras_hub/src/models/causal_lm_preprocessor.py +195 -0
  45. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
  46. keras_hub/src/models/deberta_v3/__init__.py +1 -4
  47. keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
  48. keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +11 -11
  49. keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
  50. keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
  51. keras_hub/src/models/densenet/densenet_backbone.py +46 -22
  52. keras_hub/src/models/distil_bert/__init__.py +1 -4
  53. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
  54. keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +12 -12
  55. keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
  56. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
  57. keras_hub/src/models/efficientnet/__init__.py +13 -0
  58. keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
  59. keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
  60. keras_hub/src/models/efficientnet/mbconv.py +238 -0
  61. keras_hub/src/models/electra/__init__.py +1 -2
  62. keras_hub/src/models/electra/electra_preprocessor.py +6 -5
  63. keras_hub/src/models/electra/electra_tokenizer.py +17 -32
  64. keras_hub/src/models/f_net/__init__.py +1 -2
  65. keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
  66. keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +10 -8
  67. keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
  68. keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
  69. keras_hub/src/models/falcon/__init__.py +1 -2
  70. keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
  71. keras_hub/src/models/falcon/falcon_preprocessor.py +5 -12
  72. keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
  73. keras_hub/src/models/gemma/__init__.py +1 -2
  74. keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
  75. keras_hub/src/models/gemma/gemma_preprocessor.py +5 -12
  76. keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
  77. keras_hub/src/models/gpt2/__init__.py +1 -2
  78. keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
  79. keras_hub/src/models/gpt2/gpt2_preprocessor.py +5 -12
  80. keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
  81. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
  82. keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +5 -12
  83. keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
  84. keras_hub/src/models/image_classifier.py +0 -5
  85. keras_hub/src/models/image_classifier_preprocessor.py +83 -0
  86. keras_hub/src/models/llama/__init__.py +1 -2
  87. keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
  88. keras_hub/src/models/llama/llama_preprocessor.py +5 -12
  89. keras_hub/src/models/llama/llama_tokenizer.py +12 -25
  90. keras_hub/src/models/llama3/__init__.py +1 -2
  91. keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
  92. keras_hub/src/models/llama3/llama3_preprocessor.py +2 -0
  93. keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
  94. keras_hub/src/models/masked_lm.py +0 -2
  95. keras_hub/src/models/masked_lm_preprocessor.py +156 -0
  96. keras_hub/src/models/mistral/__init__.py +1 -2
  97. keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
  98. keras_hub/src/models/mistral/mistral_preprocessor.py +5 -12
  99. keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
  100. keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
  101. keras_hub/src/models/mobilenet/__init__.py +13 -0
  102. keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
  103. keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
  104. keras_hub/src/models/opt/__init__.py +1 -2
  105. keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
  106. keras_hub/src/models/opt/opt_preprocessor.py +5 -12
  107. keras_hub/src/models/opt/opt_tokenizer.py +12 -41
  108. keras_hub/src/models/pali_gemma/__init__.py +1 -4
  109. keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
  110. keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
  111. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
  112. keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +10 -2
  113. keras_hub/src/models/phi3/__init__.py +1 -2
  114. keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
  115. keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
  116. keras_hub/src/models/phi3/phi3_preprocessor.py +5 -12
  117. keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
  118. keras_hub/src/models/preprocessor.py +76 -83
  119. keras_hub/src/models/resnet/__init__.py +6 -0
  120. keras_hub/src/models/resnet/resnet_backbone.py +387 -26
  121. keras_hub/src/models/resnet/resnet_image_classifier.py +7 -3
  122. keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
  123. keras_hub/src/models/resnet/resnet_image_converter.py +23 -0
  124. keras_hub/src/models/resnet/resnet_presets.py +95 -0
  125. keras_hub/src/models/roberta/__init__.py +1 -2
  126. keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
  127. keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +11 -11
  128. keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
  129. keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
  130. keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
  131. keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
  132. keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
  133. keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
  134. keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
  135. keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
  136. keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
  137. keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
  138. keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
  139. keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
  140. keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
  141. keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
  142. keras_hub/src/models/t5/__init__.py +1 -2
  143. keras_hub/src/models/t5/t5_tokenizer.py +13 -23
  144. keras_hub/src/models/task.py +71 -116
  145. keras_hub/src/models/{classifier.py → text_classifier.py} +8 -13
  146. keras_hub/src/models/text_classifier_preprocessor.py +138 -0
  147. keras_hub/src/models/whisper/__init__.py +1 -2
  148. keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
  149. keras_hub/src/models/whisper/whisper_backbone.py +0 -3
  150. keras_hub/src/models/whisper/whisper_presets.py +10 -10
  151. keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
  152. keras_hub/src/models/xlm_roberta/__init__.py +1 -4
  153. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
  154. keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +11 -11
  155. keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
  156. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
  157. keras_hub/src/tests/test_case.py +25 -0
  158. keras_hub/src/tokenizers/byte_pair_tokenizer.py +29 -17
  159. keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
  160. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +19 -7
  161. keras_hub/src/tokenizers/tokenizer.py +67 -32
  162. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
  163. keras_hub/src/tokenizers/word_piece_tokenizer.py +33 -47
  164. keras_hub/src/utils/keras_utils.py +0 -50
  165. keras_hub/src/utils/preset_utils.py +238 -67
  166. keras_hub/src/utils/tensor_utils.py +187 -69
  167. keras_hub/src/utils/timm/convert_resnet.py +20 -16
  168. keras_hub/src/utils/timm/preset_loader.py +67 -0
  169. keras_hub/src/utils/transformers/convert_albert.py +193 -0
  170. keras_hub/src/utils/transformers/convert_bart.py +373 -0
  171. keras_hub/src/utils/transformers/convert_bert.py +7 -17
  172. keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
  173. keras_hub/src/utils/transformers/convert_gemma.py +5 -19
  174. keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
  175. keras_hub/src/utils/transformers/convert_llama3.py +7 -18
  176. keras_hub/src/utils/transformers/convert_mistral.py +129 -0
  177. keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
  178. keras_hub/src/utils/transformers/preset_loader.py +77 -0
  179. keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
  180. keras_hub/src/version_utils.py +1 -1
  181. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/METADATA +1 -2
  182. keras_hub_nightly-0.15.0.dev20240911134614.dist-info/RECORD +338 -0
  183. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/WHEEL +1 -1
  184. keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
  185. keras_hub/src/utils/timm/convert.py +0 -37
  186. keras_hub/src/utils/transformers/convert.py +0 -101
  187. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
  188. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/top_level.txt +0 -0
@@ -12,24 +12,28 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
-
16
15
  import keras
17
16
 
18
17
  from keras_hub.src.api_export import keras_hub_export
19
- from keras_hub.src.layers.preprocessing.multi_segment_packer import (
20
- MultiSegmentPacker,
18
+ from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
19
+ DebertaV3Backbone,
21
20
  )
22
21
  from keras_hub.src.models.deberta_v3.deberta_v3_tokenizer import (
23
22
  DebertaV3Tokenizer,
24
23
  )
25
- from keras_hub.src.models.preprocessor import Preprocessor
26
- from keras_hub.src.utils.keras_utils import (
27
- convert_inputs_to_list_of_tensor_segments,
24
+ from keras_hub.src.models.text_classifier_preprocessor import (
25
+ TextClassifierPreprocessor,
28
26
  )
27
+ from keras_hub.src.utils.tensor_utils import preprocessing_function
29
28
 
30
29
 
31
- @keras_hub_export("keras_hub.models.DebertaV3Preprocessor")
32
- class DebertaV3Preprocessor(Preprocessor):
30
+ @keras_hub_export(
31
+ [
32
+ "keras_hub.models.DebertaV3TextClassifierPreprocessor",
33
+ "keras_hub.models.DebertaV3Preprocessor",
34
+ ]
35
+ )
36
+ class DebertaV3TextClassifierPreprocessor(TextClassifierPreprocessor):
33
37
  """A DeBERTa preprocessing layer which tokenizes and packs inputs.
34
38
 
35
39
  This preprocessing layer will do three things:
@@ -74,7 +78,7 @@ class DebertaV3Preprocessor(Preprocessor):
74
78
  Examples:
75
79
  Directly calling the layer on data.
76
80
  ```python
77
- preprocessor = keras_hub.models.DebertaV3Preprocessor.from_preset(
81
+ preprocessor = keras_hub.models.TextClassifierPreprocessor.from_preset(
78
82
  "deberta_v3_base_en"
79
83
  )
80
84
 
@@ -110,13 +114,15 @@ class DebertaV3Preprocessor(Preprocessor):
110
114
  tokenizer = keras_hub.models.DebertaV3Tokenizer(
111
115
  proto=bytes_io.getvalue(),
112
116
  )
113
- preprocessor = keras_hub.models.DebertaV3Preprocessor(tokenizer)
117
+ preprocessor = keras_hub.models.DebertaV3TextClassifierPreprocessor(
118
+ tokenizer
119
+ )
114
120
  preprocessor("The quick brown fox jumped.")
115
121
  ```
116
122
 
117
123
  Mapping with `tf.data.Dataset`.
118
124
  ```python
119
- preprocessor = keras_hub.models.DebertaV3Preprocessor.from_preset(
125
+ preprocessor = keras_hub.models.TextClassifierPreprocessor.from_preset(
120
126
  "deberta_v3_base_en"
121
127
  )
122
128
 
@@ -147,60 +153,13 @@ class DebertaV3Preprocessor(Preprocessor):
147
153
  ```
148
154
  """
149
155
 
156
+ backbone_cls = DebertaV3Backbone
150
157
  tokenizer_cls = DebertaV3Tokenizer
151
158
 
152
- def __init__(
153
- self,
154
- tokenizer,
155
- sequence_length=512,
156
- truncate="round_robin",
157
- **kwargs,
158
- ):
159
- super().__init__(**kwargs)
160
- self.tokenizer = tokenizer
161
- self.packer = None
162
- self.truncate = truncate
163
- self.sequence_length = sequence_length
164
-
165
- def build(self, input_shape):
166
- # Defer packer creation to `build()` so that we can be sure tokenizer
167
- # assets have loaded when restoring a saved model.
168
- self.packer = MultiSegmentPacker(
169
- start_value=self.tokenizer.cls_token_id,
170
- end_value=self.tokenizer.sep_token_id,
171
- pad_value=self.tokenizer.pad_token_id,
172
- truncate=self.truncate,
173
- sequence_length=self.sequence_length,
174
- )
175
- self.built = True
176
-
177
- def get_config(self):
178
- config = super().get_config()
179
- config.update(
180
- {
181
- "sequence_length": self.sequence_length,
182
- "truncate": self.truncate,
183
- }
184
- )
185
- return config
186
-
159
+ @preprocessing_function
187
160
  def call(self, x, y=None, sample_weight=None):
188
- x = convert_inputs_to_list_of_tensor_segments(x)
189
- x = [self.tokenizer(segment) for segment in x]
190
- token_ids, _ = self.packer(x)
191
- x = {
192
- "token_ids": token_ids,
193
- "padding_mask": token_ids != self.tokenizer.pad_token_id,
194
- }
161
+ output = super().call(x, y=y, sample_weight=sample_weight)
162
+ x, y, sample_weight = keras.utils.unpack_x_y_sample_weight(output)
163
+ # Backbone has no segment ID input.
164
+ del x["segment_ids"]
195
165
  return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
196
-
197
- @property
198
- def sequence_length(self):
199
- """The padded length of model input sequences."""
200
- return self._sequence_length
201
-
202
- @sequence_length.setter
203
- def sequence_length(self, value):
204
- self._sequence_length = value
205
- if self.packer is not None:
206
- self.packer.sequence_length = value
@@ -14,6 +14,9 @@
14
14
 
15
15
 
16
16
  from keras_hub.src.api_export import keras_hub_export
17
+ from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
18
+ DebertaV3Backbone,
19
+ )
17
20
  from keras_hub.src.tokenizers.sentence_piece_tokenizer import (
18
21
  SentencePieceTokenizer,
19
22
  )
@@ -24,7 +27,12 @@ except ImportError:
24
27
  tf = None
25
28
 
26
29
 
27
- @keras_hub_export("keras_hub.models.DebertaV3Tokenizer")
30
+ @keras_hub_export(
31
+ [
32
+ "keras_hub.tokenizers.DebertaV3Tokenizer",
33
+ "keras_hub.models.DebertaV3Tokenizer",
34
+ ]
35
+ )
28
36
  class DebertaV3Tokenizer(SentencePieceTokenizer):
29
37
  """DeBERTa tokenizer layer based on SentencePiece.
30
38
 
@@ -34,10 +42,6 @@ class DebertaV3Tokenizer(SentencePieceTokenizer):
34
42
  DeBERTa models and provides a `from_preset()` method to automatically
35
43
  download a matching vocabulary for a DeBERTa preset.
36
44
 
37
- This tokenizer does not provide truncation or padding of inputs. It can be
38
- combined with a `keras_hub.models.DebertaV3Preprocessor` layer for input
39
- packing.
40
-
41
45
  If input is a batch of strings (rank > 0), the layer will output a
42
46
  `tf.RaggedTensor` where the last dimension of the output is ragged.
43
47
 
@@ -94,38 +98,37 @@ class DebertaV3Tokenizer(SentencePieceTokenizer):
94
98
  ```
95
99
  """
96
100
 
101
+ backbone_cls = DebertaV3Backbone
102
+
97
103
  def __init__(self, proto, **kwargs):
98
- self.cls_token = "[CLS]"
99
- self.sep_token = "[SEP]"
100
- self.pad_token = "[PAD]"
104
+ self._add_special_token("[CLS]", "cls_token")
105
+ self._add_special_token("[SEP]", "sep_token")
106
+ self._add_special_token("[PAD]", "pad_token")
107
+ # Also add `tokenizer.start_token` and `tokenizer.end_token` for
108
+ # compatibility with other tokenizers.
109
+ self._add_special_token("[CLS]", "start_token")
110
+ self._add_special_token("[SEP]", "end_token")
111
+ # Handle mask separately as it's not always in the vocab.
101
112
  self.mask_token = "[MASK]"
102
-
113
+ self.mask_token_id = None
103
114
  super().__init__(proto=proto, **kwargs)
104
115
 
116
+ @property
117
+ def special_tokens(self):
118
+ return super().special_tokens + [self.mask_token]
119
+
120
+ @property
121
+ def special_token_ids(self):
122
+ return super().special_token_ids + [self.mask_token_id]
123
+
105
124
  def set_proto(self, proto):
106
125
  super().set_proto(proto)
107
126
  if proto is not None:
108
- for token in [self.cls_token, self.pad_token, self.sep_token]:
109
- if token not in super().get_vocabulary():
110
- raise ValueError(
111
- f"Cannot find token `'{token}'` in the provided "
112
- f"`vocabulary`. Please provide `'{token}'` in your "
113
- "`vocabulary` or use a pretrained `vocabulary` name."
114
- )
115
-
116
- self.cls_token_id = self.token_to_id(self.cls_token)
117
- self.sep_token_id = self.token_to_id(self.sep_token)
118
- self.pad_token_id = self.token_to_id(self.pad_token)
119
- # If the mask token is not in the vocabulary, add it to the end of the
120
- # vocabulary.
121
127
  if self.mask_token in super().get_vocabulary():
122
128
  self.mask_token_id = super().token_to_id(self.mask_token)
123
129
  else:
124
130
  self.mask_token_id = super().vocabulary_size()
125
131
  else:
126
- self.cls_token_id = None
127
- self.sep_token_id = None
128
- self.pad_token_id = None
129
132
  self.mask_token_id = None
130
133
 
131
134
  def vocabulary_size(self):
@@ -136,6 +139,8 @@ class DebertaV3Tokenizer(SentencePieceTokenizer):
136
139
 
137
140
  def get_vocabulary(self):
138
141
  sentence_piece_vocabulary = super().get_vocabulary()
142
+ if self.mask_token_id is None:
143
+ return sentence_piece_vocabulary
139
144
  if self.mask_token_id < super().vocabulary_size():
140
145
  return sentence_piece_vocabulary
141
146
  return sentence_piece_vocabulary + ["[MASK]"]
@@ -14,14 +14,13 @@
14
14
  import keras
15
15
 
16
16
  from keras_hub.src.api_export import keras_hub_export
17
- from keras_hub.src.models.backbone import Backbone
17
+ from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone
18
18
 
19
- BN_AXIS = 3
20
19
  BN_EPSILON = 1.001e-5
21
20
 
22
21
 
23
22
  @keras_hub_export("keras_hub.models.DenseNetBackbone")
24
- class DenseNetBackbone(Backbone):
23
+ class DenseNetBackbone(FeaturePyramidBackbone):
25
24
  """Instantiates the DenseNet architecture.
26
25
 
27
26
  This class implements a DenseNet backbone as described in
@@ -35,7 +34,7 @@ class DenseNetBackbone(Backbone):
35
34
  include_rescaling: bool, whether to rescale the inputs. If set
36
35
  to `True`, inputs will be passed through a `Rescaling(1/255.0)`
37
36
  layer. Defaults to `True`.
38
- image_shape: optional shape tuple, defaults to (224, 224, 3).
37
+ image_shape: optional shape tuple, defaults to (None, None, 3).
39
38
  compression_ratio: float, compression rate at transition layers,
40
39
  defaults to 0.5.
41
40
  growth_rate: int, number of filters added by each dense block,
@@ -62,12 +61,14 @@ class DenseNetBackbone(Backbone):
62
61
  self,
63
62
  stackwise_num_repeats,
64
63
  include_rescaling=True,
65
- image_shape=(224, 224, 3),
64
+ image_shape=(None, None, 3),
66
65
  compression_ratio=0.5,
67
66
  growth_rate=32,
68
67
  **kwargs,
69
68
  ):
70
69
  # === Functional Model ===
70
+ data_format = keras.config.image_data_format()
71
+ channel_axis = -1 if data_format == "channels_last" else 1
71
72
  image_input = keras.layers.Input(shape=image_shape)
72
73
 
73
74
  x = image_input
@@ -75,37 +76,47 @@ class DenseNetBackbone(Backbone):
75
76
  x = keras.layers.Rescaling(1 / 255.0)(x)
76
77
 
77
78
  x = keras.layers.Conv2D(
78
- 64, 7, strides=2, use_bias=False, padding="same", name="conv1_conv"
79
+ 64,
80
+ 7,
81
+ strides=2,
82
+ use_bias=False,
83
+ padding="same",
84
+ data_format=data_format,
85
+ name="conv1_conv",
79
86
  )(x)
80
87
  x = keras.layers.BatchNormalization(
81
- axis=BN_AXIS, epsilon=BN_EPSILON, name="conv1_bn"
88
+ axis=channel_axis, epsilon=BN_EPSILON, name="conv1_bn"
82
89
  )(x)
83
90
  x = keras.layers.Activation("relu", name="conv1_relu")(x)
84
91
  x = keras.layers.MaxPooling2D(
85
- 3, strides=2, padding="same", name="pool1"
92
+ 3, strides=2, padding="same", data_format=data_format, name="pool1"
86
93
  )(x)
87
94
 
95
+ pyramid_outputs = {}
88
96
  for stack_index in range(len(stackwise_num_repeats) - 1):
89
97
  index = stack_index + 2
90
98
  x = apply_dense_block(
91
99
  x,
100
+ channel_axis,
92
101
  stackwise_num_repeats[stack_index],
93
102
  growth_rate,
94
103
  name=f"conv{index}",
95
104
  )
105
+ pyramid_outputs[f"P{index}"] = x
96
106
  x = apply_transition_block(
97
- x, compression_ratio, name=f"pool{index}"
107
+ x, channel_axis, compression_ratio, name=f"pool{index}"
98
108
  )
99
109
 
100
110
  x = apply_dense_block(
101
111
  x,
112
+ channel_axis,
102
113
  stackwise_num_repeats[-1],
103
114
  growth_rate,
104
115
  name=f"conv{len(stackwise_num_repeats) + 1}",
105
116
  )
106
-
117
+ pyramid_outputs[f"P{len(stackwise_num_repeats) +1}"] = x
107
118
  x = keras.layers.BatchNormalization(
108
- axis=BN_AXIS, epsilon=BN_EPSILON, name="bn"
119
+ axis=channel_axis, epsilon=BN_EPSILON, name="bn"
109
120
  )(x)
110
121
  x = keras.layers.Activation("relu", name="relu")(x)
111
122
 
@@ -117,6 +128,7 @@ class DenseNetBackbone(Backbone):
117
128
  self.compression_ratio = compression_ratio
118
129
  self.growth_rate = growth_rate
119
130
  self.image_shape = image_shape
131
+ self.pyramid_outputs = pyramid_outputs
120
132
 
121
133
  def get_config(self):
122
134
  config = super().get_config()
@@ -132,7 +144,7 @@ class DenseNetBackbone(Backbone):
132
144
  return config
133
145
 
134
146
 
135
- def apply_dense_block(x, num_repeats, growth_rate, name=None):
147
+ def apply_dense_block(x, channel_axis, num_repeats, growth_rate, name=None):
136
148
  """A dense block.
137
149
 
138
150
  Args:
@@ -145,11 +157,13 @@ def apply_dense_block(x, num_repeats, growth_rate, name=None):
145
157
  name = f"dense_block_{keras.backend.get_uid('dense_block')}"
146
158
 
147
159
  for i in range(num_repeats):
148
- x = apply_conv_block(x, growth_rate, name=f"{name}_block_{i}")
160
+ x = apply_conv_block(
161
+ x, channel_axis, growth_rate, name=f"{name}_block_{i}"
162
+ )
149
163
  return x
150
164
 
151
165
 
152
- def apply_transition_block(x, compression_ratio, name=None):
166
+ def apply_transition_block(x, channel_axis, compression_ratio, name=None):
153
167
  """A transition block.
154
168
 
155
169
  Args:
@@ -157,24 +171,28 @@ def apply_transition_block(x, compression_ratio, name=None):
157
171
  compression_ratio: float, compression rate at transition layers.
158
172
  name: string, block label.
159
173
  """
174
+ data_format = keras.config.image_data_format()
160
175
  if name is None:
161
176
  name = f"transition_block_{keras.backend.get_uid('transition_block')}"
162
177
 
163
178
  x = keras.layers.BatchNormalization(
164
- axis=BN_AXIS, epsilon=BN_EPSILON, name=f"{name}_bn"
179
+ axis=channel_axis, epsilon=BN_EPSILON, name=f"{name}_bn"
165
180
  )(x)
166
181
  x = keras.layers.Activation("relu", name=f"{name}_relu")(x)
167
182
  x = keras.layers.Conv2D(
168
- int(x.shape[BN_AXIS] * compression_ratio),
183
+ int(x.shape[channel_axis] * compression_ratio),
169
184
  1,
170
185
  use_bias=False,
186
+ data_format=data_format,
171
187
  name=f"{name}_conv",
172
188
  )(x)
173
- x = keras.layers.AveragePooling2D(2, strides=2, name=f"{name}_pool")(x)
189
+ x = keras.layers.AveragePooling2D(
190
+ 2, strides=2, data_format=data_format, name=f"{name}_pool"
191
+ )(x)
174
192
  return x
175
193
 
176
194
 
177
- def apply_conv_block(x, growth_rate, name=None):
195
+ def apply_conv_block(x, channel_axis, growth_rate, name=None):
178
196
  """A building block for a dense block.
179
197
 
180
198
  Args:
@@ -182,19 +200,24 @@ def apply_conv_block(x, growth_rate, name=None):
182
200
  growth_rate: int, number of filters added by each dense block.
183
201
  name: string, block label.
184
202
  """
203
+ data_format = keras.config.image_data_format()
185
204
  if name is None:
186
205
  name = f"conv_block_{keras.backend.get_uid('conv_block')}"
187
206
 
188
207
  shortcut = x
189
208
  x = keras.layers.BatchNormalization(
190
- axis=BN_AXIS, epsilon=BN_EPSILON, name=f"{name}_0_bn"
209
+ axis=channel_axis, epsilon=BN_EPSILON, name=f"{name}_0_bn"
191
210
  )(x)
192
211
  x = keras.layers.Activation("relu", name=f"{name}_0_relu")(x)
193
212
  x = keras.layers.Conv2D(
194
- 4 * growth_rate, 1, use_bias=False, name=f"{name}_1_conv"
213
+ 4 * growth_rate,
214
+ 1,
215
+ use_bias=False,
216
+ data_format=data_format,
217
+ name=f"{name}_1_conv",
195
218
  )(x)
196
219
  x = keras.layers.BatchNormalization(
197
- axis=BN_AXIS, epsilon=BN_EPSILON, name=f"{name}_1_bn"
220
+ axis=channel_axis, epsilon=BN_EPSILON, name=f"{name}_1_bn"
198
221
  )(x)
199
222
  x = keras.layers.Activation("relu", name=f"{name}_1_relu")(x)
200
223
  x = keras.layers.Conv2D(
@@ -202,9 +225,10 @@ def apply_conv_block(x, growth_rate, name=None):
202
225
  3,
203
226
  padding="same",
204
227
  use_bias=False,
228
+ data_format=data_format,
205
229
  name=f"{name}_2_conv",
206
230
  )(x)
207
- x = keras.layers.Concatenate(axis=BN_AXIS, name=f"{name}_concat")(
231
+ x = keras.layers.Concatenate(axis=channel_axis, name=f"{name}_concat")(
208
232
  [shortcut, x]
209
233
  )
210
234
  return x
@@ -18,9 +18,6 @@ from keras_hub.src.models.distil_bert.distil_bert_backbone import (
18
18
  from keras_hub.src.models.distil_bert.distil_bert_presets import (
19
19
  backbone_presets,
20
20
  )
21
- from keras_hub.src.models.distil_bert.distil_bert_tokenizer import (
22
- DistilBertTokenizer,
23
- )
24
21
  from keras_hub.src.utils.preset_utils import register_presets
25
22
 
26
- register_presets(backbone_presets, (DistilBertBackbone, DistilBertTokenizer))
23
+ register_presets(backbone_presets, DistilBertBackbone)
@@ -13,19 +13,20 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import keras
16
- from absl import logging
17
16
 
18
17
  from keras_hub.src.api_export import keras_hub_export
19
- from keras_hub.src.layers.preprocessing.masked_lm_mask_generator import (
20
- MaskedLMMaskGenerator,
18
+ from keras_hub.src.models.distil_bert.distil_bert_backbone import (
19
+ DistilBertBackbone,
21
20
  )
22
- from keras_hub.src.models.distil_bert.distil_bert_preprocessor import (
23
- DistilBertPreprocessor,
21
+ from keras_hub.src.models.distil_bert.distil_bert_tokenizer import (
22
+ DistilBertTokenizer,
24
23
  )
24
+ from keras_hub.src.models.masked_lm_preprocessor import MaskedLMPreprocessor
25
+ from keras_hub.src.utils.tensor_utils import preprocessing_function
25
26
 
26
27
 
27
28
  @keras_hub_export("keras_hub.models.DistilBertMaskedLMPreprocessor")
28
- class DistilBertMaskedLMPreprocessor(DistilBertPreprocessor):
29
+ class DistilBertMaskedLMPreprocessor(MaskedLMPreprocessor):
29
30
  """DistilBERT preprocessing for the masked language modeling task.
30
31
 
31
32
  This preprocessing layer will prepare inputs for a masked language modeling
@@ -119,76 +120,13 @@ class DistilBertMaskedLMPreprocessor(DistilBertPreprocessor):
119
120
  ```
120
121
  """
121
122
 
122
- def __init__(
123
- self,
124
- tokenizer,
125
- sequence_length=512,
126
- truncate="round_robin",
127
- mask_selection_rate=0.15,
128
- mask_selection_length=96,
129
- mask_token_rate=0.8,
130
- random_token_rate=0.1,
131
- **kwargs,
132
- ):
133
- super().__init__(
134
- tokenizer,
135
- sequence_length=sequence_length,
136
- truncate=truncate,
137
- **kwargs,
138
- )
139
- self.mask_selection_rate = mask_selection_rate
140
- self.mask_selection_length = mask_selection_length
141
- self.mask_token_rate = mask_token_rate
142
- self.random_token_rate = random_token_rate
143
- self.masker = None
144
-
145
- def build(self, input_shape):
146
- super().build(input_shape)
147
- # Defer masker creation to `build()` so that we can be sure tokenizer
148
- # assets have loaded when restoring a saved model.
149
- self.masker = MaskedLMMaskGenerator(
150
- mask_selection_rate=self.mask_selection_rate,
151
- mask_selection_length=self.mask_selection_length,
152
- mask_token_rate=self.mask_token_rate,
153
- random_token_rate=self.random_token_rate,
154
- vocabulary_size=self.tokenizer.vocabulary_size(),
155
- mask_token_id=self.tokenizer.mask_token_id,
156
- unselectable_token_ids=[
157
- self.tokenizer.cls_token_id,
158
- self.tokenizer.sep_token_id,
159
- self.tokenizer.pad_token_id,
160
- ],
161
- )
123
+ backbone_cls = DistilBertBackbone
124
+ tokenizer_cls = DistilBertTokenizer
162
125
 
126
+ @preprocessing_function
163
127
  def call(self, x, y=None, sample_weight=None):
164
- if y is not None or sample_weight is not None:
165
- logging.warning(
166
- f"{self.__class__.__name__} generates `y` and `sample_weight` "
167
- "based on your input data, but your data already contains `y` "
168
- "or `sample_weight`. Your `y` and `sample_weight` will be "
169
- "ignored."
170
- )
171
-
172
- x = super().call(x)
173
- token_ids, padding_mask = x["token_ids"], x["padding_mask"]
174
- masker_outputs = self.masker(token_ids)
175
- x = {
176
- "token_ids": masker_outputs["token_ids"],
177
- "padding_mask": padding_mask,
178
- "mask_positions": masker_outputs["mask_positions"],
179
- }
180
- y = masker_outputs["mask_ids"]
181
- sample_weight = masker_outputs["mask_weights"]
128
+ output = super().call(x, y=y, sample_weight=sample_weight)
129
+ x, y, sample_weight = keras.utils.unpack_x_y_sample_weight(output)
130
+ # Backbone has no segment ID input.
131
+ del x["segment_ids"]
182
132
  return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
183
-
184
- def get_config(self):
185
- config = super().get_config()
186
- config.update(
187
- {
188
- "mask_selection_rate": self.mask_selection_rate,
189
- "mask_selection_length": self.mask_selection_length,
190
- "mask_token_rate": self.mask_token_rate,
191
- "random_token_rate": self.random_token_rate,
192
- }
193
- )
194
- return config
@@ -16,20 +16,20 @@
16
16
  import keras
17
17
 
18
18
  from keras_hub.src.api_export import keras_hub_export
19
- from keras_hub.src.models.classifier import Classifier
20
19
  from keras_hub.src.models.distil_bert.distil_bert_backbone import (
21
20
  DistilBertBackbone,
22
21
  )
23
22
  from keras_hub.src.models.distil_bert.distil_bert_backbone import (
24
23
  distilbert_kernel_initializer,
25
24
  )
26
- from keras_hub.src.models.distil_bert.distil_bert_preprocessor import (
27
- DistilBertPreprocessor,
25
+ from keras_hub.src.models.distil_bert.distil_bert_text_classifier_preprocessor import (
26
+ DistilBertTextClassifierPreprocessor,
28
27
  )
28
+ from keras_hub.src.models.text_classifier import TextClassifier
29
29
 
30
30
 
31
- @keras_hub_export("keras_hub.models.DistilBertClassifier")
32
- class DistilBertClassifier(Classifier):
31
+ @keras_hub_export("keras_hub.models.DistilBertTextClassifier")
32
+ class DistilBertTextClassifier(TextClassifier):
33
33
  """An end-to-end DistilBERT model for classification tasks.
34
34
 
35
35
  This model attaches a classification head to a
@@ -50,7 +50,7 @@ class DistilBertClassifier(Classifier):
50
50
  Args:
51
51
  backbone: A `keras_hub.models.DistilBert` instance.
52
52
  num_classes: int. Number of classes to predict.
53
- preprocessor: A `keras_hub.models.DistilBertPreprocessor` or `None`. If
53
+ preprocessor: A `keras_hub.models.DistilBertTextClassifierPreprocessor` or `None`. If
54
54
  `None`, this model will not apply preprocessing, and inputs should
55
55
  be preprocessed before calling the model.
56
56
  activation: Optional `str` or callable. The
@@ -69,12 +69,12 @@ class DistilBertClassifier(Classifier):
69
69
  labels = [0, 3]
70
70
 
71
71
  # Use a shorter sequence length.
72
- preprocessor = keras_hub.models.DistilBertPreprocessor.from_preset(
72
+ preprocessor = keras_hub.models.DistilBertTextClassifierPreprocessor.from_preset(
73
73
  "distil_bert_base_en_uncased",
74
74
  sequence_length=128,
75
75
  )
76
76
  # Pretrained classifier.
77
- classifier = keras_hub.models.DistilBertClassifier.from_preset(
77
+ classifier = keras_hub.models.DistilBertTextClassifier.from_preset(
78
78
  "distil_bert_base_en_uncased",
79
79
  num_classes=4,
80
80
  preprocessor=preprocessor,
@@ -102,7 +102,7 @@ class DistilBertClassifier(Classifier):
102
102
  labels = [0, 3]
103
103
 
104
104
  # Pretrained classifier without preprocessing.
105
- classifier = keras_hub.models.DistilBertClassifier.from_preset(
105
+ classifier = keras_hub.models.DistilBertTextClassifier.from_preset(
106
106
  "distil_bert_base_en_uncased",
107
107
  num_classes=4,
108
108
  preprocessor=None,
@@ -119,7 +119,7 @@ class DistilBertClassifier(Classifier):
119
119
  tokenizer = keras_hub.models.DistilBertTokenizer(
120
120
  vocabulary=vocab,
121
121
  )
122
- preprocessor = keras_hub.models.DistilBertPreprocessor(
122
+ preprocessor = keras_hub.models.DistilBertTextClassifierPreprocessor(
123
123
  tokenizer=tokenizer,
124
124
  sequence_length=128,
125
125
  )
@@ -131,7 +131,7 @@ class DistilBertClassifier(Classifier):
131
131
  intermediate_dim=512,
132
132
  max_sequence_length=128,
133
133
  )
134
- classifier = keras_hub.models.DistilBertClassifier(
134
+ classifier = keras_hub.models.DistilBertTextClassifier(
135
135
  backbone=backbone,
136
136
  preprocessor=preprocessor,
137
137
  num_classes=4,
@@ -141,7 +141,7 @@ class DistilBertClassifier(Classifier):
141
141
  """
142
142
 
143
143
  backbone_cls = DistilBertBackbone
144
- preprocessor_cls = DistilBertPreprocessor
144
+ preprocessor_cls = DistilBertTextClassifierPreprocessor
145
145
 
146
146
  def __init__(
147
147
  self,