keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.16.0.dev2024092017__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. keras_hub/__init__.py +0 -6
  2. keras_hub/api/__init__.py +2 -0
  3. keras_hub/api/bounding_box/__init__.py +36 -0
  4. keras_hub/api/layers/__init__.py +14 -0
  5. keras_hub/api/models/__init__.py +97 -48
  6. keras_hub/api/tokenizers/__init__.py +30 -0
  7. keras_hub/api/utils/__init__.py +22 -0
  8. keras_hub/src/api_export.py +15 -9
  9. keras_hub/src/bounding_box/__init__.py +13 -0
  10. keras_hub/src/bounding_box/converters.py +529 -0
  11. keras_hub/src/bounding_box/formats.py +162 -0
  12. keras_hub/src/bounding_box/iou.py +263 -0
  13. keras_hub/src/bounding_box/to_dense.py +95 -0
  14. keras_hub/src/bounding_box/to_ragged.py +99 -0
  15. keras_hub/src/bounding_box/utils.py +194 -0
  16. keras_hub/src/bounding_box/validate_format.py +99 -0
  17. keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
  18. keras_hub/src/layers/preprocessing/image_converter.py +130 -0
  19. keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
  20. keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
  21. keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
  22. keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
  23. keras_hub/src/layers/preprocessing/random_swap.py +33 -31
  24. keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
  25. keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
  26. keras_hub/src/models/albert/__init__.py +1 -2
  27. keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
  28. keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +34 -10
  29. keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
  30. keras_hub/src/models/albert/albert_tokenizer.py +17 -36
  31. keras_hub/src/models/backbone.py +12 -34
  32. keras_hub/src/models/bart/__init__.py +1 -2
  33. keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
  34. keras_hub/src/models/bart/bart_tokenizer.py +12 -39
  35. keras_hub/src/models/bert/__init__.py +1 -5
  36. keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
  37. keras_hub/src/models/bert/bert_presets.py +1 -4
  38. keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +19 -12
  39. keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
  40. keras_hub/src/models/bert/bert_tokenizer.py +17 -35
  41. keras_hub/src/models/bloom/__init__.py +1 -2
  42. keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
  43. keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
  44. keras_hub/src/models/causal_lm.py +10 -29
  45. keras_hub/src/models/causal_lm_preprocessor.py +195 -0
  46. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
  47. keras_hub/src/models/deberta_v3/__init__.py +1 -4
  48. keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
  49. keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +16 -11
  50. keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
  51. keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
  52. keras_hub/src/models/densenet/densenet_backbone.py +46 -22
  53. keras_hub/src/models/distil_bert/__init__.py +1 -4
  54. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
  55. keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +17 -12
  56. keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
  57. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
  58. keras_hub/src/models/efficientnet/__init__.py +13 -0
  59. keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
  60. keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
  61. keras_hub/src/models/efficientnet/mbconv.py +238 -0
  62. keras_hub/src/models/electra/__init__.py +1 -2
  63. keras_hub/src/models/electra/electra_tokenizer.py +17 -32
  64. keras_hub/src/models/f_net/__init__.py +1 -2
  65. keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
  66. keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +17 -10
  67. keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
  68. keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
  69. keras_hub/src/models/falcon/__init__.py +1 -2
  70. keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
  71. keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
  72. keras_hub/src/models/gemma/__init__.py +1 -2
  73. keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
  74. keras_hub/src/models/gemma/gemma_decoder_block.py +1 -1
  75. keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
  76. keras_hub/src/models/gpt2/__init__.py +1 -2
  77. keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
  78. keras_hub/src/models/gpt2/gpt2_preprocessor.py +12 -90
  79. keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
  80. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
  81. keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
  82. keras_hub/src/models/image_classifier.py +0 -5
  83. keras_hub/src/models/image_classifier_preprocessor.py +83 -0
  84. keras_hub/src/models/llama/__init__.py +1 -2
  85. keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
  86. keras_hub/src/models/llama/llama_tokenizer.py +12 -25
  87. keras_hub/src/models/llama3/__init__.py +1 -2
  88. keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
  89. keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
  90. keras_hub/src/models/masked_lm.py +0 -2
  91. keras_hub/src/models/masked_lm_preprocessor.py +156 -0
  92. keras_hub/src/models/mistral/__init__.py +1 -2
  93. keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
  94. keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
  95. keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
  96. keras_hub/src/models/mobilenet/__init__.py +13 -0
  97. keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
  98. keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
  99. keras_hub/src/models/opt/__init__.py +1 -2
  100. keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
  101. keras_hub/src/models/opt/opt_tokenizer.py +12 -41
  102. keras_hub/src/models/pali_gemma/__init__.py +1 -4
  103. keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
  104. keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
  105. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
  106. keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +11 -3
  107. keras_hub/src/models/phi3/__init__.py +1 -2
  108. keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
  109. keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
  110. keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
  111. keras_hub/src/models/preprocessor.py +72 -83
  112. keras_hub/src/models/resnet/__init__.py +6 -0
  113. keras_hub/src/models/resnet/resnet_backbone.py +390 -42
  114. keras_hub/src/models/resnet/resnet_image_classifier.py +33 -6
  115. keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
  116. keras_hub/src/models/{llama3/llama3_preprocessor.py → resnet/resnet_image_converter.py} +7 -5
  117. keras_hub/src/models/resnet/resnet_presets.py +95 -0
  118. keras_hub/src/models/retinanet/__init__.py +13 -0
  119. keras_hub/src/models/retinanet/anchor_generator.py +175 -0
  120. keras_hub/src/models/retinanet/box_matcher.py +259 -0
  121. keras_hub/src/models/retinanet/non_max_supression.py +578 -0
  122. keras_hub/src/models/roberta/__init__.py +1 -2
  123. keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
  124. keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +16 -11
  125. keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
  126. keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
  127. keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
  128. keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
  129. keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
  130. keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
  131. keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
  132. keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
  133. keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
  134. keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
  135. keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
  136. keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
  137. keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
  138. keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
  139. keras_hub/src/models/t5/__init__.py +1 -2
  140. keras_hub/src/models/t5/t5_tokenizer.py +13 -23
  141. keras_hub/src/models/task.py +71 -116
  142. keras_hub/src/models/{classifier.py → text_classifier.py} +19 -13
  143. keras_hub/src/models/text_classifier_preprocessor.py +138 -0
  144. keras_hub/src/models/whisper/__init__.py +1 -2
  145. keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
  146. keras_hub/src/models/whisper/whisper_backbone.py +0 -3
  147. keras_hub/src/models/whisper/whisper_presets.py +10 -10
  148. keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
  149. keras_hub/src/models/xlm_roberta/__init__.py +1 -4
  150. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
  151. keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +16 -11
  152. keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
  153. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
  154. keras_hub/src/tests/test_case.py +46 -0
  155. keras_hub/src/tokenizers/byte_pair_tokenizer.py +30 -17
  156. keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
  157. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +20 -7
  158. keras_hub/src/tokenizers/tokenizer.py +67 -32
  159. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
  160. keras_hub/src/tokenizers/word_piece_tokenizer.py +34 -47
  161. keras_hub/src/utils/imagenet/__init__.py +13 -0
  162. keras_hub/src/utils/imagenet/imagenet_utils.py +1067 -0
  163. keras_hub/src/utils/keras_utils.py +0 -50
  164. keras_hub/src/utils/preset_utils.py +230 -68
  165. keras_hub/src/utils/tensor_utils.py +187 -69
  166. keras_hub/src/utils/timm/convert_resnet.py +19 -16
  167. keras_hub/src/utils/timm/preset_loader.py +66 -0
  168. keras_hub/src/utils/transformers/convert_albert.py +193 -0
  169. keras_hub/src/utils/transformers/convert_bart.py +373 -0
  170. keras_hub/src/utils/transformers/convert_bert.py +7 -17
  171. keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
  172. keras_hub/src/utils/transformers/convert_gemma.py +5 -19
  173. keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
  174. keras_hub/src/utils/transformers/convert_llama3.py +7 -18
  175. keras_hub/src/utils/transformers/convert_mistral.py +129 -0
  176. keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
  177. keras_hub/src/utils/transformers/preset_loader.py +77 -0
  178. keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
  179. keras_hub/src/version_utils.py +1 -1
  180. keras_hub_nightly-0.16.0.dev2024092017.dist-info/METADATA +202 -0
  181. keras_hub_nightly-0.16.0.dev2024092017.dist-info/RECORD +334 -0
  182. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/WHEEL +1 -1
  183. keras_hub/src/models/bart/bart_preprocessor.py +0 -276
  184. keras_hub/src/models/bloom/bloom_preprocessor.py +0 -185
  185. keras_hub/src/models/electra/electra_preprocessor.py +0 -154
  186. keras_hub/src/models/falcon/falcon_preprocessor.py +0 -187
  187. keras_hub/src/models/gemma/gemma_preprocessor.py +0 -191
  188. keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +0 -145
  189. keras_hub/src/models/llama/llama_preprocessor.py +0 -189
  190. keras_hub/src/models/mistral/mistral_preprocessor.py +0 -190
  191. keras_hub/src/models/opt/opt_preprocessor.py +0 -188
  192. keras_hub/src/models/phi3/phi3_preprocessor.py +0 -190
  193. keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
  194. keras_hub/src/utils/timm/convert.py +0 -37
  195. keras_hub/src/utils/transformers/convert.py +0 -101
  196. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/METADATA +0 -34
  197. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
  198. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/top_level.txt +0 -0
--- keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py
+++ keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py
@@ -13,19 +13,19 @@
 # limitations under the License.
 
 import keras
-from absl import logging
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.preprocessing.masked_lm_mask_generator import (
-    MaskedLMMaskGenerator,
-)
-from keras_hub.src.models.roberta.roberta_preprocessor import (
-    RobertaPreprocessor,
+from keras_hub.src.layers.preprocessing.multi_segment_packer import (
+    MultiSegmentPacker,
 )
+from keras_hub.src.models.masked_lm_preprocessor import MaskedLMPreprocessor
+from keras_hub.src.models.roberta.roberta_backbone import RobertaBackbone
+from keras_hub.src.models.roberta.roberta_tokenizer import RobertaTokenizer
+from keras_hub.src.utils.tensor_utils import preprocessing_function
 
 
 @keras_hub_export("keras_hub.models.RobertaMaskedLMPreprocessor")
-class RobertaMaskedLMPreprocessor(RobertaPreprocessor):
+class RobertaMaskedLMPreprocessor(MaskedLMPreprocessor):
     """RoBERTa preprocessing for the masked language modeling task.
 
     This preprocessing layer will prepare inputs for a masked language modeling
@@ -122,77 +122,25 @@ class RobertaMaskedLMPreprocessor(RobertaPreprocessor):
     ```
     """
 
-    def __init__(
-        self,
-        tokenizer,
-        sequence_length=512,
-        truncate="round_robin",
-        mask_selection_rate=0.15,
-        mask_selection_length=96,
-        mask_token_rate=0.8,
-        random_token_rate=0.1,
-        **kwargs,
-    ):
-        super().__init__(
-            tokenizer,
-            sequence_length=sequence_length,
-            truncate=truncate,
-            **kwargs,
-        )
-        self.mask_selection_rate = mask_selection_rate
-        self.mask_selection_length = mask_selection_length
-        self.mask_token_rate = mask_token_rate
-        self.random_token_rate = random_token_rate
-        self.masker = None
+    backbone_cls = RobertaBackbone
+    tokenizer_cls = RobertaTokenizer
 
     def build(self, input_shape):
         super().build(input_shape)
-        # Defer packer creation to `build()` so that we can be sure tokenizer
-        # assets have loaded when restoring a saved model.
-        self.masker = MaskedLMMaskGenerator(
-            mask_selection_rate=self.mask_selection_rate,
-            mask_selection_length=self.mask_selection_length,
-            mask_token_rate=self.mask_token_rate,
-            random_token_rate=self.random_token_rate,
-            vocabulary_size=self.tokenizer.vocabulary_size(),
-            mask_token_id=self.tokenizer.mask_token_id,
-            unselectable_token_ids=[
-                self.tokenizer.start_token_id,
-                self.tokenizer.end_token_id,
-                self.tokenizer.pad_token_id,
-            ],
+        # RoBERTa doubles up the sep token, so we override build.
+        self.packer = MultiSegmentPacker(
+            start_value=self.tokenizer.start_token_id,
+            end_value=self.tokenizer.end_token_id,
+            sep_value=[self.tokenizer.end_token_id] * 2,
+            pad_value=self.tokenizer.pad_token_id,
+            truncate=self.truncate,
+            sequence_length=self.sequence_length,
         )
-        self.built = True
 
+    @preprocessing_function
     def call(self, x, y=None, sample_weight=None):
-        if y is not None or sample_weight is not None:
-            logging.warning(
-                f"{self.__class__.__name__} generates `y` and `sample_weight` "
-                "based on your input data, but your data already contains `y` "
-                "or `sample_weight`. Your `y` and `sample_weight` will be "
-                "ignored."
-            )
-
-        x = super().call(x)
-        token_ids, padding_mask = x["token_ids"], x["padding_mask"]
-        masker_outputs = self.masker(token_ids)
-        x = {
-            "token_ids": masker_outputs["token_ids"],
-            "padding_mask": padding_mask,
-            "mask_positions": masker_outputs["mask_positions"],
-        }
-        y = masker_outputs["mask_ids"]
-        sample_weight = masker_outputs["mask_weights"]
+        output = super().call(x, y=y, sample_weight=sample_weight)
+        x, y, sample_weight = keras.utils.unpack_x_y_sample_weight(output)
+        # Backbone has no segment ID input.
+        del x["segment_ids"]
         return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
-
-    def get_config(self):
-        config = super().get_config()
-        config.update(
-            {
-                "mask_selection_rate": self.mask_selection_rate,
-                "mask_selection_length": self.mask_selection_length,
-                "mask_token_rate": self.mask_token_rate,
-                "random_token_rate": self.random_token_rate,
-            }
-        )
-        return config
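The net effect of this refactor is that the masking configuration (`mask_selection_rate`, `mask_token_rate`, and friends) and the `get_config` bookkeeping move to the shared `MaskedLMPreprocessor` base class, while the RoBERTa subclass only declares its backbone/tokenizer classes and its double-sep packing. A minimal usage sketch, assuming this nightly is installed and the `roberta_base_en` preset is available (the keyword values shown are the former subclass defaults, now assumed to be accepted by the base class):

```python
import keras_hub

# The masking knobs now live on MaskedLMPreprocessor, so they can still be
# passed through from_preset(); behavior should match the old subclass.
preprocessor = keras_hub.models.RobertaMaskedLMPreprocessor.from_preset(
    "roberta_base_en",
    sequence_length=128,
    mask_selection_rate=0.15,
)
x, y, sample_weight = preprocessor("a quick fox ran")
# "segment_ids" is deleted by the overridden call(), so only these remain.
print(sorted(x.keys()))  # ['mask_positions', 'padding_mask', 'token_ids']
```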
--- keras_hub/src/models/roberta/roberta_classifier.py
+++ keras_hub/src/models/roberta/roberta_text_classifier.py
@@ -16,18 +16,23 @@
 import keras
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.models.classifier import Classifier
 from keras_hub.src.models.roberta.roberta_backbone import RobertaBackbone
 from keras_hub.src.models.roberta.roberta_backbone import (
     roberta_kernel_initializer,
 )
-from keras_hub.src.models.roberta.roberta_preprocessor import (
-    RobertaPreprocessor,
+from keras_hub.src.models.roberta.roberta_text_classifier_preprocessor import (
+    RobertaTextClassifierPreprocessor,
 )
+from keras_hub.src.models.text_classifier import TextClassifier
 
 
-@keras_hub_export("keras_hub.models.RobertaClassifier")
-class RobertaClassifier(Classifier):
+@keras_hub_export(
+    [
+        "keras_hub.models.RobertaTextClassifier",
+        "keras_hub.models.RobertaClassifier",
+    ]
+)
+class RobertaTextClassifier(TextClassifier):
     """An end-to-end RoBERTa model for classification tasks.
 
     This model attaches a classification head to a
@@ -48,7 +53,7 @@ class RobertaClassifier(Classifier):
     Args:
         backbone: A `keras_hub.models.RobertaBackbone` instance.
        num_classes: int. Number of classes to predict.
-        preprocessor: A `keras_hub.models.RobertaPreprocessor` or `None`. If
+        preprocessor: A `keras_hub.models.RobertaTextClassifierPreprocessor` or `None`. If
            `None`, this model will not apply preprocessing, and inputs should
            be preprocessed before calling the model.
        activation: Optional `str` or callable. The activation function to use
@@ -66,7 +71,7 @@ class RobertaClassifier(Classifier):
     labels = [0, 3]
 
     # Pretrained classifier.
-    classifier = keras_hub.models.RobertaClassifier.from_preset(
+    classifier = keras_hub.models.RobertaTextClassifier.from_preset(
         "roberta_base_en",
         num_classes=4,
     )
@@ -94,7 +99,7 @@ class RobertaClassifier(Classifier):
     labels = [0, 3]
 
     # Pretrained classifier without preprocessing.
-    classifier = keras_hub.models.RobertaClassifier.from_preset(
+    classifier = keras_hub.models.RobertaTextClassifier.from_preset(
         "roberta_base_en",
         num_classes=4,
         preprocessor=None,
@@ -115,7 +120,7 @@ class RobertaClassifier(Classifier):
         vocabulary=vocab,
         merges=merges
     )
-    preprocessor = keras_hub.models.RobertaPreprocessor(
+    preprocessor = keras_hub.models.RobertaTextClassifierPreprocessor(
         tokenizer=tokenizer,
         sequence_length=128,
     )
@@ -127,7 +132,7 @@ class RobertaClassifier(Classifier):
         intermediate_dim=512,
         max_sequence_length=128
     )
-    classifier = keras_hub.models.RobertaClassifier(
+    classifier = keras_hub.models.RobertaTextClassifier(
         backbone=backbone,
         preprocessor=preprocessor,
         num_classes=4,
@@ -137,7 +142,7 @@ class RobertaClassifier(Classifier):
     """
 
     backbone_cls = RobertaBackbone
-    preprocessor_cls = RobertaPreprocessor
+    preprocessor_cls = RobertaTextClassifierPreprocessor
 
     def __init__(
         self,
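Note that the `keras_hub_export` call above now takes a list of paths, registering the class under both the new and the legacy name, so the rename is backward compatible. An illustrative check (hypothetical session, assuming list-valued exports alias to the same object, as the `api_export.py` change in this release suggests):

```python
import keras_hub

# Both export paths should resolve to the same class object, so code written
# against the old RobertaClassifier name keeps working after the rename.
assert (
    keras_hub.models.RobertaClassifier
    is keras_hub.models.RobertaTextClassifier
)
```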
--- keras_hub/src/models/roberta/roberta_preprocessor.py
+++ keras_hub/src/models/roberta/roberta_text_classifier_preprocessor.py
@@ -12,22 +12,27 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import keras
 
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.layers.preprocessing.multi_segment_packer import (
     MultiSegmentPacker,
 )
-from keras_hub.src.models.preprocessor import Preprocessor
+from keras_hub.src.models.roberta.roberta_backbone import RobertaBackbone
 from keras_hub.src.models.roberta.roberta_tokenizer import RobertaTokenizer
-from keras_hub.src.utils.keras_utils import (
-    convert_inputs_to_list_of_tensor_segments,
+from keras_hub.src.models.text_classifier_preprocessor import (
+    TextClassifierPreprocessor,
 )
+from keras_hub.src.utils.tensor_utils import preprocessing_function
 
 
-@keras_hub_export("keras_hub.models.RobertaPreprocessor")
-class RobertaPreprocessor(Preprocessor):
+@keras_hub_export(
+    [
+        "keras_hub.models.RobertaTextClassifierPreprocessor",
+        "keras_hub.models.RobertaPreprocessor",
+    ]
+)
+class RobertaTextClassifierPreprocessor(TextClassifierPreprocessor):
     """A RoBERTa preprocessing layer which tokenizes and packs inputs.
 
     This preprocessing layer will do three things:
@@ -71,7 +76,7 @@ class RobertaPreprocessor(Preprocessor):
 
     Directly calling the layer on data.
     ```python
-    preprocessor = keras_hub.models.RobertaPreprocessor.from_preset(
+    preprocessor = keras_hub.models.TextClassifierPreprocessor.from_preset(
         "roberta_base_en"
     )
 
@@ -95,12 +100,12 @@ class RobertaPreprocessor(Preprocessor):
         vocabulary=vocab,
         merges=merges
     )
-    preprocessor = keras_hub.models.RobertaPreprocessor(tokenizer)
+    preprocessor = keras_hub.models.RobertaTextClassifierPreprocessor(tokenizer)
     preprocessor("a quick fox")
     ```
     Mapping with `tf.data.Dataset`.
     ```python
-    preprocessor = keras_hub.models.RobertaPreprocessor.from_preset(
+    preprocessor = keras_hub.models.TextClassifierPreprocessor.from_preset(
         "roberta_base_en"
     )
 
@@ -131,25 +136,11 @@ class RobertaPreprocessor(Preprocessor):
     ```
     """
 
+    backbone_cls = RobertaBackbone
     tokenizer_cls = RobertaTokenizer
 
-    def __init__(
-        self,
-        tokenizer,
-        sequence_length=512,
-        truncate="round_robin",
-        **kwargs,
-    ):
-        super().__init__(**kwargs)
-
-        self.tokenizer = tokenizer
-        self.packer = None
-        self.truncate = truncate
-        self.sequence_length = sequence_length
-
     def build(self, input_shape):
-        # Defer packer creation to `build()` so that we can be sure tokenizer
-        # assets have loaded when restoring a saved model.
+        # RoBERTa doubles up the sep token, so we override build.
         self.packer = MultiSegmentPacker(
             start_value=self.tokenizer.start_token_id,
             end_value=self.tokenizer.end_token_id,
@@ -160,33 +151,10 @@ class RobertaPreprocessor(Preprocessor):
         )
         self.built = True
 
+    @preprocessing_function
     def call(self, x, y=None, sample_weight=None):
-        x = convert_inputs_to_list_of_tensor_segments(x)
-        x = [self.tokenizer(segment) for segment in x]
-        token_ids, _ = self.packer(x)
-        x = {
-            "token_ids": token_ids,
-            "padding_mask": token_ids != self.tokenizer.pad_token_id,
-        }
+        output = super().call(x, y=y, sample_weight=sample_weight)
+        x, y, sample_weight = keras.utils.unpack_x_y_sample_weight(output)
+        # Backbone has no segment ID input.
+        del x["segment_ids"]
         return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
-
-    def get_config(self):
-        config = super().get_config()
-        config.update(
-            {
-                "sequence_length": self.sequence_length,
-                "truncate": self.truncate,
-            }
-        )
-        return config
-
-    @property
-    def sequence_length(self):
-        """The padded length of model input sequences."""
-        return self._sequence_length
-
-    @sequence_length.setter
-    def sequence_length(self, value):
-        self._sequence_length = value
-        if self.packer is not None:
-            self.packer.sequence_length = value
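After this change the subclass only overrides `build()` (to double the sep token via `sep_value`) and `call()` (to drop the segment IDs the packer emits, since the RoBERTa backbone has no segment input); the constructor, config, and `sequence_length` plumbing come from `TextClassifierPreprocessor`. A short sketch of the resulting output, assuming the `roberta_base_en` preset:

```python
import keras_hub

preprocessor = keras_hub.models.RobertaTextClassifierPreprocessor.from_preset(
    "roberta_base_en",
    sequence_length=12,
)
x = preprocessor("a quick fox")
# The overridden call() deletes "segment_ids", leaving only the keys the
# RoBERTa backbone actually consumes.
print(sorted(x.keys()))  # ['padding_mask', 'token_ids']
```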
--- keras_hub/src/models/roberta/roberta_tokenizer.py
+++ keras_hub/src/models/roberta/roberta_tokenizer.py
@@ -14,10 +14,16 @@
 
 
 from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.roberta.roberta_backbone import RobertaBackbone
 from keras_hub.src.tokenizers.byte_pair_tokenizer import BytePairTokenizer
 
 
-@keras_hub_export("keras_hub.models.RobertaTokenizer")
+@keras_hub_export(
+    [
+        "keras_hub.tokenizers.RobertaTokenizer",
+        "keras_hub.models.RobertaTokenizer",
+    ]
+)
 class RobertaTokenizer(BytePairTokenizer):
     """A RoBERTa tokenizer using Byte-Pair Encoding subword segmentation.
 
@@ -27,10 +33,6 @@ class RobertaTokenizer(BytePairTokenizer):
     models and provides a `from_preset()` method to automatically download
     a matching vocabulary for a RoBERTa preset.
 
-    This tokenizer does not provide truncation or padding of inputs. It can be
-    combined with a `keras_hub.models.RobertaPreprocessor` layer for input
-    packing.
-
     If input is a batch of strings (rank > 0), the layer will output a
     `tf.RaggedTensor` where the last dimension of the output is ragged.
 
@@ -72,61 +74,20 @@ class RobertaTokenizer(BytePairTokenizer):
     ```
     """
 
+    backbone_cls = RobertaBackbone
+
     def __init__(
         self,
         vocabulary=None,
         merges=None,
         **kwargs,
     ):
-        self.start_token = "<s>"
-        self.pad_token = "<pad>"
-        self.end_token = "</s>"
-        self.mask_token = "<mask>"
-
+        self._add_special_token("<s>", "start_token")
+        self._add_special_token("</s>", "end_token")
+        self._add_special_token("<pad>", "pad_token")
+        self._add_special_token("<mask>", "mask_token")
         super().__init__(
             vocabulary=vocabulary,
             merges=merges,
-            unsplittable_tokens=[
-                self.start_token,
-                self.pad_token,
-                self.end_token,
-                self.mask_token,
-            ],
             **kwargs,
         )
-
-    def set_vocabulary_and_merges(self, vocabulary, merges):
-        super().set_vocabulary_and_merges(vocabulary, merges)
-
-        if vocabulary is not None:
-            # Check for necessary special tokens.
-            for token in [
-                self.start_token,
-                self.pad_token,
-                self.end_token,
-                self.mask_token,
-            ]:
-                if token not in self.vocabulary:
-                    raise ValueError(
-                        f"Cannot find token `'{token}'` in the provided "
-                        f"`vocabulary`. Please provide `'{token}'` in your "
-                        "`vocabulary` or use a pretrained `vocabulary` name."
-                    )
-
-            self.start_token_id = self.token_to_id(self.start_token)
-            self.pad_token_id = self.token_to_id(self.pad_token)
-            self.end_token_id = self.token_to_id(self.end_token)
-            self.mask_token_id = self.token_to_id(self.mask_token)
-        else:
-            self.start_token_id = None
-            self.pad_token_id = None
-            self.end_token_id = None
-            self.mask_token_id = None
-
-    def get_config(self):
-        config = super().get_config()
-        # In the constructor, we pass the list of special tokens to the
-        # `unsplittable_tokens` arg of the superclass' constructor. Hence, we
-        # delete it from the config here.
-        del config["unsplittable_tokens"]
-        return config
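`_add_special_token` is a helper on the tokenizer base classes that replaces the manual bookkeeping deleted above: it registers each token as unsplittable, validates it against the vocabulary once one is set, and exposes `start_token`/`start_token_id` style attributes plus a collective `special_token_ids` list (the list the new `Seq2SeqLMPreprocessor` below relies on when stripping generation output). A hedged sketch of the resulting surface, with the printed values assumed from the standard RoBERTa vocabulary:

```python
import keras_hub

tokenizer = keras_hub.tokenizers.RobertaTokenizer.from_preset("roberta_base_en")
# Attributes formerly assigned by hand in __init__/set_vocabulary_and_merges
# are now derived from the _add_special_token registrations.
print(tokenizer.start_token, tokenizer.end_token)  # <s> </s>
print(tokenizer.mask_token_id in tokenizer.special_token_ids)  # True
```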
--- /dev/null
+++ keras_hub/src/models/seq_2_seq_lm_preprocessor.py
@@ -0,0 +1,269 @@
+# Copyright 2024 The KerasHub Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import keras
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.layers.preprocessing.start_end_packer import StartEndPacker
+from keras_hub.src.models.preprocessor import Preprocessor
+from keras_hub.src.utils.tensor_utils import preprocessing_function
+from keras_hub.src.utils.tensor_utils import strip_to_ragged
+
+try:
+    import tensorflow as tf
+except ImportError:
+    tf = None
+
+
+@keras_hub_export("keras_hub.models.Seq2SeqLMPreprocessor")
+class Seq2SeqLMPreprocessor(Preprocessor):
+    """Base class for seq2seq language modeling preprocessing layers.
+
+    `Seq2SeqLMPreprocessor` tasks wrap a `keras_hub.tokenizer.Tokenizer` to
+    create a preprocessing layer for seq2seq language modeling tasks. It is
+    intended to be paired with a `keras_hub.models.Seq2SeqLM` task.
+
+    All `Seq2SeqLMPreprocessor` layers take a dictionary input with keys
+    `"encoder_text"` and `"decoder_text"`.
+
+    This layer will always output a `(x, y, sample_weight)` tuple, where `x`
+    is a dictionary with the tokenized inputs, `y` contains the tokens from `x`
+    offset by 1, and `sample_weight` marks where `y` contains padded
+    values. The exact contents of `x` will vary depending on the model being
+    used.
+
+    A `Seq2SeqLMPreprocessor` contains two extra methods, `generate_preprocess`
+    and `generate_postprocess`, for use with generation. See the examples below.
+
+    All `Seq2SeqLMPreprocessor` tasks include a `from_preset()` constructor
+    which can be used to load a pre-trained config and vocabularies. You can
+    call the `from_preset()` constructor directly on this base class, in which
+    case the correct class for your model will be automatically instantiated.
+
+    Examples:
+    ```python
+    preprocessor = keras_hub.models.Seq2SeqLMPreprocessor.from_preset(
+        "bart_base_en",
+        encoder_sequence_length=256,
+        decoder_sequence_length=256,
+    )
+
+    # Tokenize, mask and pack a single sentence.
+    x = {
+        "encoder_text": "The fox was sleeping.",
+        "decoder_text": "The fox was awake.",
+    }
+    x, y, sample_weight = preprocessor(x)
+
+    # Tokenize and pad/truncate a batch of labeled sentences.
+    x = {
+        "encoder_text": ["The fox was sleeping."],
+        "decoder_text": ["The fox was awake."],
+    }
+    x, y, sample_weight = preprocessor(x)
+
+    # With a `tf.data.Dataset`.
+    ds = tf.data.Dataset.from_tensor_slices(x)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+
+    # Generate preprocess and postprocess.
+    x = preprocessor.generate_preprocess(x)  # Tokenized numeric inputs.
+    x = preprocessor.generate_postprocess(x)  # Detokenized string outputs.
+    ```
+    """
+
+    def __init__(
+        self,
+        tokenizer,
+        encoder_sequence_length=1024,
+        decoder_sequence_length=1024,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.tokenizer = tokenizer
+        self.encoder_packer = None
+        self.decoder_packer = None
+        self.encoder_sequence_length = encoder_sequence_length
+        self.decoder_sequence_length = decoder_sequence_length
+
+    def build(self, input_shape):
+        # Defer packer creation to `build()` so that we can be sure tokenizer
+        # assets have loaded when restoring a saved model.
+        self.encoder_packer = StartEndPacker(
+            start_value=self.tokenizer.start_token_id,
+            end_value=self.tokenizer.end_token_id,
+            pad_value=self.tokenizer.pad_token_id,
+            sequence_length=self.encoder_sequence_length,
+            return_padding_mask=True,
+        )
+        self.decoder_packer = StartEndPacker(
+            start_value=self.tokenizer.start_token_id,
+            end_value=self.tokenizer.end_token_id,
+            pad_value=self.tokenizer.pad_token_id,
+            sequence_length=self.decoder_sequence_length,
+            return_padding_mask=True,
+        )
+        self.built = True
+
+    @preprocessing_function
+    def call(
+        self,
+        x,
+        y=None,
+        sample_weight=None,
+        *,
+        encoder_sequence_length=None,
+        decoder_sequence_length=None,
+        # `sequence_length` is an alias for `decoder_sequence_length`
+        sequence_length=None,
+    ):
+        if encoder_sequence_length is None:
+            encoder_sequence_length = self.encoder_sequence_length
+        decoder_sequence_length = decoder_sequence_length or sequence_length
+        if decoder_sequence_length is None:
+            decoder_sequence_length = self.decoder_sequence_length
+
+        encoder_inputs = self.tokenizer(x["encoder_text"])
+        encoder_token_ids, encoder_padding_mask = self.encoder_packer(
+            encoder_inputs,
+            sequence_length=encoder_sequence_length,
+        )
+        decoder_inputs = self.tokenizer(x["decoder_text"])
+        decoder_token_ids, decoder_padding_mask = self.decoder_packer(
+            decoder_inputs,
+            sequence_length=decoder_sequence_length + 1,
+        )
+        x = {
+            "encoder_token_ids": encoder_token_ids,
+            "encoder_padding_mask": encoder_padding_mask,
+            "decoder_token_ids": decoder_token_ids[..., :-1],
+            "decoder_padding_mask": decoder_padding_mask[..., :-1],
+        }
+        # Target `y` will be the decoder input sequence shifted one step to the
+        # left (i.e., the next token).
+        y = decoder_token_ids[..., 1:]
+        sample_weight = decoder_padding_mask[..., 1:]
+        return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
+
+    @preprocessing_function
+    def generate_preprocess(
+        self,
+        x,
+        *,
+        encoder_sequence_length=None,
+        decoder_sequence_length=None,
+        # `sequence_length` is an alias for `decoder_sequence_length`
+        sequence_length=None,
+    ):
+        """Convert encoder and decoder input strings to integer token inputs for generation.
+
+        Similar to calling the layer for training, this method takes in a dict
+        containing `"encoder_text"` and `"decoder_text"`, with strings or
+        tensor strings for values, tokenizes and packs the input, and computes
+        a padding mask masking all inputs not filled in with a padded value.
+
+        Unlike calling the layer for training, this method does not compute
+        labels and will never append a `tokenizer.end_token_id` to the end of
+        the decoder sequence (as generation is expected to continue at the end
+        of the inputted decoder prompt).
+        """
+        if not self.built:
+            self.build(None)
+
+        if isinstance(x, dict):
+            encoder_text = x["encoder_text"]
+            decoder_text = x["decoder_text"]
+        else:
+            encoder_text = x
+            # Initialize empty prompt for the decoder.
+            decoder_text = tf.fill((tf.shape(encoder_text)[0],), "")
+
+        if encoder_sequence_length is None:
+            encoder_sequence_length = self.encoder_sequence_length
+        decoder_sequence_length = decoder_sequence_length or sequence_length
+        if decoder_sequence_length is None:
+            decoder_sequence_length = self.decoder_sequence_length
+
+        # Tokenize and pack the encoder inputs.
+        encoder_token_ids = self.tokenizer(encoder_text)
+        encoder_token_ids, encoder_padding_mask = self.encoder_packer(
+            encoder_token_ids,
+            sequence_length=encoder_sequence_length,
+        )
+
+        # Tokenize and pack the decoder inputs.
+        decoder_token_ids = self.tokenizer(decoder_text)
+        decoder_token_ids, decoder_padding_mask = self.decoder_packer(
+            decoder_token_ids,
+            sequence_length=decoder_sequence_length,
+            add_end_value=False,
+        )
+
+        return {
+            "encoder_token_ids": encoder_token_ids,
+            "encoder_padding_mask": encoder_padding_mask,
+            "decoder_token_ids": decoder_token_ids,
+            "decoder_padding_mask": decoder_padding_mask,
+        }
+
+    @preprocessing_function
+    def generate_postprocess(
+        self,
+        x,
+    ):
+        """Convert integer token output to strings for generation.
+
+        This method reverses `generate_preprocess()` by first removing all
+        padding and start/end tokens, and then converting the integer sequence
+        back to a string.
+        """
+        if not self.built:
+            self.build(None)
+
+        token_ids, padding_mask = (
+            x["decoder_token_ids"],
+            x["decoder_padding_mask"],
+        )
+        ids_to_strip = self.tokenizer.special_token_ids
+        token_ids = strip_to_ragged(token_ids, padding_mask, ids_to_strip)
+        return self.tokenizer.detokenize(token_ids)
+
+    @property
+    def encoder_sequence_length(self):
+        """The padded length of encoder input sequences."""
+        return self._encoder_sequence_length
+
+    @encoder_sequence_length.setter
+    def encoder_sequence_length(self, value):
+        self._encoder_sequence_length = value
+        if self.encoder_packer is not None:
+            self.encoder_packer.sequence_length = value
+
+    @property
+    def decoder_sequence_length(self):
+        """The padded length of decoder input sequences."""
+        return self._decoder_sequence_length
+
+    @decoder_sequence_length.setter
+    def decoder_sequence_length(self, value):
+        self._decoder_sequence_length = value
+        if self.decoder_packer is not None:
+            self.decoder_packer.sequence_length = value
+
+    @property
+    def sequence_length(self):
+        """Alias for `decoder_sequence_length`."""
+        return self.decoder_sequence_length
+
+    @sequence_length.setter
+    def sequence_length(self, value):
+        self.decoder_sequence_length = value
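A usage sketch for the alias defined by the last two properties, assuming the `bart_base_en` preset from the docstring: setting `sequence_length` only retargets the decoder packer, while the encoder length is controlled separately.

```python
import keras_hub

preprocessor = keras_hub.models.Seq2SeqLMPreprocessor.from_preset(
    "bart_base_en",
    encoder_sequence_length=256,
    decoder_sequence_length=256,
)
# `sequence_length` is an alias for `decoder_sequence_length`, so this
# updates the decoder packer but leaves the encoder packer untouched.
preprocessor.sequence_length = 128
assert preprocessor.decoder_sequence_length == 128
assert preprocessor.encoder_sequence_length == 256
```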