keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.16.0.dev2024092017__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. keras_hub/__init__.py +0 -6
  2. keras_hub/api/__init__.py +2 -0
  3. keras_hub/api/bounding_box/__init__.py +36 -0
  4. keras_hub/api/layers/__init__.py +14 -0
  5. keras_hub/api/models/__init__.py +97 -48
  6. keras_hub/api/tokenizers/__init__.py +30 -0
  7. keras_hub/api/utils/__init__.py +22 -0
  8. keras_hub/src/api_export.py +15 -9
  9. keras_hub/src/bounding_box/__init__.py +13 -0
  10. keras_hub/src/bounding_box/converters.py +529 -0
  11. keras_hub/src/bounding_box/formats.py +162 -0
  12. keras_hub/src/bounding_box/iou.py +263 -0
  13. keras_hub/src/bounding_box/to_dense.py +95 -0
  14. keras_hub/src/bounding_box/to_ragged.py +99 -0
  15. keras_hub/src/bounding_box/utils.py +194 -0
  16. keras_hub/src/bounding_box/validate_format.py +99 -0
  17. keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
  18. keras_hub/src/layers/preprocessing/image_converter.py +130 -0
  19. keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
  20. keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
  21. keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
  22. keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
  23. keras_hub/src/layers/preprocessing/random_swap.py +33 -31
  24. keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
  25. keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
  26. keras_hub/src/models/albert/__init__.py +1 -2
  27. keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
  28. keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +34 -10
  29. keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
  30. keras_hub/src/models/albert/albert_tokenizer.py +17 -36
  31. keras_hub/src/models/backbone.py +12 -34
  32. keras_hub/src/models/bart/__init__.py +1 -2
  33. keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
  34. keras_hub/src/models/bart/bart_tokenizer.py +12 -39
  35. keras_hub/src/models/bert/__init__.py +1 -5
  36. keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
  37. keras_hub/src/models/bert/bert_presets.py +1 -4
  38. keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +19 -12
  39. keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
  40. keras_hub/src/models/bert/bert_tokenizer.py +17 -35
  41. keras_hub/src/models/bloom/__init__.py +1 -2
  42. keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
  43. keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
  44. keras_hub/src/models/causal_lm.py +10 -29
  45. keras_hub/src/models/causal_lm_preprocessor.py +195 -0
  46. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
  47. keras_hub/src/models/deberta_v3/__init__.py +1 -4
  48. keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
  49. keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +16 -11
  50. keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
  51. keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
  52. keras_hub/src/models/densenet/densenet_backbone.py +46 -22
  53. keras_hub/src/models/distil_bert/__init__.py +1 -4
  54. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
  55. keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +17 -12
  56. keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
  57. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
  58. keras_hub/src/models/efficientnet/__init__.py +13 -0
  59. keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
  60. keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
  61. keras_hub/src/models/efficientnet/mbconv.py +238 -0
  62. keras_hub/src/models/electra/__init__.py +1 -2
  63. keras_hub/src/models/electra/electra_tokenizer.py +17 -32
  64. keras_hub/src/models/f_net/__init__.py +1 -2
  65. keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
  66. keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +17 -10
  67. keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
  68. keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
  69. keras_hub/src/models/falcon/__init__.py +1 -2
  70. keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
  71. keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
  72. keras_hub/src/models/gemma/__init__.py +1 -2
  73. keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
  74. keras_hub/src/models/gemma/gemma_decoder_block.py +1 -1
  75. keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
  76. keras_hub/src/models/gpt2/__init__.py +1 -2
  77. keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
  78. keras_hub/src/models/gpt2/gpt2_preprocessor.py +12 -90
  79. keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
  80. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
  81. keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
  82. keras_hub/src/models/image_classifier.py +0 -5
  83. keras_hub/src/models/image_classifier_preprocessor.py +83 -0
  84. keras_hub/src/models/llama/__init__.py +1 -2
  85. keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
  86. keras_hub/src/models/llama/llama_tokenizer.py +12 -25
  87. keras_hub/src/models/llama3/__init__.py +1 -2
  88. keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
  89. keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
  90. keras_hub/src/models/masked_lm.py +0 -2
  91. keras_hub/src/models/masked_lm_preprocessor.py +156 -0
  92. keras_hub/src/models/mistral/__init__.py +1 -2
  93. keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
  94. keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
  95. keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
  96. keras_hub/src/models/mobilenet/__init__.py +13 -0
  97. keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
  98. keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
  99. keras_hub/src/models/opt/__init__.py +1 -2
  100. keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
  101. keras_hub/src/models/opt/opt_tokenizer.py +12 -41
  102. keras_hub/src/models/pali_gemma/__init__.py +1 -4
  103. keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
  104. keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
  105. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
  106. keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +11 -3
  107. keras_hub/src/models/phi3/__init__.py +1 -2
  108. keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
  109. keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
  110. keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
  111. keras_hub/src/models/preprocessor.py +72 -83
  112. keras_hub/src/models/resnet/__init__.py +6 -0
  113. keras_hub/src/models/resnet/resnet_backbone.py +390 -42
  114. keras_hub/src/models/resnet/resnet_image_classifier.py +33 -6
  115. keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
  116. keras_hub/src/models/{llama3/llama3_preprocessor.py → resnet/resnet_image_converter.py} +7 -5
  117. keras_hub/src/models/resnet/resnet_presets.py +95 -0
  118. keras_hub/src/models/retinanet/__init__.py +13 -0
  119. keras_hub/src/models/retinanet/anchor_generator.py +175 -0
  120. keras_hub/src/models/retinanet/box_matcher.py +259 -0
  121. keras_hub/src/models/retinanet/non_max_supression.py +578 -0
  122. keras_hub/src/models/roberta/__init__.py +1 -2
  123. keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
  124. keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +16 -11
  125. keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
  126. keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
  127. keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
  128. keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
  129. keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
  130. keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
  131. keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
  132. keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
  133. keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
  134. keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
  135. keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
  136. keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
  137. keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
  138. keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
  139. keras_hub/src/models/t5/__init__.py +1 -2
  140. keras_hub/src/models/t5/t5_tokenizer.py +13 -23
  141. keras_hub/src/models/task.py +71 -116
  142. keras_hub/src/models/{classifier.py → text_classifier.py} +19 -13
  143. keras_hub/src/models/text_classifier_preprocessor.py +138 -0
  144. keras_hub/src/models/whisper/__init__.py +1 -2
  145. keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
  146. keras_hub/src/models/whisper/whisper_backbone.py +0 -3
  147. keras_hub/src/models/whisper/whisper_presets.py +10 -10
  148. keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
  149. keras_hub/src/models/xlm_roberta/__init__.py +1 -4
  150. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
  151. keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +16 -11
  152. keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
  153. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
  154. keras_hub/src/tests/test_case.py +46 -0
  155. keras_hub/src/tokenizers/byte_pair_tokenizer.py +30 -17
  156. keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
  157. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +20 -7
  158. keras_hub/src/tokenizers/tokenizer.py +67 -32
  159. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
  160. keras_hub/src/tokenizers/word_piece_tokenizer.py +34 -47
  161. keras_hub/src/utils/imagenet/__init__.py +13 -0
  162. keras_hub/src/utils/imagenet/imagenet_utils.py +1067 -0
  163. keras_hub/src/utils/keras_utils.py +0 -50
  164. keras_hub/src/utils/preset_utils.py +230 -68
  165. keras_hub/src/utils/tensor_utils.py +187 -69
  166. keras_hub/src/utils/timm/convert_resnet.py +19 -16
  167. keras_hub/src/utils/timm/preset_loader.py +66 -0
  168. keras_hub/src/utils/transformers/convert_albert.py +193 -0
  169. keras_hub/src/utils/transformers/convert_bart.py +373 -0
  170. keras_hub/src/utils/transformers/convert_bert.py +7 -17
  171. keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
  172. keras_hub/src/utils/transformers/convert_gemma.py +5 -19
  173. keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
  174. keras_hub/src/utils/transformers/convert_llama3.py +7 -18
  175. keras_hub/src/utils/transformers/convert_mistral.py +129 -0
  176. keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
  177. keras_hub/src/utils/transformers/preset_loader.py +77 -0
  178. keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
  179. keras_hub/src/version_utils.py +1 -1
  180. keras_hub_nightly-0.16.0.dev2024092017.dist-info/METADATA +202 -0
  181. keras_hub_nightly-0.16.0.dev2024092017.dist-info/RECORD +334 -0
  182. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/WHEEL +1 -1
  183. keras_hub/src/models/bart/bart_preprocessor.py +0 -276
  184. keras_hub/src/models/bloom/bloom_preprocessor.py +0 -185
  185. keras_hub/src/models/electra/electra_preprocessor.py +0 -154
  186. keras_hub/src/models/falcon/falcon_preprocessor.py +0 -187
  187. keras_hub/src/models/gemma/gemma_preprocessor.py +0 -191
  188. keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +0 -145
  189. keras_hub/src/models/llama/llama_preprocessor.py +0 -189
  190. keras_hub/src/models/mistral/mistral_preprocessor.py +0 -190
  191. keras_hub/src/models/opt/opt_preprocessor.py +0 -188
  192. keras_hub/src/models/phi3/phi3_preprocessor.py +0 -190
  193. keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
  194. keras_hub/src/utils/timm/convert.py +0 -37
  195. keras_hub/src/utils/transformers/convert.py +0 -101
  196. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/METADATA +0 -34
  197. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
  198. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/top_level.txt +0 -0

keras_hub/src/models/llama/llama_causal_lm_preprocessor.py
@@ -12,19 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import keras
-from absl import logging

 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.models.llama.llama_preprocessor import LlamaPreprocessor
-from keras_hub.src.utils.keras_utils import (
-    convert_inputs_to_list_of_tensor_segments,
-)
-from keras_hub.src.utils.tensor_utils import strip_to_ragged
+from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor
+from keras_hub.src.models.llama.llama_backbone import LlamaBackbone
+from keras_hub.src.models.llama.llama_tokenizer import LlamaTokenizer


 @keras_hub_export("keras_hub.models.LlamaCausalLMPreprocessor")
-class LlamaCausalLMPreprocessor(LlamaPreprocessor):
+class LlamaCausalLMPreprocessor(CausalLMPreprocessor):
     """Llama Causal LM preprocessor.

     This preprocessing layer is meant for use with
@@ -91,80 +87,5 @@ class LlamaCausalLMPreprocessor(LlamaPreprocessor):
     ```
     """

-    def call(
-        self,
-        x,
-        y=None,
-        sample_weight=None,
-        sequence_length=None,
-    ):
-        if y is not None or sample_weight is not None:
-            logging.warning(
-                "`LlamaCausalLMPreprocessor` generates `y` and "
-                "`sample_weight` based on your input data, but your data "
-                "already contains `y` or `sample_weight`. Your `y` and "
-                "`sample_weight` will be ignored."
-            )
-        sequence_length = sequence_length or self.sequence_length
-
-        x = convert_inputs_to_list_of_tensor_segments(x)[0]
-        x = self.tokenizer(x)
-        # Pad with one extra token to account for the truncation below.
-        token_ids, padding_mask = self.packer(
-            x,
-            sequence_length=sequence_length + 1,
-            add_start_value=self.add_start_token,
-            add_end_value=self.add_end_token,
-        )
-        # The last token does not have a next token, so we truncate it out.
-        x = {
-            "token_ids": token_ids[..., :-1],
-            "padding_mask": padding_mask[..., :-1],
-        }
-        # Target `y` will be the next token.
-        y, sample_weight = token_ids[..., 1:], padding_mask[..., 1:]
-        return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
-
-    def generate_preprocess(
-        self,
-        x,
-        sequence_length=None,
-    ):
-        """Convert strings to integer token input for generation.
-
-        Similar to calling the layer for training, this method takes in strings
-        or tensor strings, tokenizes and packs the input, and computes a padding
-        mask masking all inputs not filled in with a padded value.
-
-        Unlike calling the layer for training, this method does not compute
-        labels and will never append a `tokenizer.end_token_id` to the end of
-        the sequence (as generation is expected to continue at the end of the
-        inputted prompt).
-        """
-        if not self.built:
-            self.build(None)
-
-        x = convert_inputs_to_list_of_tensor_segments(x)[0]
-        x = self.tokenizer(x)
-        token_ids, padding_mask = self.packer(
-            x, sequence_length=sequence_length, add_end_value=False
-        )
-        return {
-            "token_ids": token_ids,
-            "padding_mask": padding_mask,
-        }
-
-    def generate_postprocess(
-        self,
-        x,
-    ):
-        """Convert integer token output to strings for generation.
-
-        This method reverses `generate_preprocess()`, by first removing all
-        padding and start/end tokens, and then converting the integer sequence
-        back to a string.
-        """
-        token_ids, padding_mask = x["token_ids"], x["padding_mask"]
-        ids_to_strip = (self.tokenizer.end_token_id,)
-        token_ids = strip_to_ragged(token_ids, padding_mask, ids_to_strip)
-        return self.tokenizer.detokenize(token_ids)
+    backbone_cls = LlamaBackbone
+    tokenizer_cls = LlamaTokenizer
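
With this change the model-specific class carries no logic of its own; `call()`, `generate_preprocess()`, and `generate_postprocess()` now come from the shared `CausalLMPreprocessor` base, and the subclass only declares which backbone and tokenizer it pairs with. A minimal usage sketch of the refactored class; the preset name, sequence length, and prompts are illustrative and not taken from this diff:

```python
import keras_hub

# The base class wires up the Llama tokenizer and packer declared via
# `backbone_cls` / `tokenizer_cls` on the subclass.
preprocessor = keras_hub.models.LlamaCausalLMPreprocessor.from_preset(
    "llama2_7b_en",  # illustrative preset name
    sequence_length=128,
)

# Training-style call: returns (x, y, sample_weight) with next-token targets.
x, y, sample_weight = preprocessor("The quick brown fox jumped.")

# Generation-style call: tokenizes and pads, but never appends an end token,
# since generation continues from the end of the prompt.
features = preprocessor.generate_preprocess(["The quick brown fox"])
print(features["token_ids"].shape, features["padding_mask"].shape)
```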

keras_hub/src/models/llama/llama_tokenizer.py
@@ -13,12 +13,18 @@
 # limitations under the License.

 from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.llama.llama_backbone import LlamaBackbone
 from keras_hub.src.tokenizers.sentence_piece_tokenizer import (
     SentencePieceTokenizer,
 )


-@keras_hub_export("keras_hub.models.LlamaTokenizer")
+@keras_hub_export(
+    [
+        "keras_hub.tokenizers.LlamaTokenizer",
+        "keras_hub.models.LlamaTokenizer",
+    ]
+)
 class LlamaTokenizer(SentencePieceTokenizer):
     """Llama tokenizer layer based on SentencePiece.

@@ -28,10 +34,6 @@ class LlamaTokenizer(SentencePieceTokenizer):
     Llama models and provides a `from_preset()` method to automatically
     download a matching vocabulary for a Llama preset.

-    This tokenizer does not provide truncation or padding of inputs. It can be
-    combined with a `keras_hub.models.LlamaPreprocessor` layer for input
-    packing.
-
     If input is a batch of strings (rank > 0), the layer will output a
     `tf.RaggedTensor` where the last dimension of the output is ragged.

@@ -60,25 +62,10 @@ class LlamaTokenizer(SentencePieceTokenizer):
     ```
     """

+    backbone_cls = LlamaBackbone
+
     def __init__(self, proto, **kwargs):
-        self.start_token = "<s>"
-        self.end_token = "</s>"
+        self._add_special_token("<s>", "start_token")
+        self._add_special_token("</s>", "end_token")
+        self.pad_token_id = 0
         super().__init__(proto=proto, **kwargs)
-
-    def set_proto(self, proto):
-        super().set_proto(proto)
-        if proto is not None:
-            for token in [self.start_token, self.end_token]:
-                if token not in self.get_vocabulary():
-                    raise ValueError(
-                        f"Cannot find token `'{token}'` in the provided "
-                        f"`vocabulary`. Please provide `'{token}'` in your "
-                        "`vocabulary` or use a pretrained `vocabulary` name."
-                    )
-            self.start_token_id = self.token_to_id(self.start_token)
-            self.end_token_id = self.token_to_id(self.end_token)
-            self.pad_token_id = 0
-        else:
-            self.start_token_id = None
-            self.end_token_id = None
-            self.pad_token_id = None
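
The tokenizer is now exported under both `keras_hub.tokenizers` and `keras_hub.models` (the new `keras_hub/api/tokenizers/__init__.py` in the file list above provides the namespace), and special tokens are registered through the base-class `_add_special_token` helper instead of being validated by the removed `set_proto()` override. A hedged usage sketch; the preset name is illustrative:

```python
import keras_hub

# Both export paths should resolve to the same class after this change.
tokenizer = keras_hub.tokenizers.LlamaTokenizer.from_preset("llama2_7b_en")
assert keras_hub.models.LlamaTokenizer is keras_hub.tokenizers.LlamaTokenizer

# Tokenize and detokenize as before; no truncation or padding is applied here.
token_ids = tokenizer("The quick brown fox jumped.")
text = tokenizer.detokenize(token_ids)

# The registered special tokens expose the ids the old override computed by hand.
print(tokenizer.start_token_id, tokenizer.end_token_id, tokenizer.pad_token_id)
```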

keras_hub/src/models/llama3/__init__.py
@@ -14,7 +14,6 @@

 from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone
 from keras_hub.src.models.llama3.llama3_presets import backbone_presets
-from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
 from keras_hub.src.utils.preset_utils import register_presets

-register_presets(backbone_presets, (Llama3Backbone, Llama3Tokenizer))
+register_presets(backbone_presets, Llama3Backbone)

keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py
@@ -12,19 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import keras
-from absl import logging
-
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.models.llama3.llama3_preprocessor import Llama3Preprocessor
-from keras_hub.src.utils.keras_utils import (
-    convert_inputs_to_list_of_tensor_segments,
-)
-from keras_hub.src.utils.tensor_utils import strip_to_ragged
+from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor
+from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone
+from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer


 @keras_hub_export("keras_hub.models.Llama3CausalLMPreprocessor")
-class Llama3CausalLMPreprocessor(Llama3Preprocessor):
+class Llama3CausalLMPreprocessor(CausalLMPreprocessor):
     """Llama 3 Causal LM preprocessor.

     This preprocessing layer is meant for use with
@@ -91,83 +86,5 @@ class Llama3CausalLMPreprocessor(Llama3Preprocessor):
     ```
     """

-    def call(
-        self,
-        x,
-        y=None,
-        sample_weight=None,
-        sequence_length=None,
-    ):
-        if y is not None or sample_weight is not None:
-            logging.warning(
-                "`Llama3CausalLMPreprocessor` generates `y` and "
-                "`sample_weight` based on your input data, but your data "
-                "already contains `y` or `sample_weight`. Your `y` and "
-                "`sample_weight` will be ignored."
-            )
-        sequence_length = sequence_length or self.sequence_length
-
-        x = convert_inputs_to_list_of_tensor_segments(x)[0]
-        x = self.tokenizer(x)
-        # Pad with one extra token to account for the truncation below.
-        token_ids, padding_mask = self.packer(
-            x,
-            sequence_length=sequence_length + 1,
-            add_start_value=self.add_start_token,
-            add_end_value=self.add_end_token,
-        )
-        # The last token does not have a next token, so we truncate it out.
-        x = {
-            "token_ids": token_ids[..., :-1],
-            "padding_mask": padding_mask[..., :-1],
-        }
-        # Target `y` will be the next token.
-        y, sample_weight = token_ids[..., 1:], padding_mask[..., 1:]
-        return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
-
-    def generate_preprocess(
-        self,
-        x,
-        sequence_length=None,
-    ):
-        """Convert strings to integer token input for generation.
-
-        Similar to calling the layer for training, this method takes in strings
-        or tensor strings, tokenizes and packs the input, and computes a padding
-        mask masking all inputs not filled in with a padded value.
-
-        Unlike calling the layer for training, this method does not compute
-        labels and will never append a `tokenizer.end_token_id` to the end of
-        the sequence (as generation is expected to continue at the end of the
-        inputted prompt).
-        """
-        if not self.built:
-            self.build(None)
-
-        x = convert_inputs_to_list_of_tensor_segments(x)[0]
-        x = self.tokenizer(x)
-        token_ids, padding_mask = self.packer(
-            x, sequence_length=sequence_length, add_end_value=False
-        )
-        return {
-            "token_ids": token_ids,
-            "padding_mask": padding_mask,
-        }
-
-    def generate_postprocess(
-        self,
-        x,
-    ):
-        """Convert integer token output to strings for generation.
-
-        This method reverses `generate_preprocess()`, by first removing all
-        padding and start/end tokens, and then converting the integer sequence
-        back to a string.
-        """
-        token_ids, padding_mask = x["token_ids"], x["padding_mask"]
-        ids_to_strip = (
-            self.tokenizer.end_token_id,
-            self.tokenizer.start_token_id,
-        )
-        token_ids = strip_to_ragged(token_ids, padding_mask, ids_to_strip)
-        return self.tokenizer.detokenize(token_ids)
+    backbone_cls = Llama3Backbone
+    tokenizer_cls = Llama3Tokenizer

keras_hub/src/models/llama3/llama3_tokenizer.py
@@ -13,51 +13,30 @@
 # limitations under the License.

 from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone
 from keras_hub.src.tokenizers.byte_pair_tokenizer import BytePairTokenizer


-@keras_hub_export("keras_hub.models.Llama3Tokenizer")
+@keras_hub_export(
+    [
+        "keras_hub.tokenizers.Llama3Tokenizer",
+        "keras_hub.models.Llama3Tokenizer",
+    ]
+)
 class Llama3Tokenizer(BytePairTokenizer):
+    backbone_cls = Llama3Backbone
+
     def __init__(
         self,
         vocabulary=None,
         merges=None,
         **kwargs,
     ):
-        self.start_token = "<|begin_of_text|>"
-        self.end_token = "<|end_of_text|>"
-
+        self._add_special_token("<|begin_of_text|>", "start_token")
+        self._add_special_token("<|end_of_text|>", "end_token")
+        self.pad_token_id = 0
         super().__init__(
             vocabulary=vocabulary,
             merges=merges,
-            unsplittable_tokens=[self.start_token, self.end_token],
             **kwargs,
         )
-
-    def set_vocabulary_and_merges(self, vocabulary, merges):
-        super().set_vocabulary_and_merges(vocabulary, merges)
-
-        if vocabulary is not None:
-            # Check for necessary special tokens.
-            if self.end_token not in self.get_vocabulary():
-                raise ValueError(
-                    f"Cannot find token `'{self.end_token}'` in the provided "
-                    f"`vocabulary`. Please provide `'{self.end_token}'` in "
-                    "your `vocabulary` or use a pretrained `vocabulary` name."
-                )
-
-            self.start_token_id = self.token_to_id(self.start_token)
-            self.end_token_id = self.token_to_id(self.end_token)
-            self.pad_token_id = 0
-        else:
-            self.end_token_id = None
-            self.start_token_id = None
-            self.pad_token_id = None
-
-    def get_config(self):
-        config = super().get_config()
-        # In the constructor, we pass the list of special tokens to the
-        # `unsplittable_tokens` arg of the superclass' constructor. Hence, we
-        # delete it from the config here.
-        del config["unsplittable_tokens"]
-        return config

keras_hub/src/models/masked_lm.py
@@ -45,8 +45,6 @@ class MaskedLM(Task):

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        # Default compilation.
-        self.compile()

     def compile(
         self,
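
The removed lines above were the only place `MaskedLM` compiled itself in its constructor. A sketch of the corresponding user-side flow with an explicit `compile()`, so training code does not depend on any default; the preset name and optimizer settings are illustrative, and whether the library still applies defaults elsewhere is not shown in this diff:

```python
import keras
import keras_hub

# Compile explicitly before fit(); the default compile() call removed from
# MaskedLM.__init__ above no longer runs in the new version.
masked_lm = keras_hub.models.BertMaskedLM.from_preset("bert_base_en_uncased")
masked_lm.compile(
    optimizer=keras.optimizers.Adam(5e-5),
    weighted_metrics=["sparse_categorical_accuracy"],
)
```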

keras_hub/src/models/masked_lm_preprocessor.py
@@ -0,0 +1,156 @@
+# Copyright 2024 The KerasHub Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import keras
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.layers.preprocessing.masked_lm_mask_generator import (
+    MaskedLMMaskGenerator,
+)
+from keras_hub.src.layers.preprocessing.multi_segment_packer import (
+    MultiSegmentPacker,
+)
+from keras_hub.src.models.preprocessor import Preprocessor
+from keras_hub.src.utils.tensor_utils import preprocessing_function
+
+
+@keras_hub_export("keras_hub.models.MaskedLMPreprocessor")
+class MaskedLMPreprocessor(Preprocessor):
+    """Base class for masked language modeling preprocessing layers.
+
+    `MaskedLMPreprocessor` tasks wrap a `keras_hub.tokenizer.Tokenizer` to
+    create a preprocessing layer for masked language modeling tasks. It is
+    intended to be paired with a `keras.models.MaskedLM` task.
+
+    All `MaskedLMPreprocessor` take inputs a single input. This can be a single
+    string, a batch of strings, or a tuple of batches of string segments that
+    should be combined into a single sequence. See examples below. These inputs
+    will be tokenized, combined, and masked randomly along the sequence.
+
+    This layer will always output a `(x, y, sample_weight)` tuple, where `x`
+    is a dictionary with the masked, tokenized inputs, `y` contains the tokens
+    that were masked in `x`, and `sample_weight` marks where `y` contains padded
+    values. The exact contents of `x` will vary depending on the model being
+    used.
+
+    All `MaskedLMPreprocessor` tasks include a `from_preset()` constructor
+    which can be used to load a pre-trained config and vocabularies. You can
+    call the `from_preset()` constructor directly on this base class, in which
+    case the correct class for you model will be automatically instantiated.
+
+    Examples.
+    ```python
+    preprocessor = keras_hub.models.MaskedLMPreprocessor.from_preset(
+        "bert_base_en_uncased",
+        sequence_length=256, # Optional.
+    )
+
+    # Tokenize, mask and pack a single sentence.
+    x = "The quick brown fox jumped."
+    x, y, sample_weight = preprocessor(x)
+
+    # Preprocess a batch of labeled sentence pairs.
+    first = ["The quick brown fox jumped.", "Call me Ishmael."]
+    second = ["The fox tripped.", "Oh look, a whale."]
+    x, y, sample_weight = preprocessor((first, second))
+
+    # With a `tf.data.Dataset`.
+    ds = tf.data.Dataset.from_tensor_slices((first, second))
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    ```
+    """
+
+    def __init__(
+        self,
+        tokenizer,
+        sequence_length=512,
+        truncate="round_robin",
+        mask_selection_rate=0.15,
+        mask_selection_length=96,
+        mask_token_rate=0.8,
+        random_token_rate=0.1,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.tokenizer = tokenizer
+        self.packer = None
+        self.sequence_length = sequence_length
+        self.truncate = truncate
+        self.mask_selection_rate = mask_selection_rate
+        self.mask_selection_length = mask_selection_length
+        self.mask_token_rate = mask_token_rate
+        self.random_token_rate = random_token_rate
+        self.masker = None
+
+    def build(self, input_shape):
+        super().build(input_shape)
+        # Defer masker creation to `build()` so that we can be sure tokenizer
+        # assets have loaded when restoring a saved model.
+        self.packer = MultiSegmentPacker(
+            start_value=self.tokenizer.start_token_id,
+            end_value=self.tokenizer.end_token_id,
+            pad_value=self.tokenizer.pad_token_id,
+            truncate=self.truncate,
+            sequence_length=self.sequence_length,
+        )
+        self.masker = MaskedLMMaskGenerator(
+            mask_selection_rate=self.mask_selection_rate,
+            mask_selection_length=self.mask_selection_length,
+            mask_token_rate=self.mask_token_rate,
+            random_token_rate=self.random_token_rate,
+            vocabulary_size=self.tokenizer.vocabulary_size(),
+            mask_token_id=self.tokenizer.mask_token_id,
+            unselectable_token_ids=self.tokenizer.special_token_ids,
+        )
+
+    @preprocessing_function
+    def call(self, x, y=None, sample_weight=None):
+        x = x if isinstance(x, tuple) else (x,)
+        x = tuple(self.tokenizer(segment) for segment in x)
+        token_ids, segment_ids = self.packer(x)
+        padding_mask = token_ids != self.tokenizer.pad_token_id
+        masker_outputs = self.masker(token_ids)
+        x = {
+            "token_ids": masker_outputs["token_ids"],
+            "padding_mask": padding_mask,
+            "segment_ids": segment_ids,
+            "mask_positions": masker_outputs["mask_positions"],
+        }
+        y = masker_outputs["mask_ids"]
+        sample_weight = masker_outputs["mask_weights"]
+        return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "sequence_length": self.sequence_length,
+                "truncate": self.truncate,
+                "mask_selection_rate": self.mask_selection_rate,
+                "mask_selection_length": self.mask_selection_length,
+                "mask_token_rate": self.mask_token_rate,
+                "random_token_rate": self.random_token_rate,
+            }
+        )
+        return config
+
+    @property
+    def sequence_length(self):
+        """The padded length of model input sequences."""
+        return self._sequence_length
+
+    @sequence_length.setter
+    def sequence_length(self, value):
+        self._sequence_length = value
+        if self.packer is not None:
+            self.packer.sequence_length = value
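
Given how the causal-LM preprocessors above collapse to a pair of class attributes, the per-model masked-LM preprocessors in the file table (for example `bert_masked_lm_preprocessor.py`, +6 -87) presumably follow the same pattern against this new base class. A hedged sketch of that pattern, not copied from this diff:

```python
from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.models.bert.bert_backbone import BertBackbone
from keras_hub.src.models.bert.bert_tokenizer import BertTokenizer
from keras_hub.src.models.masked_lm_preprocessor import MaskedLMPreprocessor


@keras_hub_export("keras_hub.models.BertMaskedLMPreprocessor")
class BertMaskedLMPreprocessor(MaskedLMPreprocessor):
    """Sketch only: the shared base supplies the tokenize/pack/mask logic."""

    backbone_cls = BertBackbone
    tokenizer_cls = BertTokenizer
```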

keras_hub/src/models/mistral/__init__.py
@@ -14,7 +14,6 @@

 from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone
 from keras_hub.src.models.mistral.mistral_presets import backbone_presets
-from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer
 from keras_hub.src.utils.preset_utils import register_presets

-register_presets(backbone_presets, (MistralBackbone, MistralTokenizer))
+register_presets(backbone_presets, MistralBackbone)

keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py
@@ -12,21 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import keras
-from absl import logging
-
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.models.mistral.mistral_preprocessor import (
-    MistralPreprocessor,
-)
-from keras_hub.src.utils.keras_utils import (
-    convert_inputs_to_list_of_tensor_segments,
-)
-from keras_hub.src.utils.tensor_utils import strip_to_ragged
+from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor
+from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone
+from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer


 @keras_hub_export("keras_hub.models.MistralCausalLMPreprocessor")
-class MistralCausalLMPreprocessor(MistralPreprocessor):
+class MistralCausalLMPreprocessor(CausalLMPreprocessor):
     """Mistral Causal LM preprocessor.

     This preprocessing layer is meant for use with
@@ -93,83 +86,5 @@ class MistralCausalLMPreprocessor(MistralPreprocessor):
     ```
     """

-    def call(
-        self,
-        x,
-        y=None,
-        sample_weight=None,
-        sequence_length=None,
-    ):
-        if y is not None or sample_weight is not None:
-            logging.warning(
-                "`MistralCausalLMPreprocessor` generates `y` and "
-                "`sample_weight` based on your input data, but your data "
-                "already contains `y` or `sample_weight`. Your `y` and "
-                "`sample_weight` will be ignored."
-            )
-        sequence_length = sequence_length or self.sequence_length
-
-        x = convert_inputs_to_list_of_tensor_segments(x)[0]
-        x = self.tokenizer(x)
-        # Pad with one extra token to account for the truncation below.
-        token_ids, padding_mask = self.packer(
-            x,
-            sequence_length=sequence_length + 1,
-            add_start_value=self.add_start_token,
-            add_end_value=self.add_end_token,
-        )
-        # The last token does not have a next token, so we truncate it out.
-        x = {
-            "token_ids": token_ids[..., :-1],
-            "padding_mask": padding_mask[..., :-1],
-        }
-        # Target `y` will be the next token.
-        y, sample_weight = token_ids[..., 1:], padding_mask[..., 1:]
-        return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
-
-    def generate_preprocess(
-        self,
-        x,
-        sequence_length=None,
-    ):
-        """Convert strings to integer token input for generation.
-
-        Similar to calling the layer for training, this method takes in strings
-        or tensor strings, tokenizes and packs the input, and computes a padding
-        mask masking all inputs not filled in with a padded value.
-
-        Unlike calling the layer for training, this method does not compute
-        labels and will never append a `tokenizer.end_token_id` to the end of
-        the sequence (as generation is expected to continue at the end of the
-        inputted prompt).
-        """
-        if not self.built:
-            self.build(None)
-
-        x = convert_inputs_to_list_of_tensor_segments(x)[0]
-        x = self.tokenizer(x)
-        token_ids, padding_mask = self.packer(
-            x, sequence_length=sequence_length, add_end_value=False
-        )
-        return {
-            "token_ids": token_ids,
-            "padding_mask": padding_mask,
-        }
-
-    def generate_postprocess(
-        self,
-        x,
-    ):
-        """Convert integer token output to strings for generation.
-
-        This method reverses `generate_preprocess()`, by first removing all
-        padding and start/end tokens, and then converting the integer sequence
-        back to a string.
-        """
-        token_ids, padding_mask = x["token_ids"], x["padding_mask"]
-        ids_to_strip = (
-            self.tokenizer.start_token_id,
-            self.tokenizer.end_token_id,
-        )
-        token_ids = strip_to_ragged(token_ids, padding_mask, ids_to_strip)
-        return self.tokenizer.detokenize(token_ids)
+    backbone_cls = MistralBackbone
+    tokenizer_cls = MistralTokenizer