keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.15.0.dev20240911134614__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. keras_hub/api/__init__.py +1 -0
  2. keras_hub/api/bounding_box/__init__.py +36 -0
  3. keras_hub/api/layers/__init__.py +14 -0
  4. keras_hub/api/models/__init__.py +75 -31
  5. keras_hub/api/tokenizers/__init__.py +30 -0
  6. keras_hub/src/bounding_box/__init__.py +13 -0
  7. keras_hub/src/bounding_box/converters.py +529 -0
  8. keras_hub/src/bounding_box/formats.py +162 -0
  9. keras_hub/src/bounding_box/iou.py +263 -0
  10. keras_hub/src/bounding_box/to_dense.py +95 -0
  11. keras_hub/src/bounding_box/to_ragged.py +99 -0
  12. keras_hub/src/bounding_box/utils.py +194 -0
  13. keras_hub/src/bounding_box/validate_format.py +99 -0
  14. keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
  15. keras_hub/src/layers/preprocessing/image_converter.py +130 -0
  16. keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
  17. keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
  18. keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
  19. keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
  20. keras_hub/src/layers/preprocessing/random_swap.py +33 -31
  21. keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
  22. keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
  23. keras_hub/src/models/albert/__init__.py +1 -2
  24. keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
  25. keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +29 -10
  26. keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
  27. keras_hub/src/models/albert/albert_tokenizer.py +17 -36
  28. keras_hub/src/models/backbone.py +12 -34
  29. keras_hub/src/models/bart/__init__.py +1 -2
  30. keras_hub/src/models/bart/bart_preprocessor.py +6 -18
  31. keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
  32. keras_hub/src/models/bart/bart_tokenizer.py +12 -39
  33. keras_hub/src/models/bert/__init__.py +1 -5
  34. keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
  35. keras_hub/src/models/bert/bert_presets.py +1 -4
  36. keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +12 -10
  37. keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
  38. keras_hub/src/models/bert/bert_tokenizer.py +17 -35
  39. keras_hub/src/models/bloom/__init__.py +1 -2
  40. keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
  41. keras_hub/src/models/bloom/bloom_preprocessor.py +5 -12
  42. keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
  43. keras_hub/src/models/causal_lm.py +10 -29
  44. keras_hub/src/models/causal_lm_preprocessor.py +195 -0
  45. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
  46. keras_hub/src/models/deberta_v3/__init__.py +1 -4
  47. keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
  48. keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +11 -11
  49. keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
  50. keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
  51. keras_hub/src/models/densenet/densenet_backbone.py +46 -22
  52. keras_hub/src/models/distil_bert/__init__.py +1 -4
  53. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
  54. keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +12 -12
  55. keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
  56. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
  57. keras_hub/src/models/efficientnet/__init__.py +13 -0
  58. keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
  59. keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
  60. keras_hub/src/models/efficientnet/mbconv.py +238 -0
  61. keras_hub/src/models/electra/__init__.py +1 -2
  62. keras_hub/src/models/electra/electra_preprocessor.py +6 -5
  63. keras_hub/src/models/electra/electra_tokenizer.py +17 -32
  64. keras_hub/src/models/f_net/__init__.py +1 -2
  65. keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
  66. keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +10 -8
  67. keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
  68. keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
  69. keras_hub/src/models/falcon/__init__.py +1 -2
  70. keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
  71. keras_hub/src/models/falcon/falcon_preprocessor.py +5 -12
  72. keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
  73. keras_hub/src/models/gemma/__init__.py +1 -2
  74. keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
  75. keras_hub/src/models/gemma/gemma_preprocessor.py +5 -12
  76. keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
  77. keras_hub/src/models/gpt2/__init__.py +1 -2
  78. keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
  79. keras_hub/src/models/gpt2/gpt2_preprocessor.py +5 -12
  80. keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
  81. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
  82. keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +5 -12
  83. keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
  84. keras_hub/src/models/image_classifier.py +0 -5
  85. keras_hub/src/models/image_classifier_preprocessor.py +83 -0
  86. keras_hub/src/models/llama/__init__.py +1 -2
  87. keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
  88. keras_hub/src/models/llama/llama_preprocessor.py +5 -12
  89. keras_hub/src/models/llama/llama_tokenizer.py +12 -25
  90. keras_hub/src/models/llama3/__init__.py +1 -2
  91. keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
  92. keras_hub/src/models/llama3/llama3_preprocessor.py +2 -0
  93. keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
  94. keras_hub/src/models/masked_lm.py +0 -2
  95. keras_hub/src/models/masked_lm_preprocessor.py +156 -0
  96. keras_hub/src/models/mistral/__init__.py +1 -2
  97. keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
  98. keras_hub/src/models/mistral/mistral_preprocessor.py +5 -12
  99. keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
  100. keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
  101. keras_hub/src/models/mobilenet/__init__.py +13 -0
  102. keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
  103. keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
  104. keras_hub/src/models/opt/__init__.py +1 -2
  105. keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
  106. keras_hub/src/models/opt/opt_preprocessor.py +5 -12
  107. keras_hub/src/models/opt/opt_tokenizer.py +12 -41
  108. keras_hub/src/models/pali_gemma/__init__.py +1 -4
  109. keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
  110. keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
  111. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
  112. keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +10 -2
  113. keras_hub/src/models/phi3/__init__.py +1 -2
  114. keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
  115. keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
  116. keras_hub/src/models/phi3/phi3_preprocessor.py +5 -12
  117. keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
  118. keras_hub/src/models/preprocessor.py +76 -83
  119. keras_hub/src/models/resnet/__init__.py +6 -0
  120. keras_hub/src/models/resnet/resnet_backbone.py +387 -26
  121. keras_hub/src/models/resnet/resnet_image_classifier.py +7 -3
  122. keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
  123. keras_hub/src/models/resnet/resnet_image_converter.py +23 -0
  124. keras_hub/src/models/resnet/resnet_presets.py +95 -0
  125. keras_hub/src/models/roberta/__init__.py +1 -2
  126. keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
  127. keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +11 -11
  128. keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
  129. keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
  130. keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
  131. keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
  132. keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
  133. keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
  134. keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
  135. keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
  136. keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
  137. keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
  138. keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
  139. keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
  140. keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
  141. keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
  142. keras_hub/src/models/t5/__init__.py +1 -2
  143. keras_hub/src/models/t5/t5_tokenizer.py +13 -23
  144. keras_hub/src/models/task.py +71 -116
  145. keras_hub/src/models/{classifier.py → text_classifier.py} +8 -13
  146. keras_hub/src/models/text_classifier_preprocessor.py +138 -0
  147. keras_hub/src/models/whisper/__init__.py +1 -2
  148. keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
  149. keras_hub/src/models/whisper/whisper_backbone.py +0 -3
  150. keras_hub/src/models/whisper/whisper_presets.py +10 -10
  151. keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
  152. keras_hub/src/models/xlm_roberta/__init__.py +1 -4
  153. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
  154. keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +11 -11
  155. keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
  156. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
  157. keras_hub/src/tests/test_case.py +25 -0
  158. keras_hub/src/tokenizers/byte_pair_tokenizer.py +29 -17
  159. keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
  160. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +19 -7
  161. keras_hub/src/tokenizers/tokenizer.py +67 -32
  162. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
  163. keras_hub/src/tokenizers/word_piece_tokenizer.py +33 -47
  164. keras_hub/src/utils/keras_utils.py +0 -50
  165. keras_hub/src/utils/preset_utils.py +238 -67
  166. keras_hub/src/utils/tensor_utils.py +187 -69
  167. keras_hub/src/utils/timm/convert_resnet.py +20 -16
  168. keras_hub/src/utils/timm/preset_loader.py +67 -0
  169. keras_hub/src/utils/transformers/convert_albert.py +193 -0
  170. keras_hub/src/utils/transformers/convert_bart.py +373 -0
  171. keras_hub/src/utils/transformers/convert_bert.py +7 -17
  172. keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
  173. keras_hub/src/utils/transformers/convert_gemma.py +5 -19
  174. keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
  175. keras_hub/src/utils/transformers/convert_llama3.py +7 -18
  176. keras_hub/src/utils/transformers/convert_mistral.py +129 -0
  177. keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
  178. keras_hub/src/utils/transformers/preset_loader.py +77 -0
  179. keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
  180. keras_hub/src/version_utils.py +1 -1
  181. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/METADATA +1 -2
  182. keras_hub_nightly-0.15.0.dev20240911134614.dist-info/RECORD +338 -0
  183. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/WHEEL +1 -1
  184. keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
  185. keras_hub/src/utils/timm/convert.py +0 -37
  186. keras_hub/src/utils/transformers/convert.py +0 -101
  187. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
  188. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,156 @@
1
+ # Copyright 2024 The KerasHub Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import keras
15
+
16
+ from keras_hub.src.api_export import keras_hub_export
17
+ from keras_hub.src.layers.preprocessing.masked_lm_mask_generator import (
18
+ MaskedLMMaskGenerator,
19
+ )
20
+ from keras_hub.src.layers.preprocessing.multi_segment_packer import (
21
+ MultiSegmentPacker,
22
+ )
23
+ from keras_hub.src.models.preprocessor import Preprocessor
24
+ from keras_hub.src.utils.tensor_utils import preprocessing_function
25
+
26
+
27
+ @keras_hub_export("keras_hub.models.MaskedLMPreprocessor")
28
+ class MaskedLMPreprocessor(Preprocessor):
29
+ """Base class for masked language modeling preprocessing layers.
30
+
31
+ `MaskedLMPreprocessor` tasks wrap a `keras_hub.tokenizers.Tokenizer` to
32
+ create a preprocessing layer for masked language modeling tasks. It is
33
+ intended to be paired with a `keras_hub.models.MaskedLM` task.
34
+
35
+ All `MaskedLMPreprocessor` layers take a single input. This can be a single
36
+ string, a batch of strings, or a tuple of batches of string segments that
37
+ should be combined into a single sequence. See examples below. These inputs
38
+ will be tokenized, combined, and masked randomly along the sequence.
39
+
40
+ This layer will always output a `(x, y, sample_weight)` tuple, where `x`
41
+ is a dictionary with the masked, tokenized inputs, `y` contains the tokens
42
+ that were masked in `x`, and `sample_weight` marks where `y` contains padded
43
+ values. The exact contents of `x` will vary depending on the model being
44
+ used.
45
+
46
+ All `MaskedLMPreprocessor` tasks include a `from_preset()` constructor
47
+ which can be used to load a pre-trained config and vocabularies. You can
48
+ call the `from_preset()` constructor directly on this base class, in which
49
+ case the correct class for your model will be automatically instantiated.
50
+
51
+ Examples.
52
+ ```python
53
+ preprocessor = keras_hub.models.MaskedLMPreprocessor.from_preset(
54
+ "bert_base_en_uncased",
55
+ sequence_length=256, # Optional.
56
+ )
57
+
58
+ # Tokenize, mask and pack a single sentence.
59
+ x = "The quick brown fox jumped."
60
+ x, y, sample_weight = preprocessor(x)
61
+
62
+ # Preprocess a batch of sentence pairs.
63
+ first = ["The quick brown fox jumped.", "Call me Ishmael."]
64
+ second = ["The fox tripped.", "Oh look, a whale."]
65
+ x, y, sample_weight = preprocessor((first, second))
66
+
67
+ # With a `tf.data.Dataset`.
68
+ ds = tf.data.Dataset.from_tensor_slices((first, second))
69
+ ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
70
+ ```
71
+ """
72
+
73
+ def __init__(
74
+ self,
75
+ tokenizer,
76
+ sequence_length=512,
77
+ truncate="round_robin",
78
+ mask_selection_rate=0.15,
79
+ mask_selection_length=96,
80
+ mask_token_rate=0.8,
81
+ random_token_rate=0.1,
82
+ **kwargs,
83
+ ):
84
+ super().__init__(**kwargs)
85
+ self.tokenizer = tokenizer
86
+ self.packer = None
87
+ self.sequence_length = sequence_length
88
+ self.truncate = truncate
89
+ self.mask_selection_rate = mask_selection_rate
90
+ self.mask_selection_length = mask_selection_length
91
+ self.mask_token_rate = mask_token_rate
92
+ self.random_token_rate = random_token_rate
93
+ self.masker = None
94
+
95
+ def build(self, input_shape):
96
+ super().build(input_shape)
97
+ # Defer packer and masker creation to `build()` so we can be sure tokenizer
98
+ # assets have loaded when restoring a saved model.
99
+ self.packer = MultiSegmentPacker(
100
+ start_value=self.tokenizer.start_token_id,
101
+ end_value=self.tokenizer.end_token_id,
102
+ pad_value=self.tokenizer.pad_token_id,
103
+ truncate=self.truncate,
104
+ sequence_length=self.sequence_length,
105
+ )
106
+ self.masker = MaskedLMMaskGenerator(
107
+ mask_selection_rate=self.mask_selection_rate,
108
+ mask_selection_length=self.mask_selection_length,
109
+ mask_token_rate=self.mask_token_rate,
110
+ random_token_rate=self.random_token_rate,
111
+ vocabulary_size=self.tokenizer.vocabulary_size(),
112
+ mask_token_id=self.tokenizer.mask_token_id,
113
+ unselectable_token_ids=self.tokenizer.special_token_ids,
114
+ )
115
+
116
+ @preprocessing_function
117
+ def call(self, x, y=None, sample_weight=None):
118
+ x = x if isinstance(x, tuple) else (x,)
119
+ x = tuple(self.tokenizer(segment) for segment in x)
120
+ token_ids, segment_ids = self.packer(x)
121
+ padding_mask = token_ids != self.tokenizer.pad_token_id
122
+ masker_outputs = self.masker(token_ids)
123
+ x = {
124
+ "token_ids": masker_outputs["token_ids"],
125
+ "padding_mask": padding_mask,
126
+ "segment_ids": segment_ids,
127
+ "mask_positions": masker_outputs["mask_positions"],
128
+ }
129
+ y = masker_outputs["mask_ids"]
130
+ sample_weight = masker_outputs["mask_weights"]
131
+ return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
132
+
133
+ def get_config(self):
134
+ config = super().get_config()
135
+ config.update(
136
+ {
137
+ "sequence_length": self.sequence_length,
138
+ "truncate": self.truncate,
139
+ "mask_selection_rate": self.mask_selection_rate,
140
+ "mask_selection_length": self.mask_selection_length,
141
+ "mask_token_rate": self.mask_token_rate,
142
+ "random_token_rate": self.random_token_rate,
143
+ }
144
+ )
145
+ return config
146
+
147
+ @property
148
+ def sequence_length(self):
149
+ """The padded length of model input sequences."""
150
+ return self._sequence_length
151
+
152
+ @sequence_length.setter
153
+ def sequence_length(self, value):
154
+ self._sequence_length = value
155
+ if self.packer is not None:
156
+ self.packer.sequence_length = value
@@ -14,7 +14,6 @@
14
14
 
15
15
  from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone
16
16
  from keras_hub.src.models.mistral.mistral_presets import backbone_presets
17
- from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer
18
17
  from keras_hub.src.utils.preset_utils import register_presets
19
18
 
20
- register_presets(backbone_presets, (MistralBackbone, MistralTokenizer))
19
+ register_presets(backbone_presets, MistralBackbone)
@@ -12,21 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import keras
16
- from absl import logging
17
-
18
15
  from keras_hub.src.api_export import keras_hub_export
19
- from keras_hub.src.models.mistral.mistral_preprocessor import (
20
- MistralPreprocessor,
21
- )
22
- from keras_hub.src.utils.keras_utils import (
23
- convert_inputs_to_list_of_tensor_segments,
24
- )
25
- from keras_hub.src.utils.tensor_utils import strip_to_ragged
16
+ from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor
17
+ from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone
18
+ from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer
26
19
 
27
20
 
28
21
  @keras_hub_export("keras_hub.models.MistralCausalLMPreprocessor")
29
- class MistralCausalLMPreprocessor(MistralPreprocessor):
22
+ class MistralCausalLMPreprocessor(CausalLMPreprocessor):
30
23
  """Mistral Causal LM preprocessor.
31
24
 
32
25
  This preprocessing layer is meant for use with
@@ -93,83 +86,5 @@ class MistralCausalLMPreprocessor(MistralPreprocessor):
93
86
  ```
94
87
  """
95
88
 
96
- def call(
97
- self,
98
- x,
99
- y=None,
100
- sample_weight=None,
101
- sequence_length=None,
102
- ):
103
- if y is not None or sample_weight is not None:
104
- logging.warning(
105
- "`MistralCausalLMPreprocessor` generates `y` and "
106
- "`sample_weight` based on your input data, but your data "
107
- "already contains `y` or `sample_weight`. Your `y` and "
108
- "`sample_weight` will be ignored."
109
- )
110
- sequence_length = sequence_length or self.sequence_length
111
-
112
- x = convert_inputs_to_list_of_tensor_segments(x)[0]
113
- x = self.tokenizer(x)
114
- # Pad with one extra token to account for the truncation below.
115
- token_ids, padding_mask = self.packer(
116
- x,
117
- sequence_length=sequence_length + 1,
118
- add_start_value=self.add_start_token,
119
- add_end_value=self.add_end_token,
120
- )
121
- # The last token does not have a next token, so we truncate it out.
122
- x = {
123
- "token_ids": token_ids[..., :-1],
124
- "padding_mask": padding_mask[..., :-1],
125
- }
126
- # Target `y` will be the next token.
127
- y, sample_weight = token_ids[..., 1:], padding_mask[..., 1:]
128
- return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
129
-
130
- def generate_preprocess(
131
- self,
132
- x,
133
- sequence_length=None,
134
- ):
135
- """Convert strings to integer token input for generation.
136
-
137
- Similar to calling the layer for training, this method takes in strings
138
- or tensor strings, tokenizes and packs the input, and computes a padding
139
- mask masking all inputs not filled in with a padded value.
140
-
141
- Unlike calling the layer for training, this method does not compute
142
- labels and will never append a `tokenizer.end_token_id` to the end of
143
- the sequence (as generation is expected to continue at the end of the
144
- inputted prompt).
145
- """
146
- if not self.built:
147
- self.build(None)
148
-
149
- x = convert_inputs_to_list_of_tensor_segments(x)[0]
150
- x = self.tokenizer(x)
151
- token_ids, padding_mask = self.packer(
152
- x, sequence_length=sequence_length, add_end_value=False
153
- )
154
- return {
155
- "token_ids": token_ids,
156
- "padding_mask": padding_mask,
157
- }
158
-
159
- def generate_postprocess(
160
- self,
161
- x,
162
- ):
163
- """Convert integer token output to strings for generation.
164
-
165
- This method reverses `generate_preprocess()`, by first removing all
166
- padding and start/end tokens, and then converting the integer sequence
167
- back to a string.
168
- """
169
- token_ids, padding_mask = x["token_ids"], x["padding_mask"]
170
- ids_to_strip = (
171
- self.tokenizer.start_token_id,
172
- self.tokenizer.end_token_id,
173
- )
174
- token_ids = strip_to_ragged(token_ids, padding_mask, ids_to_strip)
175
- return self.tokenizer.detokenize(token_ids)
89
+ backbone_cls = MistralBackbone
90
+ tokenizer_cls = MistralTokenizer
@@ -16,11 +16,10 @@ import keras
16
16
 
17
17
  from keras_hub.src.api_export import keras_hub_export
18
18
  from keras_hub.src.layers.preprocessing.start_end_packer import StartEndPacker
19
+ from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone
19
20
  from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer
20
21
  from keras_hub.src.models.preprocessor import Preprocessor
21
- from keras_hub.src.utils.keras_utils import (
22
- convert_inputs_to_list_of_tensor_segments,
23
- )
22
+ from keras_hub.src.utils.tensor_utils import preprocessing_function
24
23
 
25
24
 
26
25
  @keras_hub_export("keras_hub.models.MistralPreprocessor")
@@ -111,6 +110,7 @@ class MistralPreprocessor(Preprocessor):
111
110
  ```
112
111
  """
113
112
 
113
+ backbone_cls = MistralBackbone
114
114
  tokenizer_cls = MistralTokenizer
115
115
 
116
116
  def __init__(
@@ -150,6 +150,7 @@ class MistralPreprocessor(Preprocessor):
150
150
  )
151
151
  return config
152
152
 
153
+ @preprocessing_function
153
154
  def call(
154
155
  self,
155
156
  x,
@@ -157,17 +158,9 @@ class MistralPreprocessor(Preprocessor):
157
158
  sample_weight=None,
158
159
  sequence_length=None,
159
160
  ):
160
- x = convert_inputs_to_list_of_tensor_segments(x)
161
- if len(x) != 1:
162
- raise ValueError(
163
- "Mistral requires each input feature to contain only "
164
- f"one segment, but received {len(x)}. If you are using Mistral"
165
- " for a multi-segment classification task, please refer to "
166
- "classification models like BERT or RoBERTa."
167
- )
168
161
  sequence_length = sequence_length or self.sequence_length
169
162
  token_ids, padding_mask = self.packer(
170
- self.tokenizer(x[0]),
163
+ self.tokenizer(x),
171
164
  sequence_length=sequence_length,
172
165
  add_start_value=self.add_start_token,
173
166
  add_end_value=self.add_end_token,
@@ -13,12 +13,18 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from keras_hub.src.api_export import keras_hub_export
16
+ from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone
16
17
  from keras_hub.src.tokenizers.sentence_piece_tokenizer import (
17
18
  SentencePieceTokenizer,
18
19
  )
19
20
 
20
21
 
21
- @keras_hub_export("keras_hub.models.MistralTokenizer")
22
+ @keras_hub_export(
23
+ [
24
+ "keras_hub.tokenizers.MistralTokenizer",
25
+ "keras_hub.models.MistralTokenizer",
26
+ ]
27
+ )
22
28
  class MistralTokenizer(SentencePieceTokenizer):
23
29
  """Mistral tokenizer layer based on SentencePiece.
24
30
 
@@ -28,10 +34,6 @@ class MistralTokenizer(SentencePieceTokenizer):
28
34
  Mistral models and provides a `from_preset()` method to automatically
29
35
  download a matching vocabulary for a Mistral preset.
30
36
 
31
- This tokenizer does not provide truncation or padding of inputs. It can be
32
- combined with a `keras_hub.models.MistralPreprocessor` layer for input
33
- packing.
34
-
35
37
  If input is a batch of strings (rank > 0), the layer will output a
36
38
  `tf.RaggedTensor` where the last dimension of the output is ragged.
37
39
 
@@ -60,23 +62,10 @@ class MistralTokenizer(SentencePieceTokenizer):
60
62
  ```
61
63
  """
62
64
 
65
+ backbone_cls = MistralBackbone
66
+
63
67
  def __init__(self, proto, **kwargs):
64
- self.start_token = "<s>"
65
- self.end_token = "</s>"
68
+ self._add_special_token("<s>", "start_token")
69
+ self._add_special_token("</s>", "end_token")
70
+ self.pad_token_id = 0
66
71
  super().__init__(proto=proto, **kwargs)
67
-
68
- def set_proto(self, proto):
69
- super().set_proto(proto)
70
- if proto is not None:
71
- for token in [self.start_token, self.end_token]:
72
- if token not in self.get_vocabulary():
73
- raise ValueError(
74
- f"Cannot find token `'{token}'` in the provided "
75
- f"`vocabulary`. Please provide `'{token}'` in your "
76
- "`vocabulary` or use a pretrained `vocabulary` name."
77
- )
78
- self.start_token_id = self.token_to_id(self.start_token)
79
- self.end_token_id = self.token_to_id(self.end_token)
80
- else:
81
- self.start_token_id = None
82
- self.end_token_id = None
@@ -37,7 +37,7 @@ class MiTBackbone(FeaturePyramidBackbone):
37
37
  patch_sizes,
38
38
  strides,
39
39
  include_rescaling=True,
40
- image_shape=(224, 224, 3),
40
+ image_shape=(None, None, 3),
41
41
  hidden_dims=None,
42
42
  **kwargs,
43
43
  ):
@@ -63,7 +63,7 @@ class MiTBackbone(FeaturePyramidBackbone):
63
63
  include_rescaling: bool, whether to rescale the inputs. If set
64
64
  to `True`, inputs will be passed through a `Rescaling(1/255.0)`
65
65
  layer. Defaults to `True`.
66
- image_shape: optional shape tuple, defaults to (224, 224, 3).
66
+ image_shape: optional shape tuple, defaults to (None, None, 3).
67
67
  hidden_dims: the embedding dims per hierarchical layer, used as
68
68
  the levels of the feature pyramid.
69
69
  patch_sizes: list of integers, the patch_size to apply for each layer.
@@ -0,0 +1,13 @@
1
+ # Copyright 2024 The KerasHub Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.