keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.16.0.dev2024092017__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. keras_hub/__init__.py +0 -6
  2. keras_hub/api/__init__.py +2 -0
  3. keras_hub/api/bounding_box/__init__.py +36 -0
  4. keras_hub/api/layers/__init__.py +14 -0
  5. keras_hub/api/models/__init__.py +97 -48
  6. keras_hub/api/tokenizers/__init__.py +30 -0
  7. keras_hub/api/utils/__init__.py +22 -0
  8. keras_hub/src/api_export.py +15 -9
  9. keras_hub/src/bounding_box/__init__.py +13 -0
  10. keras_hub/src/bounding_box/converters.py +529 -0
  11. keras_hub/src/bounding_box/formats.py +162 -0
  12. keras_hub/src/bounding_box/iou.py +263 -0
  13. keras_hub/src/bounding_box/to_dense.py +95 -0
  14. keras_hub/src/bounding_box/to_ragged.py +99 -0
  15. keras_hub/src/bounding_box/utils.py +194 -0
  16. keras_hub/src/bounding_box/validate_format.py +99 -0
  17. keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
  18. keras_hub/src/layers/preprocessing/image_converter.py +130 -0
  19. keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
  20. keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
  21. keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
  22. keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
  23. keras_hub/src/layers/preprocessing/random_swap.py +33 -31
  24. keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
  25. keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
  26. keras_hub/src/models/albert/__init__.py +1 -2
  27. keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
  28. keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +34 -10
  29. keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
  30. keras_hub/src/models/albert/albert_tokenizer.py +17 -36
  31. keras_hub/src/models/backbone.py +12 -34
  32. keras_hub/src/models/bart/__init__.py +1 -2
  33. keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
  34. keras_hub/src/models/bart/bart_tokenizer.py +12 -39
  35. keras_hub/src/models/bert/__init__.py +1 -5
  36. keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
  37. keras_hub/src/models/bert/bert_presets.py +1 -4
  38. keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +19 -12
  39. keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
  40. keras_hub/src/models/bert/bert_tokenizer.py +17 -35
  41. keras_hub/src/models/bloom/__init__.py +1 -2
  42. keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
  43. keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
  44. keras_hub/src/models/causal_lm.py +10 -29
  45. keras_hub/src/models/causal_lm_preprocessor.py +195 -0
  46. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
  47. keras_hub/src/models/deberta_v3/__init__.py +1 -4
  48. keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
  49. keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +16 -11
  50. keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
  51. keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
  52. keras_hub/src/models/densenet/densenet_backbone.py +46 -22
  53. keras_hub/src/models/distil_bert/__init__.py +1 -4
  54. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
  55. keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +17 -12
  56. keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
  57. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
  58. keras_hub/src/models/efficientnet/__init__.py +13 -0
  59. keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
  60. keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
  61. keras_hub/src/models/efficientnet/mbconv.py +238 -0
  62. keras_hub/src/models/electra/__init__.py +1 -2
  63. keras_hub/src/models/electra/electra_tokenizer.py +17 -32
  64. keras_hub/src/models/f_net/__init__.py +1 -2
  65. keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
  66. keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +17 -10
  67. keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
  68. keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
  69. keras_hub/src/models/falcon/__init__.py +1 -2
  70. keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
  71. keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
  72. keras_hub/src/models/gemma/__init__.py +1 -2
  73. keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
  74. keras_hub/src/models/gemma/gemma_decoder_block.py +1 -1
  75. keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
  76. keras_hub/src/models/gpt2/__init__.py +1 -2
  77. keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
  78. keras_hub/src/models/gpt2/gpt2_preprocessor.py +12 -90
  79. keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
  80. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
  81. keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
  82. keras_hub/src/models/image_classifier.py +0 -5
  83. keras_hub/src/models/image_classifier_preprocessor.py +83 -0
  84. keras_hub/src/models/llama/__init__.py +1 -2
  85. keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
  86. keras_hub/src/models/llama/llama_tokenizer.py +12 -25
  87. keras_hub/src/models/llama3/__init__.py +1 -2
  88. keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
  89. keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
  90. keras_hub/src/models/masked_lm.py +0 -2
  91. keras_hub/src/models/masked_lm_preprocessor.py +156 -0
  92. keras_hub/src/models/mistral/__init__.py +1 -2
  93. keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
  94. keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
  95. keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
  96. keras_hub/src/models/mobilenet/__init__.py +13 -0
  97. keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
  98. keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
  99. keras_hub/src/models/opt/__init__.py +1 -2
  100. keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
  101. keras_hub/src/models/opt/opt_tokenizer.py +12 -41
  102. keras_hub/src/models/pali_gemma/__init__.py +1 -4
  103. keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
  104. keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
  105. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
  106. keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +11 -3
  107. keras_hub/src/models/phi3/__init__.py +1 -2
  108. keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
  109. keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
  110. keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
  111. keras_hub/src/models/preprocessor.py +72 -83
  112. keras_hub/src/models/resnet/__init__.py +6 -0
  113. keras_hub/src/models/resnet/resnet_backbone.py +390 -42
  114. keras_hub/src/models/resnet/resnet_image_classifier.py +33 -6
  115. keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
  116. keras_hub/src/models/{llama3/llama3_preprocessor.py → resnet/resnet_image_converter.py} +7 -5
  117. keras_hub/src/models/resnet/resnet_presets.py +95 -0
  118. keras_hub/src/models/retinanet/__init__.py +13 -0
  119. keras_hub/src/models/retinanet/anchor_generator.py +175 -0
  120. keras_hub/src/models/retinanet/box_matcher.py +259 -0
  121. keras_hub/src/models/retinanet/non_max_supression.py +578 -0
  122. keras_hub/src/models/roberta/__init__.py +1 -2
  123. keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
  124. keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +16 -11
  125. keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
  126. keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
  127. keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
  128. keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
  129. keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
  130. keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
  131. keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
  132. keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
  133. keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
  134. keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
  135. keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
  136. keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
  137. keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
  138. keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
  139. keras_hub/src/models/t5/__init__.py +1 -2
  140. keras_hub/src/models/t5/t5_tokenizer.py +13 -23
  141. keras_hub/src/models/task.py +71 -116
  142. keras_hub/src/models/{classifier.py → text_classifier.py} +19 -13
  143. keras_hub/src/models/text_classifier_preprocessor.py +138 -0
  144. keras_hub/src/models/whisper/__init__.py +1 -2
  145. keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
  146. keras_hub/src/models/whisper/whisper_backbone.py +0 -3
  147. keras_hub/src/models/whisper/whisper_presets.py +10 -10
  148. keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
  149. keras_hub/src/models/xlm_roberta/__init__.py +1 -4
  150. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
  151. keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +16 -11
  152. keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
  153. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
  154. keras_hub/src/tests/test_case.py +46 -0
  155. keras_hub/src/tokenizers/byte_pair_tokenizer.py +30 -17
  156. keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
  157. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +20 -7
  158. keras_hub/src/tokenizers/tokenizer.py +67 -32
  159. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
  160. keras_hub/src/tokenizers/word_piece_tokenizer.py +34 -47
  161. keras_hub/src/utils/imagenet/__init__.py +13 -0
  162. keras_hub/src/utils/imagenet/imagenet_utils.py +1067 -0
  163. keras_hub/src/utils/keras_utils.py +0 -50
  164. keras_hub/src/utils/preset_utils.py +230 -68
  165. keras_hub/src/utils/tensor_utils.py +187 -69
  166. keras_hub/src/utils/timm/convert_resnet.py +19 -16
  167. keras_hub/src/utils/timm/preset_loader.py +66 -0
  168. keras_hub/src/utils/transformers/convert_albert.py +193 -0
  169. keras_hub/src/utils/transformers/convert_bart.py +373 -0
  170. keras_hub/src/utils/transformers/convert_bert.py +7 -17
  171. keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
  172. keras_hub/src/utils/transformers/convert_gemma.py +5 -19
  173. keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
  174. keras_hub/src/utils/transformers/convert_llama3.py +7 -18
  175. keras_hub/src/utils/transformers/convert_mistral.py +129 -0
  176. keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
  177. keras_hub/src/utils/transformers/preset_loader.py +77 -0
  178. keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
  179. keras_hub/src/version_utils.py +1 -1
  180. keras_hub_nightly-0.16.0.dev2024092017.dist-info/METADATA +202 -0
  181. keras_hub_nightly-0.16.0.dev2024092017.dist-info/RECORD +334 -0
  182. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/WHEEL +1 -1
  183. keras_hub/src/models/bart/bart_preprocessor.py +0 -276
  184. keras_hub/src/models/bloom/bloom_preprocessor.py +0 -185
  185. keras_hub/src/models/electra/electra_preprocessor.py +0 -154
  186. keras_hub/src/models/falcon/falcon_preprocessor.py +0 -187
  187. keras_hub/src/models/gemma/gemma_preprocessor.py +0 -191
  188. keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +0 -145
  189. keras_hub/src/models/llama/llama_preprocessor.py +0 -189
  190. keras_hub/src/models/mistral/mistral_preprocessor.py +0 -190
  191. keras_hub/src/models/opt/opt_preprocessor.py +0 -188
  192. keras_hub/src/models/phi3/phi3_preprocessor.py +0 -190
  193. keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
  194. keras_hub/src/utils/timm/convert.py +0 -37
  195. keras_hub/src/utils/transformers/convert.py +0 -101
  196. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/METADATA +0 -34
  197. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
  198. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/top_level.txt +0 -0
@@ -1,326 +0,0 @@
1
- # Copyright 2024 The KerasHub Authors
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # https://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
-
16
- import keras
17
- from absl import logging
18
-
19
- from keras_hub.src.api_export import keras_hub_export
20
- from keras_hub.src.layers.preprocessing.start_end_packer import StartEndPacker
21
- from keras_hub.src.models.preprocessor import Preprocessor
22
- from keras_hub.src.models.whisper.whisper_audio_feature_extractor import (
23
- WhisperAudioFeatureExtractor,
24
- )
25
- from keras_hub.src.models.whisper.whisper_tokenizer import WhisperTokenizer
26
- from keras_hub.src.utils.keras_utils import (
27
- convert_inputs_to_list_of_tensor_segments,
28
- )
29
-
30
-
31
- @keras_hub_export("keras_hub.models.WhisperPreprocessor")
32
- class WhisperPreprocessor(Preprocessor):
33
- """A Whisper preprocessing layer which handles audio and text input.
34
-
35
- This preprocessing layer will do three things:
36
-
37
- 1. Compute the log-mel spectrogram of the audio tensor inputs using
38
- `audio_feature_extractor`.
39
- 2. Tokenize decoder inputs using the `tokenizer`.
40
- 2. Add the appropriate special tokens - `"<|startoftranscript|>", task
41
- token, language token, `"<|endoftext|>"`, etc.
42
- 3. Construct a dictionary with keys `"encoder_features"`,
43
- `"decoder_token_ids"`, `"decoder_padding_mask"` that can be passed
44
- directly to a Whisper model.
45
-
46
- Args:
47
- tokenizer: A `keras_hub.models.WhisperTokenizer` instance.
48
- audio_feature_extractor: A
49
- `keras_hub.models.WhisperAudioFeatureExtractor` instance or `None`.
50
- If `None` a feature extractor with default parameters will be
51
- created.
52
- decoder_sequence_length: The length of the packed decoder inputs.
53
- language: string, language token. Should only be passed if your
54
- tokenizer is multilingual.
55
- task: string, task name. One of `"transcribe"`, `"translate"`. Should
56
- only be passed if your tokenizer is multilingual.
57
- no_timestamps: bool. If True, `"<|no_timestamps|>"` will be added as a
58
- special token to your input.
59
-
60
- Call arguments:
61
- x: A dictionary with `"encoder_audio"` and `"decoder_text"` as its keys.
62
- `"encoder_audio"` should correspond to the input audio tensor.
63
- `"decoder_text"` should be a tensor of single string sequences.
64
- Inputs may be batched or unbatched. Raw python inputs will be
65
- converted to tensors.
66
- y: Any label data. Will be passed through unaltered.
67
- sample_weight: Any label weight data. Will be passed through unaltered.
68
-
69
- Examples:
70
-
71
- Directly calling the layer on data.
72
- ```python
73
- preprocessor = keras_hub.models.WhisperPreprocessor.from_preset(
74
- "whisper_tiny_en",
75
- )
76
-
77
- # Preprocess unbatched inputs.
78
- input_data = {
79
- "encoder_audio": tf.ones((200,)),
80
- "decoder_text": "The quick brown fox jumped.",
81
- }
82
- preprocessor(input_data)
83
-
84
- # Preprocess batched inputs.
85
- input_data = {
86
- "encoder_audio": tf.ones((2, 200)),
87
- "decoder_text": ["The quick brown fox jumped.", "Call me Ishmael."],
88
- }
89
- preprocessor(input_data)
90
-
91
- # Custom audio feature extractor and vocabulary.
92
- audio_feature_extractor = keras_hub.models.WhisperAudioFeatureExtractor(
93
- num_mels=80,
94
- num_fft_bins=400,
95
- stride=100,
96
- sampling_rate=100,
97
- max_audio_length=5,
98
- )
99
-
100
- features = ["a quick fox.", "a fox quick."]
101
- vocab = {"<|endoftext|>": 0, "a": 4, "Ġquick": 5, "Ġfox": 6}
102
- merges = ["Ġ q", "u i", "c k", "ui ck", "Ġq uick"]
103
- merges += ["Ġ f", "o x", "Ġf ox"]
104
- special_tokens = {
105
- "<|startoftranscript|>": 9,
106
- "<|endoftext|>": 10,
107
- "<|notimestamps|>": 11,
108
- "<|transcribe|>": 12,
109
- "<|translate|>": 13,
110
- }
111
-
112
- tokenizer = keras_hub.models.WhisperTokenizer(
113
- vocabulary=vocab,
114
- merges=merges,
115
- special_tokens=special_tokens,
116
- )
117
- preprocessor = keras_hub.models.WhisperPreprocessor(
118
- audio_feature_extractor=audio_feature_extractor,
119
- tokenizer=tokenizer,
120
- )
121
-
122
- input_data = {
123
- "encoder_audio": tf.ones((200,)),
124
- "decoder_text": "The quick brown fox jumped.",
125
- }
126
- preprocessor(input_data)
127
- ```
128
-
129
- Mapping with `tf.data.Dataset`.
130
- ```python
131
- preprocessor = keras_hub.models.WhisperPreprocessor.from_preset(
132
- "whisper_tiny_en")
133
-
134
- # Map labeled single sentences.
135
- features = {
136
- "encoder_audio": tf.ones((2, 200)),
137
- "decoder_text": ["The quick brown fox jumped.", "Call me Ishmael."],
138
- }
139
- labels = tf.constant(["True", "False"])
140
- ds = tf.data.Dataset.from_tensor_slices((features, labels))
141
- ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
142
-
143
- # Map unlabeled single sentences.
144
- features = {
145
- "encoder_audio": tf.ones((2, 200)),
146
- "decoder_text": ["The quick brown fox jumped.", "Call me Ishmael."],
147
- }
148
- ds = tf.data.Dataset.from_tensor_slices(features)
149
- ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
150
- ```
151
- """
152
-
153
- tokenizer_cls = WhisperTokenizer
154
-
155
- def __init__(
156
- self,
157
- tokenizer,
158
- audio_feature_extractor=None,
159
- decoder_sequence_length=448,
160
- language=None,
161
- task=None,
162
- no_timestamps=True,
163
- **kwargs,
164
- ):
165
- super().__init__(**kwargs)
166
- if audio_feature_extractor is None:
167
- audio_feature_extractor = WhisperAudioFeatureExtractor()
168
- self.audio_feature_extractor = audio_feature_extractor
169
- self.tokenizer = tokenizer
170
- self.decoder_packer = None
171
- self.decoder_sequence_length = decoder_sequence_length
172
- self.language = language
173
- self.task = task
174
- self.no_timestamps = no_timestamps
175
-
176
- def build(self, input_shape):
177
- # Defer packer creation to `build()` so that we can be sure tokenizer
178
- # assets have loaded when restoring a saved model.
179
-
180
- # Create list of tokens to be prepended to decoder inputs.
181
- bos_tokens = [self.tokenizer.bos_token_id]
182
- if self.tokenizer.language_tokens is not None:
183
- if (
184
- self.language is None
185
- or self.language not in self.tokenizer.language_tokens
186
- ):
187
- raise ValueError(
188
- "You must pass a non-None value for `language` when using "
189
- "a multilingual tokenizer. The value must be one of "
190
- f'{",".join(self.tokenizer.language_tokens.keys())}. '
191
- f"Received: language={self.language}."
192
- )
193
- if self.task is None or self.task not in [
194
- "transcribe",
195
- "translate",
196
- ]:
197
- raise ValueError(
198
- "You must pass a non-None value for `task` when using "
199
- "a multilingual tokenizer. The value must be one of "
200
- '`"transcribe"`, `"translate"`. '
201
- f"Received: task={self.task}."
202
- )
203
-
204
- bos_tokens += [self.tokenizer.language_tokens[self.language]]
205
-
206
- if self.task == "transcribe":
207
- bos_tokens += [self.tokenizer.special_tokens["<|transcribe|>"]]
208
- elif self.task == "translate":
209
- bos_tokens += [self.tokenizer.special_tokens["<|translate|>"]]
210
- else:
211
- if self.language is not None:
212
- logging.info(
213
- "`tokenizer` is monolingual, and `language` has a "
214
- "non-`None` value. Setting `language` to `None`."
215
- )
216
- self.language = None
217
- if self.task is not None:
218
- logging.info(
219
- "`tokenizer` is monolingual, and `task` has a "
220
- "non-`None` value. Setting `task` to `None`."
221
- )
222
- self.task = None
223
-
224
- if self.no_timestamps:
225
- bos_tokens += [self.tokenizer.no_timestamps_token_id]
226
-
227
- # TODO: Use `MultiSegmentPacker` instead of `StartEndPacker` once we
228
- # want to move to multi-segment packing and have improved
229
- # `MultiSegmentPacker`'s performance.
230
- self.decoder_packer = StartEndPacker(
231
- start_value=bos_tokens,
232
- end_value=self.tokenizer.eos_token_id,
233
- pad_value=self.tokenizer.pad_token_id,
234
- sequence_length=self.decoder_sequence_length,
235
- return_padding_mask=True,
236
- )
237
-
238
- def call(self, x, y=None, sample_weight=None, decoder_sequence_length=None):
239
- if not (
240
- isinstance(x, dict)
241
- and ["encoder_audio", "decoder_text"] == list(x.keys())
242
- ):
243
- raise ValueError(
244
- '`x` must be a dictionary, containing the keys `"encoder_audio"`'
245
- f' and `"decoder_text"`. Received x={x}.'
246
- )
247
-
248
- encoder_audio = x["encoder_audio"]
249
- decoder_text = x["decoder_text"]
250
-
251
- encoder_audio = convert_inputs_to_list_of_tensor_segments(encoder_audio)
252
- decoder_text = convert_inputs_to_list_of_tensor_segments(decoder_text)
253
-
254
- if len(encoder_audio) > 1 or len(decoder_text) > 1:
255
- raise ValueError(
256
- '`WhisperPreprocessor` requires both `"encoder_audio"` and '
257
- f'`"decoder_text"` to contain only one segment, but received '
258
- f"{len(encoder_audio)} and {len(decoder_text)}, respectively."
259
- )
260
-
261
- encoder_features = self.audio_feature_extractor(encoder_audio[0])
262
- decoder_sequence_length = (
263
- decoder_sequence_length or self.decoder_sequence_length
264
- )
265
- decoder_inputs = self.tokenizer(decoder_text[0])
266
- decoder_token_ids, decoder_padding_mask = self.decoder_packer(
267
- decoder_inputs,
268
- sequence_length=decoder_sequence_length,
269
- )
270
-
271
- x = {
272
- "encoder_features": encoder_features,
273
- "decoder_token_ids": decoder_token_ids,
274
- "decoder_padding_mask": decoder_padding_mask,
275
- }
276
-
277
- return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
278
-
279
- def get_config(self):
280
- config = super().get_config()
281
- config.update(
282
- {
283
- "audio_feature_extractor": keras.layers.serialize(
284
- self.audio_feature_extractor
285
- ),
286
- "decoder_sequence_length": self.decoder_sequence_length,
287
- "language": self.language,
288
- "task": self.task,
289
- "no_timestamps": self.no_timestamps,
290
- }
291
- )
292
- return config
293
-
294
- @classmethod
295
- def from_config(cls, config):
296
- if "tokenizer" in config and isinstance(config["tokenizer"], dict):
297
- config["tokenizer"] = keras.layers.deserialize(config["tokenizer"])
298
-
299
- if "audio_feature_extractor" in config and isinstance(
300
- config["audio_feature_extractor"], dict
301
- ):
302
- config["audio_feature_extractor"] = keras.layers.deserialize(
303
- config["audio_feature_extractor"]
304
- )
305
-
306
- return cls(**config)
307
-
308
- @property
309
- def decoder_sequence_length(self):
310
- """The padded length of decoder input sequences."""
311
- return self._decoder_sequence_length
312
-
313
- @decoder_sequence_length.setter
314
- def decoder_sequence_length(self, value):
315
- self._decoder_sequence_length = value
316
- if self.decoder_packer is not None:
317
- self.decoder_packer.sequence_length = value
318
-
319
- @property
320
- def sequence_length(self):
321
- """Alias for `decoder_sequence_length`."""
322
- return self.decoder_sequence_length
323
-
324
- @sequence_length.setter
325
- def sequence_length(self, value):
326
- self.decoder_sequence_length = value
@@ -1,37 +0,0 @@
1
- # Copyright 2024 The KerasHub Authors
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # https://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- """Convert timm models to KerasHub."""
15
-
16
- from keras_hub.src.utils.timm.convert_resnet import load_resnet_backbone
17
-
18
-
19
- def load_timm_backbone(cls, preset, load_weights, **kwargs):
20
- """Load a timm model config and weights as a KerasHub backbone.
21
-
22
- Args:
23
- cls (class): Keras model class.
24
- preset (str): Preset configuration name.
25
- load_weights (bool): Whether to load the weights.
26
-
27
- Returns:
28
- backbone: Initialized Keras model backbone.
29
- """
30
- if cls is None:
31
- raise ValueError("Backbone class is None")
32
- if cls.__name__ == "ResNetBackbone":
33
- return load_resnet_backbone(cls, preset, load_weights, **kwargs)
34
- raise ValueError(
35
- f"{cls} has not been ported from the Hugging Face format yet. "
36
- "Please check Hugging Face Hub for the Keras model. "
37
- )
@@ -1,101 +0,0 @@
1
- # Copyright 2024 The KerasHub Authors
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # https://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- """Convert huggingface models to KerasHub."""
15
-
16
-
17
- from keras_hub.src.utils.transformers.convert_bert import load_bert_backbone
18
- from keras_hub.src.utils.transformers.convert_bert import load_bert_tokenizer
19
- from keras_hub.src.utils.transformers.convert_distilbert import (
20
- load_distilbert_backbone,
21
- )
22
- from keras_hub.src.utils.transformers.convert_distilbert import (
23
- load_distilbert_tokenizer,
24
- )
25
- from keras_hub.src.utils.transformers.convert_gemma import load_gemma_backbone
26
- from keras_hub.src.utils.transformers.convert_gemma import load_gemma_tokenizer
27
- from keras_hub.src.utils.transformers.convert_gpt2 import load_gpt2_backbone
28
- from keras_hub.src.utils.transformers.convert_gpt2 import load_gpt2_tokenizer
29
- from keras_hub.src.utils.transformers.convert_llama3 import load_llama3_backbone
30
- from keras_hub.src.utils.transformers.convert_llama3 import (
31
- load_llama3_tokenizer,
32
- )
33
- from keras_hub.src.utils.transformers.convert_pali_gemma import (
34
- load_pali_gemma_backbone,
35
- )
36
- from keras_hub.src.utils.transformers.convert_pali_gemma import (
37
- load_pali_gemma_tokenizer,
38
- )
39
-
40
-
41
- def load_transformers_backbone(cls, preset, load_weights):
42
- """
43
- Load a Transformer model config and weights as a KerasHub backbone.
44
-
45
- Args:
46
- cls (class): Keras model class.
47
- preset (str): Preset configuration name.
48
- load_weights (bool): Whether to load the weights.
49
-
50
- Returns:
51
- backbone: Initialized Keras model backbone.
52
- """
53
- if cls is None:
54
- raise ValueError("Backbone class is None")
55
- if cls.__name__ == "BertBackbone":
56
- return load_bert_backbone(cls, preset, load_weights)
57
- if cls.__name__ == "GemmaBackbone":
58
- return load_gemma_backbone(cls, preset, load_weights)
59
- if cls.__name__ == "Llama3Backbone":
60
- return load_llama3_backbone(cls, preset, load_weights)
61
- if cls.__name__ == "PaliGemmaBackbone":
62
- return load_pali_gemma_backbone(cls, preset, load_weights)
63
- if cls.__name__ == "GPT2Backbone":
64
- return load_gpt2_backbone(cls, preset, load_weights)
65
- if cls.__name__ == "DistilBertBackbone":
66
- return load_distilbert_backbone(cls, preset, load_weights)
67
- raise ValueError(
68
- f"{cls} has not been ported from the Hugging Face format yet. "
69
- "Please check Hugging Face Hub for the Keras model. "
70
- )
71
-
72
-
73
- def load_transformers_tokenizer(cls, preset):
74
- """
75
- Load a Transformer tokenizer assets as a KerasHub tokenizer.
76
-
77
- Args:
78
- cls (class): Tokenizer class.
79
- preset (str): Preset configuration name.
80
-
81
- Returns:
82
- tokenizer: Initialized tokenizer.
83
- """
84
- if cls is None:
85
- raise ValueError("Tokenizer class is None")
86
- if cls.__name__ == "BertTokenizer":
87
- return load_bert_tokenizer(cls, preset)
88
- if cls.__name__ == "GemmaTokenizer":
89
- return load_gemma_tokenizer(cls, preset)
90
- if cls.__name__ == "Llama3Tokenizer":
91
- return load_llama3_tokenizer(cls, preset)
92
- if cls.__name__ == "PaliGemmaTokenizer":
93
- return load_pali_gemma_tokenizer(cls, preset)
94
- if cls.__name__ == "GPT2Tokenizer":
95
- return load_gpt2_tokenizer(cls, preset)
96
- if cls.__name__ == "DistilBertTokenizer":
97
- return load_distilbert_tokenizer(cls, preset)
98
- raise ValueError(
99
- f"{cls} has not been ported from the Hugging Face format yet. "
100
- "Please check Hugging Face Hub for the Keras model. "
101
- )
@@ -1,34 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: keras-hub-nightly
3
- Version: 0.15.0.dev20240823171555
4
- Summary: 🚧🚧🚧 Work in progress. 🚧🚧🚧 More details soon!
5
- Home-page: https://github.com/keras-team/keras-hub
6
- Author: Keras team
7
- Author-email: keras-hub@google.com
8
- License: Apache License 2.0
9
- Classifier: Development Status :: 3 - Alpha
10
- Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.8
12
- Classifier: Programming Language :: Python :: 3.9
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: Programming Language :: Python :: 3.11
15
- Classifier: Programming Language :: Python :: 3 :: Only
16
- Classifier: Operating System :: Unix
17
- Classifier: Operating System :: Microsoft :: Windows
18
- Classifier: Operating System :: MacOS
19
- Classifier: Intended Audience :: Science/Research
20
- Classifier: Topic :: Scientific/Engineering
21
- Classifier: Topic :: Software Development
22
- Requires-Python: >=3.9
23
- Requires-Dist: absl-py
24
- Requires-Dist: numpy
25
- Requires-Dist: packaging
26
- Requires-Dist: regex
27
- Requires-Dist: rich
28
- Requires-Dist: kagglehub
29
- Requires-Dist: tensorflow-text ; platform_system != "Darwin"
30
- Provides-Extra: extras
31
- Requires-Dist: rouge-score ; extra == 'extras'
32
- Requires-Dist: sentencepiece ; extra == 'extras'
33
-
34
- 🚧🚧🚧 Work in progress. 🚧🚧🚧 More details soon!