keras-hub 0.20.0.dev1__py3-none-any.whl → 0.21.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. keras_hub/__init__.py +15 -33
  2. keras_hub/layers/__init__.py +134 -0
  3. keras_hub/metrics/__init__.py +11 -0
  4. keras_hub/models/__init__.py +642 -0
  5. keras_hub/samplers/__init__.py +18 -0
  6. keras_hub/src/layers/modeling/reversible_embedding.py +25 -35
  7. keras_hub/src/layers/preprocessing/image_converter.py +1 -0
  8. keras_hub/src/layers/preprocessing/random_deletion.py +1 -1
  9. keras_hub/src/layers/preprocessing/random_swap.py +1 -1
  10. keras_hub/src/models/audio_to_text.py +66 -0
  11. keras_hub/src/models/audio_to_text_preprocessor.py +80 -0
  12. keras_hub/src/models/backbone.py +5 -2
  13. keras_hub/src/models/cspnet/cspnet_backbone.py +51 -26
  14. keras_hub/src/models/cspnet/cspnet_presets.py +38 -3
  15. keras_hub/src/models/falcon/falcon_backbone.py +1 -1
  16. keras_hub/src/models/gemma/gemma_presets.py +10 -10
  17. keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py +3 -2
  18. keras_hub/src/models/gemma3/gemma3_presets.py +8 -8
  19. keras_hub/src/models/gemma3/gemma3_vision_encoder.py +1 -1
  20. keras_hub/src/models/llama/llama_attention.py +24 -6
  21. keras_hub/src/models/llama/llama_backbone.py +50 -16
  22. keras_hub/src/models/llama/llama_decoder.py +20 -3
  23. keras_hub/src/models/llama/llama_presets.py +3 -3
  24. keras_hub/src/models/llama/llama_rotary_embedding.py +180 -0
  25. keras_hub/src/models/llama3/llama3_backbone.py +10 -2
  26. keras_hub/src/models/llama3/llama3_presets.py +84 -2
  27. keras_hub/src/models/mistral/mistral_presets.py +3 -3
  28. keras_hub/src/models/mixtral/__init__.py +5 -0
  29. keras_hub/src/models/mixtral/mixtral_attention.py +252 -0
  30. keras_hub/src/models/mixtral/mixtral_backbone.py +207 -0
  31. keras_hub/src/models/mixtral/mixtral_causal_lm.py +281 -0
  32. keras_hub/src/models/mixtral/mixtral_causal_lm_preprocessor.py +76 -0
  33. keras_hub/src/models/mixtral/mixtral_decoder.py +494 -0
  34. keras_hub/src/models/mixtral/mixtral_layer_norm.py +34 -0
  35. keras_hub/src/models/mixtral/mixtral_presets.py +26 -0
  36. keras_hub/src/models/mixtral/mixtral_tokenizer.py +21 -0
  37. keras_hub/src/models/moonshine/__init__.py +5 -0
  38. keras_hub/src/models/moonshine/moonshine_audio_converter.py +301 -0
  39. keras_hub/src/models/moonshine/moonshine_audio_to_text.py +383 -0
  40. keras_hub/src/models/moonshine/moonshine_audio_to_text_preprocessor.py +272 -0
  41. keras_hub/src/models/moonshine/moonshine_backbone.py +478 -0
  42. keras_hub/src/models/moonshine/moonshine_decoder.py +313 -0
  43. keras_hub/src/models/moonshine/moonshine_encoder.py +212 -0
  44. keras_hub/src/models/moonshine/moonshine_layers.py +239 -0
  45. keras_hub/src/models/moonshine/moonshine_multi_head_attention.py +355 -0
  46. keras_hub/src/models/moonshine/moonshine_presets.py +25 -0
  47. keras_hub/src/models/moonshine/moonshine_tokenizer.py +62 -0
  48. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +11 -11
  49. keras_hub/src/models/pali_gemma/pali_gemma_vit.py +1 -1
  50. keras_hub/src/models/qwen/__init__.py +4 -0
  51. keras_hub/src/models/qwen/qwen_attention.py +3 -1
  52. keras_hub/src/models/qwen/qwen_backbone.py +8 -1
  53. keras_hub/src/models/qwen/qwen_causal_lm.py +7 -0
  54. keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py +7 -0
  55. keras_hub/src/models/qwen/qwen_presets.py +61 -0
  56. keras_hub/src/models/qwen/qwen_tokenizer.py +9 -0
  57. keras_hub/src/models/qwen_moe/__init__.py +5 -0
  58. keras_hub/src/models/qwen_moe/qwen_moe_attention.py +375 -0
  59. keras_hub/src/models/qwen_moe/qwen_moe_backbone.py +373 -0
  60. keras_hub/src/models/qwen_moe/qwen_moe_causal_lm.py +350 -0
  61. keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py +17 -0
  62. keras_hub/src/models/qwen_moe/qwen_moe_decoder.py +625 -0
  63. keras_hub/src/models/qwen_moe/qwen_moe_layernorm.py +32 -0
  64. keras_hub/src/models/qwen_moe/qwen_moe_presets.py +15 -0
  65. keras_hub/src/models/qwen_moe/qwen_moe_tokenizer.py +46 -0
  66. keras_hub/src/models/retinanet/retinanet_image_converter.py +0 -13
  67. keras_hub/src/models/retinanet/retinanet_presets.py +2 -2
  68. keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py +0 -18
  69. keras_hub/src/models/segformer/segformer_presets.py +12 -12
  70. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +6 -0
  71. keras_hub/src/models/task.py +5 -2
  72. keras_hub/src/models/xception/__init__.py +5 -0
  73. keras_hub/src/models/xception/xception_backbone.py +188 -0
  74. keras_hub/src/models/xception/xception_image_classifier.py +12 -0
  75. keras_hub/src/models/xception/xception_image_classifier_preprocessor.py +14 -0
  76. keras_hub/src/models/xception/xception_image_converter.py +8 -0
  77. keras_hub/src/models/xception/xception_presets.py +14 -0
  78. keras_hub/src/tests/mocks/mock_gemma3_tokenizer.py +155 -0
  79. keras_hub/src/utils/coco/__init__.py +0 -0
  80. keras_hub/src/utils/coco/coco_utils.py +133 -0
  81. keras_hub/src/utils/imagenet/imagenet_utils.py +36 -0
  82. keras_hub/src/utils/keras_utils.py +11 -0
  83. keras_hub/src/utils/preset_utils.py +70 -10
  84. keras_hub/src/utils/tensor_utils.py +27 -1
  85. keras_hub/src/utils/timm/convert_cspnet.py +94 -23
  86. keras_hub/src/utils/timm/preset_loader.py +6 -6
  87. keras_hub/src/utils/transformers/convert_llama3.py +21 -1
  88. keras_hub/src/utils/transformers/convert_mixtral.py +139 -0
  89. keras_hub/src/utils/transformers/convert_qwen.py +1 -0
  90. keras_hub/src/utils/transformers/convert_qwen_moe.py +253 -0
  91. keras_hub/src/utils/transformers/preset_loader.py +6 -0
  92. keras_hub/src/{version_utils.py → version.py} +1 -1
  93. keras_hub/tokenizers/__init__.py +117 -0
  94. keras_hub/utils/__init__.py +21 -0
  95. {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/METADATA +6 -20
  96. {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/RECORD +98 -55
  97. {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/WHEEL +1 -1
  98. keras_hub/api/__init__.py +0 -15
  99. keras_hub/api/layers/__init__.py +0 -86
  100. keras_hub/api/metrics/__init__.py +0 -11
  101. keras_hub/api/models/__init__.py +0 -416
  102. keras_hub/api/samplers/__init__.py +0 -16
  103. keras_hub/api/tokenizers/__init__.py +0 -58
  104. keras_hub/api/utils/__init__.py +0 -9
  105. {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,272 @@
1
+ import keras
2
+
3
+ try:
4
+ import tensorflow as tf
5
+ except ImportError:
6
+ tf = None
7
+ from keras_hub.src.api_export import keras_hub_export
8
+ from keras_hub.src.layers.preprocessing.start_end_packer import StartEndPacker
9
+ from keras_hub.src.models.audio_to_text_preprocessor import (
10
+ AudioToTextPreprocessor,
11
+ )
12
+ from keras_hub.src.models.moonshine.moonshine_backbone import MoonshineBackbone
13
+ from keras_hub.src.models.moonshine.moonshine_tokenizer import (
14
+ MoonshineTokenizer,
15
+ )
16
+ from keras_hub.src.utils.tensor_utils import preprocessing_function
17
+
18
+
19
@keras_hub_export("keras_hub.models.MoonshineAudioToTextPreprocessor")
class MoonshineAudioToTextPreprocessor(AudioToTextPreprocessor):
    """Moonshine Seq2Seq LM preprocessor for audio-to-text tasks.

    This preprocessor converts raw audio and text inputs into a format suitable
    for the `MoonshineAudioToText` model. It processes audio waveforms using
    `MoonshineAudioConverter` for basic preprocessing (padding, normalization)
    and tokenizes text using `MoonshineTokenizer` for the decoder. It supports
    training and generation.

    Args:
        audio_converter: A `MoonshineAudioConverter` instance to process audio.
        tokenizer: A `MoonshineTokenizer` instance to tokenize text.
        decoder_sequence_length: int, optional. Maximum length for decoder token
            sequences. Defaults to 1024.
        **kwargs: Additional keyword arguments for the parent class.

    Examples:
    ```python
    import keras
    from keras_hub.layers import MoonshineAudioConverter
    from keras_hub.models import MoonshineTokenizer

    # Create audio converter and tokenizer instances.
    audio_converter = MoonshineAudioConverter()
    tokenizer = MoonshineTokenizer.from_preset("moonshine_base")

    # Initialize the preprocessor.
    preprocessor = keras_hub.models.MoonshineAudioToTextPreprocessor(
        audio_converter=audio_converter,
        tokenizer=tokenizer,
        decoder_sequence_length=8
    )

    # Prepare input data (audio tensor and text).
    inputs = {
        "audio": keras.random.normal((1, 16000)),
        "text": ["the quick brown fox"]
    }

    # Process the inputs for training.
    x, y, sample_weight = preprocessor(inputs)

    # Check output keys and shapes (shapes depend on padding/truncation).
    print(x.keys())
    # dict_keys(['encoder_input_values', 'encoder_padding_mask',
    # 'decoder_token_ids', 'decoder_padding_mask']).
    print(x["encoder_input_values"].shape) # e.g., (1, 16000, 1) / padded length
    print(x["encoder_padding_mask"].shape) # e.g., (1, 16000) or padded length
    print(x["decoder_token_ids"].shape) # (1, 8)
    print(x["decoder_padding_mask"].shape) # (1, 8)
    print(y.shape) # (1, 8) - Labels
    print(sample_weight.shape) # (1, 8) - Sample weights

    # Process inputs for generation.
    gen_inputs = preprocessor.generate_preprocess(inputs)
    print(gen_inputs.keys())
    # dict_keys(['encoder_input_values', 'encoder_padding_mask',
    # 'decoder_token_ids', 'decoder_padding_mask']).
    ```
    """

    backbone_cls = MoonshineBackbone
    tokenizer_cls = MoonshineTokenizer

    def __init__(
        self,
        audio_converter,
        tokenizer,
        decoder_sequence_length=1024,
        **kwargs,
    ):
        super().__init__(tokenizer=tokenizer, **kwargs)
        self.audio_converter = audio_converter
        self.decoder_sequence_length = decoder_sequence_length
        # Created lazily in `build()` so the tokenizer's special token ids are
        # resolved before the packer is constructed.
        self.decoder_packer = None
        self._special_token_ids_set = None

    def build(self, input_shape):
        """Build the decoder packer and the special-token filter set."""
        self.decoder_packer = StartEndPacker(
            start_value=self.tokenizer.start_token_id,
            end_value=self.tokenizer.end_token_id,
            pad_value=self.tokenizer.pad_token_id,
            sequence_length=self.decoder_sequence_length,
            return_padding_mask=True,
        )
        # Token ids stripped from generated output in `generate_postprocess`.
        self._special_token_ids_set = set(self.tokenizer.special_token_ids)
        if self.tokenizer.pad_token_id is not None:
            self._special_token_ids_set.add(self.tokenizer.pad_token_id)
        self.built = True

    def _process_audio(self, audio):
        """Convert raw audio to encoder inputs and an encoder padding mask.

        Returns a `(encoder_inputs, encoder_padding_mask)` tuple where
        `encoder_inputs` carries a trailing channel dimension and the mask is
        `True` for non-padding positions.
        """
        encoder_inputs = self.audio_converter(audio, padding="longest")
        if len(keras.ops.shape(encoder_inputs)) == 2:
            # Add a channel dimension: (batch, samples) -> (batch, samples, 1).
            encoder_inputs = keras.ops.expand_dims(encoder_inputs, axis=-1)
        squeezed_inputs = encoder_inputs[:, :, 0]
        # Inside a `tf.data` pipeline on a non-TensorFlow backend, inputs are
        # symbolic tf tensors that `keras.ops` cannot operate on, so fall back
        # to raw tf ops for the mask computation in that case.
        is_tf_symbolic = (
            tf is not None
            and hasattr(squeezed_inputs, "graph")
            and hasattr(squeezed_inputs.graph, "as_graph_def")
        )
        if is_tf_symbolic and keras.config.backend() != "tensorflow":
            encoder_padding_mask = tf.logical_not(
                tf.math.equal(
                    squeezed_inputs, float(self.audio_converter.padding_value)
                )
            )
        else:
            encoder_padding_mask = keras.ops.logical_not(
                keras.ops.equal(
                    squeezed_inputs, self.audio_converter.padding_value
                )
            )
        return encoder_inputs, encoder_padding_mask

    @preprocessing_function
    def call(
        self,
        x,
        y=None,
        sample_weight=None,
        decoder_sequence_length=None,
        sequence_length=None,
    ):
        """Preprocess `{"audio", "text"}` inputs into `(x, y, sample_weight)`.

        The decoder targets `y` are the packed token ids shifted left by one
        position (teacher forcing), with the padding mask as sample weights.
        """
        if not self.built:
            self.build(None)
        if isinstance(x, tuple) and len(x) == 1:
            x = x[0]
        # `sequence_length` is accepted as an alias for compatibility with the
        # generic preprocessor call signature.
        decoder_sequence_length = (
            decoder_sequence_length
            or sequence_length
            or self.decoder_sequence_length
        )
        text = x["text"]
        encoder_inputs, encoder_padding_mask = self._process_audio(x["audio"])
        decoder_inputs = self.tokenizer(text)
        # Pack to length + 1 so the shifted ids/labels are both full length.
        decoder_token_ids, decoder_padding_mask = self.decoder_packer(
            decoder_inputs,
            sequence_length=decoder_sequence_length + 1,
            add_end_value=True,
        )
        x_out = {
            "encoder_input_values": encoder_inputs,
            "encoder_padding_mask": encoder_padding_mask,
            "decoder_token_ids": decoder_token_ids[..., :-1],
            "decoder_padding_mask": decoder_padding_mask[..., :-1],
        }
        y_out = decoder_token_ids[..., 1:]
        sample_weight_out = decoder_padding_mask[..., 1:]

        return keras.utils.pack_x_y_sample_weight(
            x_out, y_out, sample_weight_out
        )

    @preprocessing_function
    def generate_preprocess(
        self,
        x,
        decoder_sequence_length=None,
        sequence_length=None,
    ):
        """Preprocess inputs for generation.

        If no `"text"` prompt is given, the decoder is seeded with the start
        token for every audio sample in the batch. A single text prompt is
        broadcast across the audio batch.
        """
        if not self.built:
            self.build(None)
        if isinstance(x, tuple) and len(x) == 1:
            x = x[0]
        decoder_sequence_length = (
            decoder_sequence_length
            or sequence_length
            or self.decoder_sequence_length
        )
        encoder_inputs, encoder_padding_mask = self._process_audio(x["audio"])
        audio_batch_size = keras.ops.shape(x["audio"])[0]
        decoder_text = x.get("text", None)
        if decoder_text is None:
            # No prompt: seed each sequence with the start token only.
            decoder_token_ids = [
                [self.tokenizer.start_token_id]
            ] * audio_batch_size
        else:
            if isinstance(decoder_text, str):
                decoder_text = [decoder_text] * audio_batch_size
            elif len(decoder_text) != audio_batch_size:
                if len(decoder_text) == 1:
                    decoder_text = decoder_text * audio_batch_size
                else:
                    raise ValueError(
                        f"Batch size mismatch between audio "
                        f"({audio_batch_size}) and text prompts "
                        f"({len(decoder_text)})"
                    )
            decoder_token_ids = self.tokenizer(decoder_text)
        # No end token during generation; the model produces it.
        decoder_token_ids, decoder_padding_mask = self.decoder_packer(
            decoder_token_ids,
            sequence_length=decoder_sequence_length,
            add_end_value=False,
        )

        return {
            "encoder_input_values": encoder_inputs,
            "encoder_padding_mask": encoder_padding_mask,
            "decoder_token_ids": decoder_token_ids,
            "decoder_padding_mask": decoder_padding_mask,
        }

    @preprocessing_function
    def generate_postprocess(self, x):
        """Convert generated token ids back to text.

        Masked positions, special tokens, and out-of-vocabulary ids are
        dropped before detokenization.

        Raises:
            ImportError: If TensorFlow is not installed, since ragged-tensor
                support is required to detokenize variable-length sequences.
        """
        if tf is None:
            # `tf` is None when the guarded import at the top of the file
            # failed; without it `tf.ragged.constant` below would raise an
            # opaque AttributeError.
            raise ImportError(
                "`MoonshineAudioToTextPreprocessor.generate_postprocess` "
                "requires TensorFlow for ragged tensor support. "
                "Please install TensorFlow."
            )
        if not self.built:
            self.build(None)
        token_ids, padding_mask = (
            x["decoder_token_ids"],
            x["decoder_padding_mask"],
        )
        token_ids_np = keras.ops.convert_to_numpy(token_ids)
        padding_mask_np = keras.ops.convert_to_numpy(padding_mask)
        vocab_size = self.tokenizer.vocabulary_size()
        processed_sequences = []
        for i in range(token_ids_np.shape[0]):
            sequence = token_ids_np[i]
            mask = padding_mask_np[i].astype(bool)
            valid_tokens = sequence[mask]
            filtered_tokens = [
                int(token)
                for token in valid_tokens
                if token not in self._special_token_ids_set
                and 0 <= token < vocab_size
            ]
            processed_sequences.append(filtered_tokens)
        processed_sequences = tf.ragged.constant(
            processed_sequences, dtype=tf.int32
        )
        return self.tokenizer.detokenize(processed_sequences)