keras-hub 0.25.0.dev0__py3-none-any.whl → 0.26.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. keras_hub/layers/__init__.py +21 -0
  2. keras_hub/models/__init__.py +27 -0
  3. keras_hub/src/layers/modeling/non_max_supression.py +5 -2
  4. keras_hub/src/layers/modeling/reversible_embedding.py +2 -275
  5. keras_hub/src/layers/modeling/token_and_position_embedding.py +6 -6
  6. keras_hub/src/layers/modeling/transformer_layer_utils.py +9 -9
  7. keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +3 -1
  8. keras_hub/src/layers/preprocessing/multi_segment_packer.py +3 -1
  9. keras_hub/src/models/albert/albert_backbone.py +1 -3
  10. keras_hub/src/models/backbone.py +3 -0
  11. keras_hub/src/models/bart/bart_backbone.py +1 -3
  12. keras_hub/src/models/bert/bert_backbone.py +2 -4
  13. keras_hub/src/models/bloom/bloom_backbone.py +1 -3
  14. keras_hub/src/models/causal_lm.py +2 -2
  15. keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -3
  16. keras_hub/src/models/edrec/edrec_backbone.py +147 -0
  17. keras_hub/src/models/edrec/edrec_layers.py +434 -0
  18. keras_hub/src/models/edrec/edrec_seq2seq_lm.py +273 -0
  19. keras_hub/src/models/electra/electra_backbone.py +1 -3
  20. keras_hub/src/models/f_net/f_net_backbone.py +1 -3
  21. keras_hub/src/models/falcon/falcon_backbone.py +1 -3
  22. keras_hub/src/models/flux/flux_layers.py +3 -3
  23. keras_hub/src/models/flux/flux_maths.py +29 -15
  24. keras_hub/src/models/gemma/gemma_backbone.py +1 -3
  25. keras_hub/src/models/gemma/gemma_causal_lm.py +1 -1
  26. keras_hub/src/models/gemma3/gemma3_attention.py +1 -1
  27. keras_hub/src/models/gemma3/gemma3_backbone.py +70 -8
  28. keras_hub/src/models/gemma3/gemma3_causal_lm.py +16 -1
  29. keras_hub/src/models/gemma3/gemma3_decoder_block.py +23 -3
  30. keras_hub/src/models/gemma3/{gemma3_interleave_embeddings.py → gemma3_layers.py} +101 -0
  31. keras_hub/src/models/gemma3/gemma3_presets.py +79 -7
  32. keras_hub/src/models/gemma3/gemma3_vision_encoder.py +1 -1
  33. keras_hub/src/models/gpt2/gpt2_backbone.py +1 -3
  34. keras_hub/src/models/gpt2/gpt2_causal_lm.py +1 -1
  35. keras_hub/src/models/gpt_neo_x/gpt_neo_x_backbone.py +1 -3
  36. keras_hub/src/models/gpt_oss/gpt_oss_backbone.py +1 -3
  37. keras_hub/src/models/llama/llama_backbone.py +1 -3
  38. keras_hub/src/models/masked_lm.py +1 -1
  39. keras_hub/src/models/mistral/mistral_backbone.py +1 -3
  40. keras_hub/src/models/mixtral/mixtral_backbone.py +1 -3
  41. keras_hub/src/models/moonshine/moonshine_backbone.py +1 -3
  42. keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +1 -3
  43. keras_hub/src/models/parseq/parseq_tokenizer.py +3 -1
  44. keras_hub/src/models/phi3/phi3_backbone.py +1 -3
  45. keras_hub/src/models/qwen/qwen_backbone.py +1 -3
  46. keras_hub/src/models/qwen/qwen_presets.py +209 -0
  47. keras_hub/src/models/qwen3/qwen3_backbone.py +1 -3
  48. keras_hub/src/models/qwen3_moe/qwen3_moe_backbone.py +1 -3
  49. keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +15 -0
  50. keras_hub/src/models/qwen_moe/qwen_moe_backbone.py +1 -3
  51. keras_hub/src/models/roformer_v2/roformer_v2_backbone.py +1 -3
  52. keras_hub/src/models/rqvae/__init__.py +5 -0
  53. keras_hub/src/models/rqvae/rqvae_backbone.py +167 -0
  54. keras_hub/src/models/rqvae/rqvae_layers.py +335 -0
  55. keras_hub/src/models/rwkv7/__init__.py +5 -0
  56. keras_hub/src/models/rwkv7/rwkv7_backbone.py +180 -0
  57. keras_hub/src/models/rwkv7/rwkv7_causal_lm.py +259 -0
  58. keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py +214 -0
  59. keras_hub/src/models/rwkv7/rwkv7_layer.py +724 -0
  60. keras_hub/src/models/rwkv7/rwkv7_presets.py +26 -0
  61. keras_hub/src/models/rwkv7/rwkv7_tokenizer.py +495 -0
  62. keras_hub/src/models/sam/sam_backbone.py +5 -1
  63. keras_hub/src/models/sam/sam_prompt_encoder.py +1 -1
  64. keras_hub/src/models/sam3/__init__.py +7 -0
  65. keras_hub/src/models/sam3/roi_align.py +222 -0
  66. keras_hub/src/models/sam3/sam3_detr_decoder.py +641 -0
  67. keras_hub/src/models/sam3/sam3_detr_encoder.py +293 -0
  68. keras_hub/src/models/sam3/sam3_dot_product_scoring.py +120 -0
  69. keras_hub/src/models/sam3/sam3_geometry_encoder.py +517 -0
  70. keras_hub/src/models/sam3/sam3_image_converter.py +10 -0
  71. keras_hub/src/models/sam3/sam3_layers.py +814 -0
  72. keras_hub/src/models/sam3/sam3_mask_decoder.py +374 -0
  73. keras_hub/src/models/sam3/sam3_pc_backbone.py +306 -0
  74. keras_hub/src/models/sam3/sam3_pc_image_segmenter.py +282 -0
  75. keras_hub/src/models/sam3/sam3_pc_image_segmenter_preprocessor.py +336 -0
  76. keras_hub/src/models/sam3/sam3_presets.py +16 -0
  77. keras_hub/src/models/sam3/sam3_text_encoder.py +212 -0
  78. keras_hub/src/models/sam3/sam3_tokenizer.py +65 -0
  79. keras_hub/src/models/sam3/sam3_utils.py +134 -0
  80. keras_hub/src/models/sam3/sam3_vision_encoder.py +738 -0
  81. keras_hub/src/models/segformer/segformer_backbone.py +6 -6
  82. keras_hub/src/models/siglip/siglip_layers.py +1 -3
  83. keras_hub/src/models/smollm3/smollm3_backbone.py +1 -3
  84. keras_hub/src/models/stable_diffusion_3/t5_encoder.py +1 -3
  85. keras_hub/src/models/t5/t5_backbone.py +1 -3
  86. keras_hub/src/models/t5gemma/t5gemma_backbone.py +1 -3
  87. keras_hub/src/models/task.py +1 -1
  88. keras_hub/src/tests/test_case.py +394 -3
  89. keras_hub/src/tokenizers/byte_pair_tokenizer.py +33 -2
  90. keras_hub/src/tokenizers/byte_tokenizer.py +3 -1
  91. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +15 -1
  92. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +3 -1
  93. keras_hub/src/tokenizers/word_piece_tokenizer.py +15 -1
  94. keras_hub/src/utils/preset_utils.py +1 -1
  95. keras_hub/src/utils/tensor_utils.py +12 -0
  96. keras_hub/src/utils/transformers/convert_gemma3.py +68 -22
  97. keras_hub/src/utils/transformers/convert_qwen3_moe.py +4 -1
  98. keras_hub/src/utils/transformers/convert_sam3.py +472 -0
  99. keras_hub/src/utils/transformers/export/gemma3.py +196 -0
  100. keras_hub/src/utils/transformers/export/hf_exporter.py +86 -25
  101. keras_hub/src/utils/transformers/export/qwen.py +136 -0
  102. keras_hub/src/utils/transformers/preset_loader.py +15 -1
  103. keras_hub/src/version.py +1 -1
  104. keras_hub/tokenizers/__init__.py +6 -0
  105. {keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/METADATA +6 -13
  106. {keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/RECORD +108 -76
  107. {keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/WHEEL +1 -1
  108. keras_hub/src/models/gemma3/rms_normalization.py +0 -26
  109. {keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,107 @@ import keras
 from keras import ops
 
 
+class RMSNormalization(keras.layers.Layer):
+    def __init__(self, epsilon=1e-6, **kwargs):
+        super().__init__(**kwargs)
+        self.epsilon = epsilon
+
+    def build(self, input_shape):
+        self.scale = self.add_weight(
+            name="scale",
+            trainable=True,
+            shape=(input_shape[-1],),
+            initializer="zeros",
+        )
+        self.built = True
+
+    def call(self, x):
+        # Always compute normalization in float32.
+        x = ops.cast(x, "float32")
+        scale = ops.cast(self.scale, "float32")
+        var = ops.mean(ops.square(x), axis=-1, keepdims=True)
+        normed_inputs = x * ops.reciprocal(ops.sqrt(var + self.epsilon))
+        normed_inputs = normed_inputs * (1 + scale)
+        return ops.cast(normed_inputs, self.compute_dtype)
+
+
+class Gemma3MeanPooling(keras.layers.Layer):
+    """Mean pooling layer that computes the average of token embeddings.
+
+    This layer correctly handles variable-length sequences by ignoring
+    padded tokens in the mean calculation, using a `padding_mask`.
+
+    Example:
+    ```python
+    import numpy as np
+
+    sequence_output = np.random.rand(2, 4, 8).astype("float32")
+    padding_mask = np.array([[1, 1, 1, 0], [1, 1, 0, 0]], dtype="int32")
+    mean_pool_layer = Gemma3MeanPooling()
+    pooled = mean_pool_layer([sequence_output, padding_mask])
+    # pooled.shape -> (2, 8)
+    ```
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.supports_masking = True
+
+    def call(self, inputs, padding_mask=None):
+        """Performs masked mean pooling on the token embeddings.
+
+        Args:
+            inputs: The sequence of embeddings to pool, with a shape of
+                `(batch_size, seq_len, hidden_dim)`.
+            padding_mask: The mask indicating valid tokens, with a shape of
+                `(batch_size, seq_len)`.
+
+        Returns:
+            A tensor representing the pooled embeddings, with a shape of
+            `(batch_size, hidden_dim)`.
+        """
+        if padding_mask is None:
+            inputs, padding_mask = inputs
+
+        sequence_output = inputs
+        mask = ops.expand_dims(
+            ops.cast(padding_mask, sequence_output.dtype), axis=-1
+        )
+
+        masked_output = sequence_output * mask
+
+        sum_embeddings = ops.sum(masked_output, axis=1)
+
+        num_tokens = ops.sum(
+            ops.cast(padding_mask, sequence_output.dtype), axis=1
+        )
+        num_tokens = ops.expand_dims(num_tokens, axis=1)
+        # Avoid division by zero
+        num_tokens = ops.maximum(num_tokens, 1e-9)
+
+        mean_embeddings = sum_embeddings / num_tokens
+        return ops.cast(mean_embeddings, self.compute_dtype)
+
+    def compute_output_shape(self, input_shape):
+        """Computes the output shape of the layer.
+
+        Args:
+            input_shape: A tuple or list of tuples representing input shapes.
+
+        Returns:
+            A tuple representing the output shape.
+        """
+        if isinstance(input_shape, list):
+            sequence_output_shape = input_shape[0]
+        else:
+            sequence_output_shape = input_shape
+        return sequence_output_shape[:-2] + (sequence_output_shape[-1],)
+
+    def get_config(self):
+        """Returns the config of the layer."""
+        return super().get_config()
+
+
 class Gemma3InterleaveEmbeddings(keras.layers.Layer):
     """Places image embeddings in the correct position in an embedding sequence.
 
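For orientation, a minimal usage sketch of the new RMSNormalization layer added above (a hypothetical standalone example with made-up shapes, not taken from the package's own tests):

import numpy as np
from keras_hub.src.models.gemma3.gemma3_layers import RMSNormalization

# (batch, seq_len, hidden_dim) activations; the layer upcasts to float32
# internally and casts back to the compute dtype on the way out.
x = np.random.rand(2, 4, 8).astype("float32")
layer = RMSNormalization(epsilon=1e-6)
y = layer(x)  # same shape; unit-RMS features scaled by (1 + scale)

Gemma3MeanPooling carries its own runnable example in its docstring above.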
@@ -181,12 +181,25 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_instruct_270m/4",
     },
+    "medgemma_4b": {
+        "metadata": {
+            "description": (
+                "A 4 billion parameter model based on Gemma 3. "
+                "This model is pre-trained for performance on medical text "
+                "and image comprehension and is optimized for medical "
+                "applications that involve a text generation component."
+            ),
+            "params": 4300079472,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/medgemma/keras/medgemma_4b/1",
+    },
     "medgemma_instruct_4b": {
         "metadata": {
             "description": (
                 "A 4 billion parameter model based on Gemma 3. "
-                "This model is trained for performance on medical text"
-                "and image comprehension and is optimized for medical"
+                "This model is instruction-tuned for performance on medical "
+                "text and image comprehension and is optimized for medical "
                 "applications that involve a text generation component."
             ),
             "params": 4300079472,
@@ -198,8 +211,8 @@ backbone_presets = {
         "metadata": {
             "description": (
                 "A 27 billion parameter model based on Gemma 3. "
-                "This model trained for performance on medical text "
-                "and image comprehension and is optimized for medical "
+                "This model is instruction-tuned for performance on medical "
+                "text and image comprehension and is optimized for medical "
                 "applications that involve a text generation component."
             ),
             "params": 27432406640,
@@ -211,13 +224,72 @@ backbone_presets = {
         "metadata": {
             "description": (
                 "A 27 billion parameter text-only model based on Gemma 3. "
-                "This model is trained for performance on medical text "
-                "comprehension and is optimized for medical applications "
-                "that involve a text generation component."
+                "This model is instruction-tuned (no images) for performance "
+                "on medical text comprehension and is optimized for medical "
+                "applications that involve a text generation component."
             ),
             "params": 27009002240,
             "path": "gemma3",
         },
         "kaggle_handle": "kaggle://keras/medgemma/keras/medgemma_instruct_27b_text/1",
     },
+    "medgemma_1.5_instruct_4b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned MedGemma 1.5 4B, a 4 billion parameter "
+                "update of the instruction-tuned MedGemma 4B model."
+            ),
+            "params": 4300079472,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/medgemma/keras/medgemma_1.5_instruct_4b/1",
+    },
+    "function_gemma_instruct_270m": {
+        "metadata": {
+            "description": (
+                "A 270 million parameter text-only model based on Gemma 3. "
+                "This model is trained specifically for function calling "
+                "improvements."
+            ),
+            "params": 268098176,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/function-gemma/keras/function_gemma_instruct_270m/1",
+    },
+    "translategemma_4b_it": {
+        "metadata": {
+            "description": (
+                "4 billion parameter, 34-layer, multimodal instruction-tuned "
+                "translation model based on Gemma 3. Supports text and image "
+                "input for translation across 55 languages."
+            ),
+            "params": 4299915632,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/translategemma/keras/translategemma_4b_it/1",
+    },
+    "translategemma_12b_it": {
+        "metadata": {
+            "description": (
+                "12 billion parameter, 48-layer, multimodal instruction-tuned "
+                "translation model based on Gemma 3. Supports text and image "
+                "input for translation across 55 languages."
+            ),
+            "params": 12187079280,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/translategemma/keras/translategemma_12b_it/1",
+    },
+    "translategemma_27b_it": {
+        "metadata": {
+            "description": (
+                "27 billion parameter, 62-layer, multimodal instruction-tuned "
+                "translation model based on Gemma 3. Supports text and image "
+                "input for translation across 55 languages."
+            ),
+            "params": 27432062576,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/translategemma/keras/translategemma_27b_it/1",
+    },
 }
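The new presets resolve through the standard preset API; a minimal sketch, assuming Kaggle access and enough memory for a 4B checkpoint (any preset name registered above works as the handle):

import keras_hub

causal_lm = keras_hub.models.Gemma3CausalLM.from_preset("translategemma_4b_it")
output = causal_lm.generate("Translate to French: Hello, world!", max_length=64)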
@@ -2,7 +2,7 @@ import keras
 from keras import ops
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.models.gemma.rms_normalization import RMSNormalization
+from keras_hub.src.models.gemma3.gemma3_layers import RMSNormalization
 from keras_hub.src.utils.keras_utils import clone_initializer
 
 
@@ -1,10 +1,8 @@
 import keras
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.layers.modeling.position_embedding import PositionEmbedding
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.layers.modeling.transformer_decoder import TransformerDecoder
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.utils.keras_utils import gelu_approximate
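This one-line migration, swapping the keras_hub-internal ReversibleEmbedding import for the Keras-core layer, repeats across most of the backbone files below. A minimal sketch of the layer itself, assuming the Keras-core version keeps the keras-hub call signature (reverse=True projects hidden states back to vocabulary logits through the tied embedding matrix):

import numpy as np
from keras.layers import ReversibleEmbedding

embedding = ReversibleEmbedding(input_dim=1000, output_dim=64, tie_weights=True)
token_ids = np.array([[1, 2, 3]])
hidden = embedding(token_ids)             # (1, 3, 64) embeddings
logits = embedding(hidden, reverse=True)  # (1, 3, 1000) logits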
@@ -422,7 +422,7 @@ class GPT2CausalLM(CausalLM):
         return per_token_loss
 
     def get_quantization_layer_structure(self, mode):
-        if mode != "gptq":
+        if mode not in ["gptq", "awq"]:
             return None
 
         backbone = self.backbone
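The widened guard means get_quantization_layer_structure now reports a layer structure for AWQ as well as GPTQ; the identical change appears in MaskedLM below. A hypothetical check (the preset name is just an example, and the AWQ quantization entry point itself is not part of this diff):

import keras_hub

causal_lm = keras_hub.models.GPT2CausalLM.from_preset("gpt2_base_en")
# Previously only "gptq" produced a structure; "awq" returned None.
assert causal_lm.get_quantization_layer_structure("awq") is not None
assert causal_lm.get_quantization_layer_structure("int8") is None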
@@ -1,9 +1,7 @@
 import keras
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.gpt_neo_x.gpt_neo_x_decoder import GPTNeoXDecoder
 from keras_hub.src.utils.keras_utils import gelu_approximate
@@ -1,9 +1,7 @@
 import keras
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.gpt_oss.gpt_oss_decoder import (
     GptOssTransformerDecoder,
@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.llama.llama_decoder import LlamaTransformerDecoder
 from keras_hub.src.models.llama.llama_layernorm import LlamaLayerNorm
@@ -86,7 +86,7 @@ class MaskedLM(Task):
         )
 
     def get_quantization_layer_structure(self, mode):
-        if mode != "gptq":
+        if mode not in ["gptq", "awq"]:
             return None
 
         backbone = self.backbone
@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.mistral.mistral_layer_norm import (
     MistralLayerNormalization,
@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.mixtral.mixtral_decoder import (
     MixtralTransformerDecoder,
@@ -1,9 +1,7 @@
 import keras
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.moonshine.moonshine_decoder import (
     MoonshineDecoderBlock,
@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.gemma.rms_normalization import RMSNormalization
 from keras_hub.src.models.pali_gemma.pali_gemma_decoder_block import (
@@ -13,9 +13,11 @@ from keras_hub.src.utils.tensor_utils import preprocessing_function
 
 try:
     import tensorflow as tf
-    import tensorflow_text as tf_text
 except ImportError:
     tf = None
+try:
+    import tensorflow_text as tf_text
+except ImportError:
     tf_text = None
 
 PARSEQ_VOCAB = list(
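Splitting the guard matters in practice: with the old combined try block, an environment that had tensorflow installed but not tensorflow_text ended up with both names set to None; now only tf_text falls back to None.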
@@ -1,9 +1,7 @@
 import keras
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.phi3.phi3_decoder import Phi3Decoder
 from keras_hub.src.models.phi3.phi3_layernorm import Phi3LayerNorm
@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.qwen.qwen_decoder import QwenTransformerDecoder
 from keras_hub.src.models.qwen.qwen_layernorm import QwenLayerNorm
@@ -1,6 +1,7 @@
 """Qwen preset configurations."""
 
 backbone_presets = {
+    # Qwen 2.5 Models
     "qwen2.5_0.5b_en": {
         "metadata": {
             "description": ("24-layer Qwen model with 0.5 billion parameters."),
@@ -58,4 +59,212 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_instruct_72b_en/2",
     },
+    # Qwen 2.5 Coder Models
+    "qwen2.5_coder_0.5b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned Qwen-2.5 model with 0.5 "
+                "billion parameters."
+            ),
+            "params": 494032768,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_0.5b/1"
+        ),
+    },
+    "qwen2.5_coder_1.5b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned 28-layer Qwen-2.5 model with 1.5 "
+                "billion parameters."
+            ),
+            "params": 1543434240,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_1.5b/1"
+        ),
+    },
+    "qwen2.5_coder_3b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned Qwen-2.5 model with 3 "
+                "billion parameters."
+            ),
+            "params": 3085938688,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_3b/1"
+        ),
+    },
+    "qwen2.5_coder_7b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned Qwen-2.5 model with 7 "
+                "billion parameters."
+            ),
+            "params": 6993420288,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_7b/1"
+        ),
+    },
+    "qwen2.5_coder_14b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned Qwen-2.5 model with 14 "
+                "billion parameters."
+            ),
+            "params": 14000000000,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_14b/1"
+        ),
+    },
+    "qwen2.5_coder_32b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned Qwen-2.5 model with 32 "
+                "billion parameters."
+            ),
+            "params": 32763876352,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_32b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_0.5b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "0.5 billion parameters."
+            ),
+            "params": 494032768,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_0.5b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_1.5b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "1.5 billion parameters."
+            ),
+            "params": 1543434240,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_1.5b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_3b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "3 billion parameters."
+            ),
+            "params": 3085938688,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_3b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_7b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "7 billion parameters."
+            ),
+            "params": 6993420288,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_7b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_14b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "14 billion parameters."
+            ),
+            "params": 14000000000,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_14b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_32b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "32 billion parameters."
+            ),
+            "params": 32763876352,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_32b/1"
+        ),
+    },
+    # Qwen 2.5 Math Models
+    "qwen2.5_math_1.5b_en": {
+        "metadata": {
+            "description": (
+                "Math-focused Qwen-2.5 model with 1.5 billion parameters."
+            ),
+            "params": 1543714304,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-math/keras/qwen2.5_math_1.5b_en/1"
+        ),
+    },
+    "qwen2.5_math_instruct_1.5b_en": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned math-focused Qwen-2.5 model with "
+                "1.5 billion parameters."
+            ),
+            "params": 1543714304,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-math/keras/qwen2.5_math_instruct_1.5b_en/1"
+        ),
+    },
+    "qwen2.5_math_7b_en": {
+        "metadata": {
+            "description": (
+                "Math-focused Qwen-2.5 model with 7 billion parameters."
+            ),
+            "params": 7615616512,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-math/keras/qwen2.5_math_7b_en/1"
+        ),
+    },
+    "qwen2.5_math_instruct_7b_en": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned math-focused Qwen-2.5 model with "
+                "7 billion parameters."
+            ),
+            "params": 7615616512,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-math/keras/qwen2.5_math_instruct_7b_en/1"
+        ),
+    },
 }
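As with the Gemma 3 presets, these load through the preset API; a hypothetical load of the smallest new checkpoint (the generic Backbone loader accepts the same handle as the task classes):

import keras_hub

backbone = keras_hub.models.Backbone.from_preset("qwen2.5_coder_0.5b")
print(backbone.count_params())  # roughly 494M, matching the metadata above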
@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.qwen3.qwen3_decoder import Qwen3TransformerDecoder
 from keras_hub.src.models.qwen3.qwen3_layernorm import Qwen3LayerNorm
@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.qwen3_moe.qwen3_moe_decoder import (
     Qwen3MoeTransformerDecoder,
@@ -1,6 +1,7 @@
 """Qwen3 MoE model preset configurations."""
 
 backbone_presets = {
+    # Qwen-3 MoE Models
    "qwen3_moe_30b_a3b_en": {
         "metadata": {
             "description": (
@@ -27,4 +28,18 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_235b_a22b_en/1",
     },
+    # Qwen-3 Coder MoE Models
+    "qwen3_coder_instruct_30b_a3b_en": {
+        "metadata": {
+            "description": (
+                "A code-specific Mixture-of-Experts (MoE) model with 30.5 "
+                "billion total parameters (3.3 billion activated), built on "
+                "48 layers with 32 query and 4 key/value attention heads "
+                "and 128 experts (8 active)."
+            ),
+            "params": 30532122624,
+            "path": "qwen3_moe",
+        },
+        "kaggle_handle": "kaggle://keras/qwen3-coder/keras/qwen3_coder_instruct_30b_a3b_en/1",
+    },
 }
@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.qwen.qwen_layernorm import QwenLayerNorm
 from keras_hub.src.models.qwen_moe.qwen_moe_decoder import (