keras-hub 0.25.0.dev0__py3-none-any.whl → 0.26.0.dev0__py3-none-any.whl
This diff covers publicly available package versions released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
- keras_hub/layers/__init__.py +21 -0
- keras_hub/models/__init__.py +27 -0
- keras_hub/src/layers/modeling/non_max_supression.py +5 -2
- keras_hub/src/layers/modeling/reversible_embedding.py +2 -275
- keras_hub/src/layers/modeling/token_and_position_embedding.py +6 -6
- keras_hub/src/layers/modeling/transformer_layer_utils.py +9 -9
- keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +3 -1
- keras_hub/src/layers/preprocessing/multi_segment_packer.py +3 -1
- keras_hub/src/models/albert/albert_backbone.py +1 -3
- keras_hub/src/models/backbone.py +3 -0
- keras_hub/src/models/bart/bart_backbone.py +1 -3
- keras_hub/src/models/bert/bert_backbone.py +2 -4
- keras_hub/src/models/bloom/bloom_backbone.py +1 -3
- keras_hub/src/models/causal_lm.py +2 -2
- keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -3
- keras_hub/src/models/edrec/edrec_backbone.py +147 -0
- keras_hub/src/models/edrec/edrec_layers.py +434 -0
- keras_hub/src/models/edrec/edrec_seq2seq_lm.py +273 -0
- keras_hub/src/models/electra/electra_backbone.py +1 -3
- keras_hub/src/models/f_net/f_net_backbone.py +1 -3
- keras_hub/src/models/falcon/falcon_backbone.py +1 -3
- keras_hub/src/models/flux/flux_layers.py +3 -3
- keras_hub/src/models/flux/flux_maths.py +29 -15
- keras_hub/src/models/gemma/gemma_backbone.py +1 -3
- keras_hub/src/models/gemma/gemma_causal_lm.py +1 -1
- keras_hub/src/models/gemma3/gemma3_attention.py +1 -1
- keras_hub/src/models/gemma3/gemma3_backbone.py +70 -8
- keras_hub/src/models/gemma3/gemma3_causal_lm.py +16 -1
- keras_hub/src/models/gemma3/gemma3_decoder_block.py +23 -3
- keras_hub/src/models/gemma3/{gemma3_interleave_embeddings.py → gemma3_layers.py} +101 -0
- keras_hub/src/models/gemma3/gemma3_presets.py +79 -7
- keras_hub/src/models/gemma3/gemma3_vision_encoder.py +1 -1
- keras_hub/src/models/gpt2/gpt2_backbone.py +1 -3
- keras_hub/src/models/gpt2/gpt2_causal_lm.py +1 -1
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_backbone.py +1 -3
- keras_hub/src/models/gpt_oss/gpt_oss_backbone.py +1 -3
- keras_hub/src/models/llama/llama_backbone.py +1 -3
- keras_hub/src/models/masked_lm.py +1 -1
- keras_hub/src/models/mistral/mistral_backbone.py +1 -3
- keras_hub/src/models/mixtral/mixtral_backbone.py +1 -3
- keras_hub/src/models/moonshine/moonshine_backbone.py +1 -3
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +1 -3
- keras_hub/src/models/parseq/parseq_tokenizer.py +3 -1
- keras_hub/src/models/phi3/phi3_backbone.py +1 -3
- keras_hub/src/models/qwen/qwen_backbone.py +1 -3
- keras_hub/src/models/qwen/qwen_presets.py +209 -0
- keras_hub/src/models/qwen3/qwen3_backbone.py +1 -3
- keras_hub/src/models/qwen3_moe/qwen3_moe_backbone.py +1 -3
- keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +15 -0
- keras_hub/src/models/qwen_moe/qwen_moe_backbone.py +1 -3
- keras_hub/src/models/roformer_v2/roformer_v2_backbone.py +1 -3
- keras_hub/src/models/rqvae/__init__.py +5 -0
- keras_hub/src/models/rqvae/rqvae_backbone.py +167 -0
- keras_hub/src/models/rqvae/rqvae_layers.py +335 -0
- keras_hub/src/models/rwkv7/__init__.py +5 -0
- keras_hub/src/models/rwkv7/rwkv7_backbone.py +180 -0
- keras_hub/src/models/rwkv7/rwkv7_causal_lm.py +259 -0
- keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py +214 -0
- keras_hub/src/models/rwkv7/rwkv7_layer.py +724 -0
- keras_hub/src/models/rwkv7/rwkv7_presets.py +26 -0
- keras_hub/src/models/rwkv7/rwkv7_tokenizer.py +495 -0
- keras_hub/src/models/sam/sam_backbone.py +5 -1
- keras_hub/src/models/sam/sam_prompt_encoder.py +1 -1
- keras_hub/src/models/sam3/__init__.py +7 -0
- keras_hub/src/models/sam3/roi_align.py +222 -0
- keras_hub/src/models/sam3/sam3_detr_decoder.py +641 -0
- keras_hub/src/models/sam3/sam3_detr_encoder.py +293 -0
- keras_hub/src/models/sam3/sam3_dot_product_scoring.py +120 -0
- keras_hub/src/models/sam3/sam3_geometry_encoder.py +517 -0
- keras_hub/src/models/sam3/sam3_image_converter.py +10 -0
- keras_hub/src/models/sam3/sam3_layers.py +814 -0
- keras_hub/src/models/sam3/sam3_mask_decoder.py +374 -0
- keras_hub/src/models/sam3/sam3_pc_backbone.py +306 -0
- keras_hub/src/models/sam3/sam3_pc_image_segmenter.py +282 -0
- keras_hub/src/models/sam3/sam3_pc_image_segmenter_preprocessor.py +336 -0
- keras_hub/src/models/sam3/sam3_presets.py +16 -0
- keras_hub/src/models/sam3/sam3_text_encoder.py +212 -0
- keras_hub/src/models/sam3/sam3_tokenizer.py +65 -0
- keras_hub/src/models/sam3/sam3_utils.py +134 -0
- keras_hub/src/models/sam3/sam3_vision_encoder.py +738 -0
- keras_hub/src/models/segformer/segformer_backbone.py +6 -6
- keras_hub/src/models/siglip/siglip_layers.py +1 -3
- keras_hub/src/models/smollm3/smollm3_backbone.py +1 -3
- keras_hub/src/models/stable_diffusion_3/t5_encoder.py +1 -3
- keras_hub/src/models/t5/t5_backbone.py +1 -3
- keras_hub/src/models/t5gemma/t5gemma_backbone.py +1 -3
- keras_hub/src/models/task.py +1 -1
- keras_hub/src/tests/test_case.py +394 -3
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +33 -2
- keras_hub/src/tokenizers/byte_tokenizer.py +3 -1
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +15 -1
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +3 -1
- keras_hub/src/tokenizers/word_piece_tokenizer.py +15 -1
- keras_hub/src/utils/preset_utils.py +1 -1
- keras_hub/src/utils/tensor_utils.py +12 -0
- keras_hub/src/utils/transformers/convert_gemma3.py +68 -22
- keras_hub/src/utils/transformers/convert_qwen3_moe.py +4 -1
- keras_hub/src/utils/transformers/convert_sam3.py +472 -0
- keras_hub/src/utils/transformers/export/gemma3.py +196 -0
- keras_hub/src/utils/transformers/export/hf_exporter.py +86 -25
- keras_hub/src/utils/transformers/export/qwen.py +136 -0
- keras_hub/src/utils/transformers/preset_loader.py +15 -1
- keras_hub/src/version.py +1 -1
- keras_hub/tokenizers/__init__.py +6 -0
- {keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/METADATA +6 -13
- {keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/RECORD +108 -76
- {keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/WHEEL +1 -1
- keras_hub/src/models/gemma3/rms_normalization.py +0 -26
- {keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/top_level.txt +0 -0

@@ -2,6 +2,107 @@ import keras
 from keras import ops
 
 
+class RMSNormalization(keras.layers.Layer):
+    def __init__(self, epsilon=1e-6, **kwargs):
+        super().__init__(**kwargs)
+        self.epsilon = epsilon
+
+    def build(self, input_shape):
+        self.scale = self.add_weight(
+            name="scale",
+            trainable=True,
+            shape=(input_shape[-1],),
+            initializer="zeros",
+        )
+        self.built = True
+
+    def call(self, x):
+        # Always compute normalization in float32.
+        x = ops.cast(x, "float32")
+        scale = ops.cast(self.scale, "float32")
+        var = ops.mean(ops.square(x), axis=-1, keepdims=True)
+        normed_inputs = x * ops.reciprocal(ops.sqrt(var + self.epsilon))
+        normed_inputs = normed_inputs * (1 + scale)
+        return ops.cast(normed_inputs, self.compute_dtype)
+
+
+class Gemma3MeanPooling(keras.layers.Layer):
+    """Mean pooling layer that computes the average of token embeddings.
+
+    This layer correctly handles variable-length sequences by ignoring
+    padded tokens in the mean calculation, using a `padding_mask`.
+
+    Example:
+    ```python
+    import numpy as np
+
+    sequence_output = np.random.rand(2, 4, 8).astype("float32")
+    padding_mask = np.array([[1, 1, 1, 0], [1, 1, 0, 0]], dtype="int32")
+    mean_pool_layer = Gemma3MeanPooling()
+    pooled = mean_pool_layer([sequence_output, padding_mask])
+    # pooled.shape -> (2, 8)
+    ```
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.supports_masking = True
+
+    def call(self, inputs, padding_mask=None):
+        """Performs masked mean pooling on the token embeddings.
+
+        Args:
+            inputs: The sequence of embeddings to pool, with a shape of
+                `(batch_size, seq_len, hidden_dim)`.
+            padding_mask: The mask indicating valid tokens, with a shape of
+                `(batch_size, seq_len)`.
+
+        Returns:
+            A tensor representing the pooled embeddings, with a shape of
+            `(batch_size, hidden_dim)`.
+        """
+        if padding_mask is None:
+            inputs, padding_mask = inputs
+
+        sequence_output = inputs
+        mask = ops.expand_dims(
+            ops.cast(padding_mask, sequence_output.dtype), axis=-1
+        )
+
+        masked_output = sequence_output * mask
+
+        sum_embeddings = ops.sum(masked_output, axis=1)
+
+        num_tokens = ops.sum(
+            ops.cast(padding_mask, sequence_output.dtype), axis=1
+        )
+        num_tokens = ops.expand_dims(num_tokens, axis=1)
+        # Avoid division by zero
+        num_tokens = ops.maximum(num_tokens, 1e-9)
+
+        mean_embeddings = sum_embeddings / num_tokens
+        return ops.cast(mean_embeddings, self.compute_dtype)
+
+    def compute_output_shape(self, input_shape):
+        """Computes the output shape of the layer.
+
+        Args:
+            input_shape: A tuple or list of tuples representing input shapes.
+
+        Returns:
+            A tuple representing the output shape.
+        """
+        if isinstance(input_shape, list):
+            sequence_output_shape = input_shape[0]
+        else:
+            sequence_output_shape = input_shape
+        return sequence_output_shape[:-2] + (sequence_output_shape[-1],)
+
+    def get_config(self):
+        """Returns the config of the layer."""
+        return super().get_config()
+
+
 class Gemma3InterleaveEmbeddings(keras.layers.Layer):
     """Places image embeddings in the correct position in an embedding sequence.
 
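
Unlike `Gemma3MeanPooling`, the relocated `RMSNormalization` layer carries no usage example in its docstring. Below is a minimal sketch of how it behaves; the import path is an assumption based on the module rename in the file list and the import change later in this diff.

```python
# Sketch only: exercising the relocated RMSNormalization layer on dummy data.
# The import path is assumed from the gemma3_layers rename shown in this diff.
import numpy as np

from keras_hub.src.models.gemma3.gemma3_layers import RMSNormalization

x = np.random.rand(2, 4, 8).astype("float32")
norm = RMSNormalization(epsilon=1e-6)
y = norm(x)  # normalizes over the last axis; output shape stays (2, 4, 8)
```

Because the `scale` weight is zero-initialized and applied as `(1 + scale)`, a freshly built layer starts out as plain RMS normalization with unit gain.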
@@ -181,12 +181,25 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_instruct_270m/4",
     },
+    "medgemma_4b": {
+        "metadata": {
+            "description": (
+                "A 4 billion parameter model based on Gemma 3. "
+                "This model is pre-trained for performance on medical text "
+                "and image comprehension and is optimized for medical "
+                "applications that involve a text generation component."
+            ),
+            "params": 4300079472,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/medgemma/keras/medgemma_4b/1",
+    },
     "medgemma_instruct_4b": {
         "metadata": {
             "description": (
                 "A 4 billion parameter model based on Gemma 3. "
-                "This model is
-                "and image comprehension and is optimized for medical"
+                "This model is instruction-tuned for performance on medical "
+                "text and image comprehension and is optimized for medical "
                 "applications that involve a text generation component."
             ),
             "params": 4300079472,

@@ -198,8 +211,8 @@ backbone_presets = {
         "metadata": {
             "description": (
                 "A 27 billion parameter model based on Gemma 3. "
-                "This model
-                "and image comprehension and is optimized for medical "
+                "This model is instruction-tuned for performance on medical "
+                " text and image comprehension and is optimized for medical "
                 "applications that involve a text generation component."
             ),
             "params": 27432406640,

@@ -211,13 +224,72 @@ backbone_presets = {
         "metadata": {
             "description": (
                 "A 27 billion parameter text-only model based on Gemma 3. "
-                "This model is
-                "comprehension and is optimized for medical
-                "that involve a text generation component."
+                "This model is instruction-tuned (No images) for performance "
+                "on medical text comprehension and is optimized for medical "
+                "applications that involve a text generation component."
             ),
             "params": 27009002240,
             "path": "gemma3",
         },
         "kaggle_handle": "kaggle://keras/medgemma/keras/medgemma_instruct_27b_text/1",
     },
+    "medgemma_1.5_instruct_4b": {
+        "metadata": {
+            "description": (
+                "A 4 billion parameter,Instruct-tuned MedGemma 1.5 4B is an "
+                "updated version of the Instruction-tuned MedGemma 4B model."
+            ),
+            "params": 4300079472,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/medgemma/keras/medgemma_1.5_instruct_4b/1",
+    },
+    "function_gemma_instruct_270m": {
+        "metadata": {
+            "description": (
+                "A 270M Million parameter text-only model based on Gemma 3. "
+                "This model is trained specifically for function calling "
+                "improvements."
+            ),
+            "params": 268098176,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/function-gemma/keras/function_gemma_instruct_270m/1",
+    },
+    "translategemma_4b_it": {
+        "metadata": {
+            "description": (
+                "4 billion parameter, 34-layer, multimodal instruction-tuned "
+                "translation model based on Gemma 3. Supports text and image "
+                "input for translation across 55 languages."
+            ),
+            "params": 4299915632,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/translategemma/keras/translategemma_4b_it/1",
+    },
+    "translategemma_12b_it": {
+        "metadata": {
+            "description": (
+                "12 billion parameter, 48-layer, multimodal instruction-tuned "
+                "translation model based on Gemma 3. Supports text and image "
+                "input for translation across 55 languages."
+            ),
+            "params": 12187079280,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/translategemma/keras/translategemma_12b_it/1",
+    },
+    "translategemma_27b_it": {
+        "metadata": {
+            "description": (
+                "27 billion parameter, 62-layer, multimodal instruction-tuned "
+                "translation model based on Gemma 3. Supports text and image "
+                "input for translation across 55 languages."
+            ),
+            "params": 27432062576,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/translategemma/keras/translategemma_27b_it/1",
+    },
 }
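
The new entries above follow the same `metadata`/`kaggle_handle` structure as the existing Gemma 3 presets, so they should load through the standard preset API once this release is installed. A hedged sketch, assuming the preset names register as usual and the Kaggle weights are reachable from your environment:

```python
# Sketch only: loading one of the newly listed Gemma 3 presets by name.
# "medgemma_4b" is taken from the table above; weights download from Kaggle
# on first use, so this needs network access and Kaggle credentials.
import keras_hub

backbone = keras_hub.models.Backbone.from_preset("medgemma_4b")
print(backbone.count_params())  # should land near the "params" value above
```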

@@ -2,7 +2,7 @@ import keras
 from keras import ops
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.models.
+from keras_hub.src.models.gemma3.gemma3_layers import RMSNormalization
 from keras_hub.src.utils.keras_utils import clone_initializer
 
 

@@ -1,10 +1,8 @@
 import keras
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.layers.modeling.position_embedding import PositionEmbedding
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.layers.modeling.transformer_decoder import TransformerDecoder
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.utils.keras_utils import gelu_approximate
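
The one-line change above repeats across most backbone files in this release: `ReversibleEmbedding` is now imported from `keras.layers` instead of `keras_hub.src.layers.modeling.reversible_embedding`, consistent with the 275-line reduction to that module in the file list. Below is a minimal sketch of the tied input/output embedding these backbones construct; it assumes the `keras.layers` export keeps the familiar KerasHub signature.

```python
# Sketch only: the tied token-embedding / output-projection pattern that the
# backbones below build with ReversibleEmbedding. The signature is assumed to
# match the previous KerasHub layer (input_dim/output_dim, reverse= on call).
import numpy as np
from keras.layers import ReversibleEmbedding

embedding = ReversibleEmbedding(input_dim=1000, output_dim=64, tie_weights=True)
token_ids = np.array([[1, 2, 3]])
hidden = embedding(token_ids)             # (1, 3, 64) token embeddings
logits = embedding(hidden, reverse=True)  # (1, 3, 1000) vocabulary logits
```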

@@ -1,9 +1,7 @@
 import keras
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.gpt_neo_x.gpt_neo_x_decoder import GPTNeoXDecoder
 from keras_hub.src.utils.keras_utils import gelu_approximate

@@ -1,9 +1,7 @@
 import keras
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.gpt_oss.gpt_oss_decoder import (
     GptOssTransformerDecoder,

@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.llama.llama_decoder import LlamaTransformerDecoder
 from keras_hub.src.models.llama.llama_layernorm import LlamaLayerNorm

@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.mistral.mistral_layer_norm import (
     MistralLayerNormalization,

@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.mixtral.mixtral_decoder import (
     MixtralTransformerDecoder,

@@ -1,9 +1,7 @@
 import keras
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.moonshine.moonshine_decoder import (
     MoonshineDecoderBlock,

@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.gemma.rms_normalization import RMSNormalization
 from keras_hub.src.models.pali_gemma.pali_gemma_decoder_block import (

@@ -13,9 +13,11 @@ from keras_hub.src.utils.tensor_utils import preprocessing_function
 
 try:
     import tensorflow as tf
-    import tensorflow_text as tf_text
 except ImportError:
     tf = None
+try:
+    import tensorflow_text as tf_text
+except ImportError:
     tf_text = None
 
 PARSEQ_VOCAB = list(
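
Splitting the single guard into two means a missing `tensorflow_text` no longer discards an otherwise usable `tensorflow` import. A small illustrative sketch of the resulting behavior (not code from the package):

```python
# Sketch only: with separate guards, each optional dependency degrades
# independently instead of one failed import nulling both modules.
try:
    import tensorflow as tf
except ImportError:
    tf = None
try:
    import tensorflow_text as tf_text
except ImportError:
    tf_text = None

if tf is not None and tf_text is None:
    # Previously unreachable: the combined guard set tf = None here too.
    print("TensorFlow is available, but tensorflow_text is not installed.")
```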

@@ -1,9 +1,7 @@
 import keras
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.phi3.phi3_decoder import Phi3Decoder
 from keras_hub.src.models.phi3.phi3_layernorm import Phi3LayerNorm

@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.qwen.qwen_decoder import QwenTransformerDecoder
 from keras_hub.src.models.qwen.qwen_layernorm import QwenLayerNorm

@@ -1,6 +1,7 @@
 """Qwen preset configurations."""
 
 backbone_presets = {
+    # Qwen 2.5 Models
     "qwen2.5_0.5b_en": {
         "metadata": {
             "description": ("24-layer Qwen model with 0.5 billion parameters."),

@@ -58,4 +59,212 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_instruct_72b_en/2",
     },
+    # Qwen 2.5 Coder Models
+    "qwen2.5_coder_0.5b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned Qwen-2.5 model with 0.5 "
+                "billion parameters."
+            ),
+            "params": 494032768,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_0.5b/1"
+        ),
+    },
+    "qwen2.5_coder_1.5b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned 28-layer Qwen-2.5 model with 1.5 "
+                "billion parameters."
+            ),
+            "params": 1543434240,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_1.5b/1"
+        ),
+    },
+    "qwen2.5_coder_3b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned Qwen-2.5 model with 3 "
+                "billion parameters."
+            ),
+            "params": 3085938688,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_3b/1"
+        ),
+    },
+    "qwen2.5_coder_7b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned Qwen-2.5 model with 7 "
+                "billion parameters."
+            ),
+            "params": 6993420288,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_7b/1"
+        ),
+    },
+    "qwen2.5_coder_14b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned Qwen-2.5 model with 14 "
+                "billion parameters."
+            ),
+            "params": 14000000000,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_14b/1"
+        ),
+    },
+    "qwen2.5_coder_32b": {
+        "metadata": {
+            "description": (
+                "Code-focused fine-tuned Qwen-2.5 model with 32 "
+                "billion parameters."
+            ),
+            "params": 32763876352,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_32b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_0.5b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "0.5 billion parameters."
+            ),
+            "params": 494032768,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_0.5b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_1.5b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "1.5 billion parameters."
+            ),
+            "params": 1543434240,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_1.5b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_3b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "3 billion parameters."
+            ),
+            "params": 3085938688,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_3b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_7b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "7 billion parameters."
+            ),
+            "params": 6993420288,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_7b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_14b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "14 billion parameters."
+            ),
+            "params": 14000000000,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_14b/1"
+        ),
+    },
+    "qwen2.5_coder_instruct_32b": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned code-focused Qwen-2.5 model with "
+                "32 billion parameters."
+            ),
+            "params": 32763876352,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-coder/keras/qwen2.5_coder_instruct_32b/1"
+        ),
+    },
+    # Qwen 2.5 Math Models
+    "qwen2.5_math_1.5b_en": {
+        "metadata": {
+            "description": (
+                "Math-focused Qwen-2.5 model with 1.5 billion parameters."
+            ),
+            "params": 1543714304,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-math/keras/qwen2.5_math_1.5b_en/1"
+        ),
+    },
+    "qwen2.5_math_instruct_1.5b_en": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned math-focused Qwen-2.5 model with "
+                "1.5 billion parameters."
+            ),
+            "params": 1543714304,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-math/keras/qwen2.5_math_instruct_1.5b_en/1"
+        ),
+    },
+    "qwen2.5_math_7b_en": {
+        "metadata": {
+            "description": (
+                "Math-focused Qwen-2.5 model with 7 billion parameters."
+            ),
+            "params": 7615616512,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-math/keras/qwen2.5_math_7b_en/1"
+        ),
+    },
+    "qwen2.5_math_instruct_7b_en": {
+        "metadata": {
+            "description": (
+                "Instruction-tuned math-focused Qwen-2.5 model with "
+                "7 billion parameters."
+            ),
+            "params": 7615616512,
+            "path": "qwen",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/qwen2-5-math/keras/qwen2.5_math_instruct_7b_en/1"
+        ),
+    },
 }
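
The Coder and Math entries above reuse the existing Qwen 2.5 backbone, so they are expected to work with the usual task classes. A hedged sketch of generating from one of the new instruct presets, assuming the preset name registers as usual and the Kaggle weights are reachable:

```python
# Sketch only: text generation from one of the newly listed Qwen 2.5 Coder
# presets. The preset name comes from the table above; weights download from
# Kaggle on first use.
import keras_hub

causal_lm = keras_hub.models.CausalLM.from_preset("qwen2.5_coder_instruct_0.5b")
print(causal_lm.generate("Write a Python function that reverses a string.", max_length=128))
```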

@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.qwen3.qwen3_decoder import Qwen3TransformerDecoder
 from keras_hub.src.models.qwen3.qwen3_layernorm import Qwen3LayerNorm

@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.qwen3_moe.qwen3_moe_decoder import (
     Qwen3MoeTransformerDecoder,

@@ -1,6 +1,7 @@
 """Qwen3 MoE model preset configurations."""
 
 backbone_presets = {
+    # Qwen-3 MoE Models
     "qwen3_moe_30b_a3b_en": {
         "metadata": {
             "description": (

@@ -27,4 +28,18 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_235b_a22b_en/1",
     },
+    # Qwen-3 Coder MoE Models
+    "qwen3_coder_instruct_30b_a3b_en": {
+        "metadata": {
+            "description": (
+                "A Code-Specific Model,Mixture-of-Experts (MoE) model "
+                "has 30.5 billion total parameters with 3.3 billion "
+                "activated, built on 48 layers and utilizes 32 query "
+                "and 4 key/value attention heads with 128 experts (8 active)."
+            ),
+            "params": 30532122624,
+            "path": "qwen3_moe",
+        },
+        "kaggle_handle": "kaggle://keras/qwen3-coder/keras/qwen3_coder_instruct_30b_a3b_en/1",
+    },
 }

@@ -1,10 +1,8 @@
 import keras
 from keras import ops
+from keras.layers import ReversibleEmbedding
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.qwen.qwen_layernorm import QwenLayerNorm
 from keras_hub.src.models.qwen_moe.qwen_moe_decoder import (