keras-hub 0.20.0.dev1__py3-none-any.whl → 0.21.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/__init__.py +15 -33
- keras_hub/layers/__init__.py +134 -0
- keras_hub/metrics/__init__.py +11 -0
- keras_hub/models/__init__.py +642 -0
- keras_hub/samplers/__init__.py +18 -0
- keras_hub/src/layers/modeling/reversible_embedding.py +25 -35
- keras_hub/src/layers/preprocessing/image_converter.py +1 -0
- keras_hub/src/layers/preprocessing/random_deletion.py +1 -1
- keras_hub/src/layers/preprocessing/random_swap.py +1 -1
- keras_hub/src/models/audio_to_text.py +66 -0
- keras_hub/src/models/audio_to_text_preprocessor.py +80 -0
- keras_hub/src/models/backbone.py +5 -2
- keras_hub/src/models/cspnet/cspnet_backbone.py +51 -26
- keras_hub/src/models/cspnet/cspnet_presets.py +38 -3
- keras_hub/src/models/falcon/falcon_backbone.py +1 -1
- keras_hub/src/models/gemma/gemma_presets.py +10 -10
- keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py +3 -2
- keras_hub/src/models/gemma3/gemma3_presets.py +8 -8
- keras_hub/src/models/gemma3/gemma3_vision_encoder.py +1 -1
- keras_hub/src/models/llama/llama_attention.py +24 -6
- keras_hub/src/models/llama/llama_backbone.py +50 -16
- keras_hub/src/models/llama/llama_decoder.py +20 -3
- keras_hub/src/models/llama/llama_presets.py +3 -3
- keras_hub/src/models/llama/llama_rotary_embedding.py +180 -0
- keras_hub/src/models/llama3/llama3_backbone.py +10 -2
- keras_hub/src/models/llama3/llama3_presets.py +84 -2
- keras_hub/src/models/mistral/mistral_presets.py +3 -3
- keras_hub/src/models/mixtral/__init__.py +5 -0
- keras_hub/src/models/mixtral/mixtral_attention.py +252 -0
- keras_hub/src/models/mixtral/mixtral_backbone.py +207 -0
- keras_hub/src/models/mixtral/mixtral_causal_lm.py +281 -0
- keras_hub/src/models/mixtral/mixtral_causal_lm_preprocessor.py +76 -0
- keras_hub/src/models/mixtral/mixtral_decoder.py +494 -0
- keras_hub/src/models/mixtral/mixtral_layer_norm.py +34 -0
- keras_hub/src/models/mixtral/mixtral_presets.py +26 -0
- keras_hub/src/models/mixtral/mixtral_tokenizer.py +21 -0
- keras_hub/src/models/moonshine/__init__.py +5 -0
- keras_hub/src/models/moonshine/moonshine_audio_converter.py +301 -0
- keras_hub/src/models/moonshine/moonshine_audio_to_text.py +383 -0
- keras_hub/src/models/moonshine/moonshine_audio_to_text_preprocessor.py +272 -0
- keras_hub/src/models/moonshine/moonshine_backbone.py +478 -0
- keras_hub/src/models/moonshine/moonshine_decoder.py +313 -0
- keras_hub/src/models/moonshine/moonshine_encoder.py +212 -0
- keras_hub/src/models/moonshine/moonshine_layers.py +239 -0
- keras_hub/src/models/moonshine/moonshine_multi_head_attention.py +355 -0
- keras_hub/src/models/moonshine/moonshine_presets.py +25 -0
- keras_hub/src/models/moonshine/moonshine_tokenizer.py +62 -0
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +11 -11
- keras_hub/src/models/pali_gemma/pali_gemma_vit.py +1 -1
- keras_hub/src/models/qwen/__init__.py +4 -0
- keras_hub/src/models/qwen/qwen_attention.py +3 -1
- keras_hub/src/models/qwen/qwen_backbone.py +8 -1
- keras_hub/src/models/qwen/qwen_causal_lm.py +7 -0
- keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py +7 -0
- keras_hub/src/models/qwen/qwen_presets.py +61 -0
- keras_hub/src/models/qwen/qwen_tokenizer.py +9 -0
- keras_hub/src/models/qwen_moe/__init__.py +5 -0
- keras_hub/src/models/qwen_moe/qwen_moe_attention.py +375 -0
- keras_hub/src/models/qwen_moe/qwen_moe_backbone.py +373 -0
- keras_hub/src/models/qwen_moe/qwen_moe_causal_lm.py +350 -0
- keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py +17 -0
- keras_hub/src/models/qwen_moe/qwen_moe_decoder.py +625 -0
- keras_hub/src/models/qwen_moe/qwen_moe_layernorm.py +32 -0
- keras_hub/src/models/qwen_moe/qwen_moe_presets.py +15 -0
- keras_hub/src/models/qwen_moe/qwen_moe_tokenizer.py +46 -0
- keras_hub/src/models/retinanet/retinanet_image_converter.py +0 -13
- keras_hub/src/models/retinanet/retinanet_presets.py +2 -2
- keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py +0 -18
- keras_hub/src/models/segformer/segformer_presets.py +12 -12
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +6 -0
- keras_hub/src/models/task.py +5 -2
- keras_hub/src/models/xception/__init__.py +5 -0
- keras_hub/src/models/xception/xception_backbone.py +188 -0
- keras_hub/src/models/xception/xception_image_classifier.py +12 -0
- keras_hub/src/models/xception/xception_image_classifier_preprocessor.py +14 -0
- keras_hub/src/models/xception/xception_image_converter.py +8 -0
- keras_hub/src/models/xception/xception_presets.py +14 -0
- keras_hub/src/tests/mocks/mock_gemma3_tokenizer.py +155 -0
- keras_hub/src/utils/coco/__init__.py +0 -0
- keras_hub/src/utils/coco/coco_utils.py +133 -0
- keras_hub/src/utils/imagenet/imagenet_utils.py +36 -0
- keras_hub/src/utils/keras_utils.py +11 -0
- keras_hub/src/utils/preset_utils.py +70 -10
- keras_hub/src/utils/tensor_utils.py +27 -1
- keras_hub/src/utils/timm/convert_cspnet.py +94 -23
- keras_hub/src/utils/timm/preset_loader.py +6 -6
- keras_hub/src/utils/transformers/convert_llama3.py +21 -1
- keras_hub/src/utils/transformers/convert_mixtral.py +139 -0
- keras_hub/src/utils/transformers/convert_qwen.py +1 -0
- keras_hub/src/utils/transformers/convert_qwen_moe.py +253 -0
- keras_hub/src/utils/transformers/preset_loader.py +6 -0
- keras_hub/src/{version_utils.py → version.py} +1 -1
- keras_hub/tokenizers/__init__.py +117 -0
- keras_hub/utils/__init__.py +21 -0
- {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/METADATA +6 -20
- {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/RECORD +98 -55
- {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/WHEEL +1 -1
- keras_hub/api/__init__.py +0 -15
- keras_hub/api/layers/__init__.py +0 -86
- keras_hub/api/metrics/__init__.py +0 -11
- keras_hub/api/models/__init__.py +0 -416
- keras_hub/api/samplers/__init__.py +0 -16
- keras_hub/api/tokenizers/__init__.py +0 -58
- keras_hub/api/utils/__init__.py +0 -9
- {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/top_level.txt +0 -0
keras_hub/src/layers/modeling/reversible_embedding.py
CHANGED
@@ -1,3 +1,5 @@
+import inspect
+
 import keras
 from keras import ops
 
@@ -184,31 +186,33 @@ class ReversibleEmbedding(keras.layers.Embedding):
         else:
             self._quantization_mode_error(self.quantization_mode)
 
-    def _int8_build(
-
-
-
-
-
-
-
-
-
+    def _int8_build(self, embeddings_shape=None):
+        if (
+            "embeddings_shape"
+            in inspect.signature(super()._int8_build).parameters
+        ):
+            if embeddings_shape is None:
+                embeddings_shape = (self.input_dim, self.output_dim)
+            super()._int8_build(embeddings_shape=embeddings_shape)
+        else:
+            # Backward compatibility for older versions of Keras.
+            super()._int8_build()
         self.inputs_quantizer = keras.quantizers.AbsMaxQuantizer(axis=-1)
         if not self.tie_weights:
             self.reverse_embeddings = self.add_weight(
                 name="reverse_embeddings",
                 shape=(self.output_dim, self.input_dim),
-                initializer=
+                initializer="zeros",
                 dtype="int8",
                 trainable=False,
             )
             self.reverse_embeddings_scale = self.add_weight(
                 name="reverse_embeddings_scale",
                 shape=(self.input_dim,),
-                initializer=
+                initializer="ones",
                 trainable=False,
             )
+        self._is_quantized = True
 
     def _int8_call(self, inputs, reverse=False):
         if reverse:
@@ -232,27 +236,20 @@ class ReversibleEmbedding(keras.layers.Embedding):
         return super()._int8_call(inputs)
 
     def quantize(self, mode, type_check=True):
-        import gc
-
         if type_check and type(self) is not ReversibleEmbedding:
-            raise
-                f"Layer {self.__class__.__name__} does not have a `quantize()` "
-                "method implemented."
-            )
-        self._check_quantize_args(mode, self.compute_dtype)
+            raise self._not_implemented_error(self.quantize)
 
         def abs_max_quantize(inputs, axis):
            return keras.quantizers.abs_max_quantize(
                inputs, axis=axis, to_numpy=True
            )
 
-        self.
+        embeddings_shape = (self.input_dim, self.output_dim)
         if mode == "int8":
            embeddings, embeddings_scale = abs_max_quantize(
                self._embeddings, axis=-1
            )
            embeddings_scale = ops.squeeze(embeddings_scale, axis=-1)
-           self._untrack_variable(self._embeddings)
            del self._embeddings
            if not self.tie_weights:
                reverse_embeddings, reverse_embeddings_scale = abs_max_quantize(
@@ -261,24 +258,17 @@ class ReversibleEmbedding(keras.layers.Embedding):
                reverse_embeddings_scale = ops.squeeze(
                    reverse_embeddings_scale, axis=0
                )
-               self._untrack_variable(self.reverse_embeddings)
                del self.reverse_embeddings
-
-
-
-           self.
-
-
-
-               lambda shape, dtype: reverse_embeddings_scale,
-           )
-       else:
-           raise self._quantization_mode_error(mode)
-       self._tracker.lock()
+       self.quantized_build(embeddings_shape, mode)
+       if mode == "int8":
+           self._embeddings.assign(embeddings)
+           self.embeddings_scale.assign(embeddings_scale)
+           if not self.tie_weights:
+               self.reverse_embeddings.assign(reverse_embeddings)
+               self.reverse_embeddings_scale.assign(reverse_embeddings_scale)
 
        if self.dtype_policy.quantization_mode is None:
            policy = keras.dtype_policies.get(
                f"{mode}_from_{self.dtype_policy.name}"
            )
            self.dtype_policy = policy
-       gc.collect()
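Note: the `quantize()` rewrite above delegates weight creation to `quantized_build()` and then assigns the freshly quantized values, while `_int8_build` only forwards `embeddings_shape` when the installed Keras version accepts it. A minimal usage sketch of the layer-level API touched here (layer sizes are illustrative, not taken from the diff):

```python
import keras
import keras_hub

# A small tied-weights reversible embedding; sizes are illustrative.
layer = keras_hub.layers.ReversibleEmbedding(input_dim=100, output_dim=16)

token_ids = keras.ops.expand_dims(keras.ops.arange(8), 0)
embeddings = layer(token_ids)             # (1, 8, 16); also builds the layer.
logits = layer(embeddings, reverse=True)  # (1, 8, 100); reverse projection.

# Post-training int8 quantization, the code path reworked in this diff.
layer.quantize("int8")
print(layer(token_ids).shape, layer(embeddings, reverse=True).shape)
```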
keras_hub/src/layers/preprocessing/random_deletion.py
CHANGED
@@ -125,7 +125,7 @@ class RandomDeletion(PreprocessingLayer):
 
         self.rate = rate
         self.max_deletions = max_deletions
-        self.seed = random.randint(1, 1e9) if seed is None else seed
+        self.seed = random.randint(1, int(1e9)) if seed is None else seed
         self._generator = tf.random.Generator.from_seed(self.seed)
         self.skip_list = skip_list
         self.skip_fn = skip_fn
keras_hub/src/layers/preprocessing/random_swap.py
CHANGED
@@ -127,7 +127,7 @@ class RandomSwap(PreprocessingLayer):
 
         self.rate = rate
         self.max_swaps = max_swaps
-        self.seed = random.randint(1, 1e9) if seed is None else seed
+        self.seed = random.randint(1, int(1e9)) if seed is None else seed
         self._generator = tf.random.Generator.from_seed(self.seed)
         self.skip_list = skip_list
         self.skip_fn = skip_fn
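Note: the `int(1e9)` change in both `RandomDeletion` and `RandomSwap` matters because `random.randint` requires integer endpoints; passing the float `1e9` was deprecated in Python 3.10 and is rejected on recent versions. A small illustrative sketch:

```python
import random

random.seed(0)

# Works everywhere: both endpoints are ints.
print(random.randint(1, int(1e9)))

# On recent Python versions this raises, because 1e9 is a float, not an int.
try:
    random.randint(1, 1e9)
except (TypeError, ValueError) as e:
    print(f"rejected float endpoint: {e}")
```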
keras_hub/src/models/audio_to_text.py
ADDED
@@ -0,0 +1,66 @@
+from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM
+
+
+class AudioToText(Seq2SeqLM):
+    """Base class for audio-to-text models.
+
+    `AudioToText` tasks wrap a `keras_hub.models.Backbone` (capable of
+    processing audio and text features) and a
+    `keras_hub.models.AudioToTextPreprocessor` to create a model for
+    audio-to-text tasks like speech recognition or audio transcription.
+
+    These models typically consist of an encoder that processes audio input
+    and a decoder that generates a textual representation.
+
+    `AudioToText` tasks provide a high-level `generate()` method for
+    auto-regressively generating text from audio input. An optional text
+    prompt can also be provided to the decoder to guide generation. The
+    sampling strategy for generation (e.g., greedy, top-k, top-p) can be
+    controlled via the `sampler` argument in the `compile()` method.
+
+    When calling `fit()`, inputs should consist of audio data and corresponding
+    target text transcriptions. The model is trained to predict the target text
+    token-by-token.
+
+    All `AudioToText` tasks include a `from_preset()` constructor which
+    can be used to load pre-trained configurations and weights for specific
+    audio-to-text models.
+    This constructor can also be called on the base `AudioToText` class,
+    which will automatically select the correct subclass based on the preset.
+
+    Examples:
+    ```python
+    # Load a Moonshine backbone with pre-trained weights.
+    # AudioToText is a base class. You will typically work with a specific
+    # implementation, such as `keras_hub.models.MoonshineAudioToText`.
+    # The following examples demonstrate common usage patterns.
+
+    # Initialize a model from a preset using the specific subclass.
+    audio_to_text = keras_hub.models.MoonshineAudioToText.from_preset(
+        "moonshine_base_en"
+    )
+
+    # Initialize a model from a preset using the base class.
+    audio_to_text_model_base = keras_hub.models.AudioToText.from_preset(
+        "moonshine_base_en"
+    )
+
+    # Generate text from an audio input.
+    audio_input_tensor = keras.random.normal((1, 16000, 1))
+    generated_output = audio_to_text_model.generate(
+        {"audio": audio_input_tensor}
+    )
+
+    # Generate conditioned on the `"The quick brown fox."` as an input sequence.
+    prompted_output = audio_to_text_model.generate(
+        {"audio": audio_input_tensor, "text": "The quick brown fox."}
+    )
+
+    # Use a different sampling strategy for generation.
+    audio_to_text_model.compile(sampler="greedy")
+    greedy_output = audio_to_text_model.generate(
+        {"audio": audio_input_tensor}
+    )
+    """
+
+    # TODO: Fill in once audio to text task model requirements are clearer.
keras_hub/src/models/audio_to_text_preprocessor.py
ADDED
@@ -0,0 +1,80 @@
+from keras_hub.src.models.seq_2_seq_lm_preprocessor import Seq2SeqLMPreprocessor
+
+
+class AudioToTextPreprocessor(Seq2SeqLMPreprocessor):
+    """Base class for audio-to-text preprocessing layers.
+
+    `AudioToTextPreprocessor` layers wrap an audio feature extractor (specific
+    to the subclass) and a `keras_hub.tokenizer.Tokenizer` to create a
+    preprocessing layer for audio-to-text tasks. It is intended to be
+    paired with a `keras_hub.models.AudioToText` task.
+
+    Subclasses are expected to handle the conversion of raw audio data into
+    numerical features suitable for an encoder, and raw text data into token IDs
+    for a decoder.
+
+    All `AudioToTextPreprocessor` layers take a dictionary as input,
+    typically with keys like `"audio"` (for audio data) and `"text"` (for
+    target transcriptions or decoder prompts).
+
+    This layer will always output a `(x, y, sample_weight)` tuple, where `x`
+    is a dictionary containing processed audio features for the encoder and
+    tokenized text inputs for the decoder. `y` contains the target token IDs
+    (decoder input tokens shifted by one position), and `sample_weight`
+    indicates padding in `y`. The exact keys and structure of features within
+    `x` will depend on the specific subclass and the paired `AudioToText` model.
+
+    An `AudioToTextPreprocessor` includes `generate_preprocess` and
+    `generate_postprocess` methods for use during inference with an
+    `AudioToText` model's `generate()` method.
+
+    All `AudioToTextPreprocessor` tasks include a `from_preset()` constructor
+    which can be used to load a pre-trained configuration, including tokenizer
+    vocabularies and audio feature extraction settings. Calling `from_preset()`
+    on this base class can instantiate the correct subclass registered for the
+    given preset.
+
+    Examples:
+    ```python
+    preprocessor = keras_hub.models.AudioToTextPreprocessor.from_preset(
+        "moonshine_base_en",
+        decoder_sequence_length=10
+    )
+
+    # Process a single audio-text pair.
+    x = {
+        "audio": keras.random.normal((1, 16000, 1)),
+        "text": ["the quick brown fox"]
+    }
+    x, y, sample_weight = preprocessor(x)
+
+    # Process a batch of audio-text pairs.
+    x = {
+        "audio": keras.random.normal((2, 16000, 1)),
+        "text": ["first sentence", "second sentence"]
+    }
+    x, y, sample_weight = preprocessor(x)
+
+    # With a `tf.data.Dataset`.
+    audio_tf = keras.ops.convert_to_tensor(batch_input["audio"])
+    text_tf = batch_input["text"]  # List of strings
+    x = {"audio": audio_tf, "text": text_tf}
+    ds = tf.data.Dataset.from_tensor_slices(x)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    ds = ds.batch(2)  # Batching after map
+
+    # Generate preprocess and postprocess.
+    x = preprocessor.generate_preprocess({
+        "audio": keras.random.normal((1, 16000, 1)),
+        "text": ["optional prompt text"]
+    })
+    x = preprocessor.generate_postprocess({
+        "decoder_token_ids": keras.ops.array([[10, 20, 30, 2, 0]]),
+        "decoder_padding_mask": keras.ops.array([
+            [True, True, True, True, False]
+        ])
+    })
+    ```
+    """
+
+    # TODO: Fill in once audio to text task model requirements are clearer.
keras_hub/src/models/backbone.py
CHANGED
@@ -177,14 +177,17 @@ class Backbone(keras.Model):
         )
         return loader.load_backbone(backbone_cls, load_weights, **kwargs)
 
-    def save_to_preset(self, preset_dir):
+    def save_to_preset(self, preset_dir, max_shard_size=10):
         """Save backbone to a preset directory.
 
         Args:
             preset_dir: The path to the local model preset directory.
+            max_shard_size: `int` or `float`. Maximum size in GB for each
+                sharded file. If `None`, no sharding will be done. Defaults to
+                `10`.
         """
         saver = get_preset_saver(preset_dir)
-        saver.save_backbone(self)
+        saver.save_backbone(self, max_shard_size=max_shard_size)
 
     def get_lora_target_names(self):
         """Returns list of layer names which are to be LoRA-fied.
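Note: the new `max_shard_size` argument caps the size (in GB) of each weights file written by `save_to_preset`, with `None` disabling sharding. A hedged usage sketch (the preset name and paths are illustrative):

```python
import keras_hub

# Load any backbone and save it as a local preset. Weight files larger than
# `max_shard_size` (in GB) are split across multiple shards.
backbone = keras_hub.models.Backbone.from_preset("gemma_2b_en")
backbone.save_to_preset("./my_preset", max_shard_size=2)

# The saved directory can be reloaded with `from_preset`.
restored = keras_hub.models.Backbone.from_preset("./my_preset")
```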
keras_hub/src/models/cspnet/cspnet_backbone.py
CHANGED
@@ -81,7 +81,7 @@ class CSPNetBackbone(FeaturePyramidBackbone):
 
     # Pretrained backbone
     model = keras_hub.models.CSPNetBackbone.from_preset(
-        "
+        "csp_darknet_53_ra_imagenet"
     )
     model(input_data)
 
@@ -357,18 +357,6 @@ def bottleneck_block(
            dtype=dtype,
            name=f"{name}_bottleneck_block_bn_3",
        )(x)
-       if activation == "leaky_relu":
-           x = layers.LeakyReLU(
-               negative_slope=0.01,
-               dtype=dtype,
-               name=f"{name}_bottleneck_block_activation_3",
-           )(x)
-       else:
-           x = layers.Activation(
-               activation,
-               dtype=dtype,
-               name=f"{name}_bottleneck_block_activation_3",
-           )(x)
 
        x = layers.add(
            [x, shortcut], dtype=dtype, name=f"{name}_bottleneck_block_add"
@@ -673,6 +661,13 @@ def cross_stage(
                name=f"{name}_csp_activation_1",
            )(x)
        else:
+           if strides > 1:
+               x = layers.ZeroPadding2D(
+                   1,
+                   data_format=data_format,
+                   dtype=dtype,
+                   name=f"{name}_csp_conv_pad_1",
+               )(x)
            x = layers.Conv2D(
                filters=down_chs,
                kernel_size=3,
@@ -882,6 +877,13 @@ def cross_stage3(
                name=f"{name}_cs3_activation_1",
            )(x)
        else:
+           if strides > 1:
+               x = layers.ZeroPadding2D(
+                   1,
+                   data_format=data_format,
+                   dtype=dtype,
+                   name=f"{name}_cs3_conv_pad_1",
+               )(x)
            x = layers.Conv2D(
                filters=down_chs,
                kernel_size=3,
@@ -1062,6 +1064,13 @@ def dark_stage(
                name=f"{name}_dark_activation_1",
            )(x)
        else:
+           if strides > 1:
+               x = layers.ZeroPadding2D(
+                   1,
+                   data_format=data_format,
+                   dtype=dtype,
+                   name=f"{name}_dark_conv_pad_1",
+               )(x)
            x = layers.Conv2D(
                filters=filters,
                kernel_size=3,
@@ -1091,18 +1100,18 @@ def dark_stage(
                dtype=dtype,
                name=f"{name}_dark_activation_1",
            )(x)
-
-
-
-
-
-
-
-
-
-
-
-
+       for i in range(depth):
+           x = block_fn(
+               filters=block_channels,
+               dilation=dilation,
+               bottle_ratio=bottle_ratio,
+               groups=groups,
+               activation=activation,
+               data_format=data_format,
+               channel_axis=channel_axis,
+               dtype=dtype,
+               name=f"{name}_block_{i}",
+           )(x)
        return x
 
    return apply
@@ -1135,6 +1144,13 @@ def create_csp_stem(
            or (i == last_idx and strides > 2 and not pooling)
            else 1
        )
+       if conv_strides > 1:
+           x = layers.ZeroPadding2D(
+               (kernel_size - 1) // 2,
+               data_format=data_format,
+               dtype=dtype,
+               name=f"csp_stem_pad_{i}",
+           )(x)
        x = layers.Conv2D(
            filters=chs,
            kernel_size=kernel_size,
@@ -1167,10 +1183,19 @@ def create_csp_stem(
 
    if pooling == "max":
        assert strides > 2
+       # Use manual padding to handle edge case scenario to ignore zero's
+       # as max value instead consider negative values from Leaky Relu type
+       # of activations.
+       pad_width = [[1, 1], [1, 1]]
+       if data_format == "channels_last":
+           pad_width += [[0, 0]]
+       else:
+           pad_width = [[0, 0]] + pad_width
+       pad_width = [[0, 0]] + pad_width
+       x = ops.pad(x, pad_width=pad_width, constant_values=float("-inf"))
        x = layers.MaxPooling2D(
            pool_size=3,
            strides=2,
-           padding="same",
            data_format=data_format,
            dtype=dtype,
            name="csp_stem_pool",
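Note: the stem change above swaps the pool's `padding="same"` for explicit `-inf` padding, matching the in-code comment: if the border is padded with zeros, a padded zero can win the max over negative activations (as they can be after a LeakyReLU), whereas `-inf` padding keeps border maxima driven by real values. A toy NumPy sketch of the arithmetic (values are illustrative):

```python
import numpy as np

# A 1D toy example: all activations are negative, as after a LeakyReLU.
# Window size 3, stride 2, one window reaching past the right border.
row = np.array([-0.5, -0.2, -0.4, -0.1], dtype="float32")

# Zero padding at the border: the padded zero wins the max.
zero_padded = np.concatenate([row, [0.0]])
print(zero_padded[2:5].max())  # 0.0 -- comes from padding, not the data

# -inf padding: the border max still comes from real activations.
inf_padded = np.concatenate([row, [-np.inf]])
print(inf_padded[2:5].max())  # -0.1
```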
keras_hub/src/models/cspnet/cspnet_presets.py
CHANGED
@@ -6,11 +6,46 @@ backbone_presets = {
             "description": (
                 "A CSP-DarkNet (Cross-Stage-Partial) image classification model"
                 " pre-trained on the Randomly Augmented ImageNet 1k dataset at "
-                "a
+                "a 256x256 resolution."
             ),
-            "params":
+            "params": 27642184,
             "path": "cspnet",
         },
-        "kaggle_handle": "kaggle://keras/cspdarknet/keras/csp_darknet_53_ra_imagenet/
+        "kaggle_handle": "kaggle://keras/cspdarknet/keras/csp_darknet_53_ra_imagenet/2",
+    },
+    "csp_resnext_50_ra_imagenet": {
+        "metadata": {
+            "description": (
+                "A CSP-ResNeXt (Cross-Stage-Partial) image classification model"
+                " pre-trained on the Randomly Augmented ImageNet 1k dataset at "
+                "a 256x256 resolution."
+            ),
+            "params": 20569896,
+            "path": "cspnet",
+        },
+        "kaggle_handle": "kaggle://keras/cspdarknet/keras/csp_resnext_50_ra_imagenet/1",
+    },
+    "csp_resnet_50_ra_imagenet": {
+        "metadata": {
+            "description": (
+                "A CSP-ResNet (Cross-Stage-Partial) image classification model"
+                " pre-trained on the Randomly Augmented ImageNet 1k dataset at "
+                "a 256x256 resolution."
+            ),
+            "params": 21616168,
+            "path": "cspnet",
+        },
+        "kaggle_handle": "kaggle://keras/cspdarknet/keras/csp_resnet_50_ra_imagenet/1",
+    },
+    "darknet_53_imagenet": {
+        "metadata": {
+            "description": (
+                "A DarkNet image classification model pre-trained on the"
+                "ImageNet 1k dataset at a 256x256 resolution."
+            ),
+            "params": 41609928,
+            "path": "cspnet",
+        },
+        "kaggle_handle": "kaggle://keras/cspdarknet/keras/darknet_53_imagenet/1",
     },
 }
keras_hub/src/models/falcon/falcon_backbone.py
CHANGED
@@ -29,7 +29,7 @@ class FalconBackbone(Backbone):
         layer_norm_epsilon: float. Epsilon for the layer normalization layers in
             the transformer decoder.
         attention_dropout_rate: float. Dropout probability for the attention.
-        feedforward_dropout_rate:
+        feedforward_dropout_rate: float. Dropout probability for the
             feedforward.
         dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
             for model computations and weights. Note that some computations,
keras_hub/src/models/gemma/gemma_presets.py
CHANGED
@@ -61,7 +61,7 @@ backbone_presets = {
             "params": 8537680896,
             "path": "gemma",
         },
-        "kaggle_handle": "kaggle://keras/gemma/keras/gemma_7b_en/
+        "kaggle_handle": "kaggle://keras/gemma/keras/gemma_7b_en/4",
     },
     "gemma_instruct_7b_en": {
         "metadata": {
@@ -71,7 +71,7 @@ backbone_presets = {
             "params": 8537680896,
             "path": "gemma",
         },
-        "kaggle_handle": "kaggle://keras/gemma/keras/gemma_instruct_7b_en/
+        "kaggle_handle": "kaggle://keras/gemma/keras/gemma_instruct_7b_en/4",
     },
     "gemma_1.1_instruct_7b_en": {
         "metadata": {
@@ -82,7 +82,7 @@ backbone_presets = {
             "params": 8537680896,
             "path": "gemma",
         },
-        "kaggle_handle": "kaggle://keras/gemma/keras/gemma_1.1_instruct_7b_en/
+        "kaggle_handle": "kaggle://keras/gemma/keras/gemma_1.1_instruct_7b_en/5",
     },
     "code_gemma_7b_en": {
         "metadata": {
@@ -94,7 +94,7 @@ backbone_presets = {
             "params": 8537680896,
             "path": "gemma",
         },
-        "kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_7b_en/
+        "kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_7b_en/3",
     },
     "code_gemma_instruct_7b_en": {
         "metadata": {
@@ -106,7 +106,7 @@ backbone_presets = {
             "params": 8537680896,
             "path": "gemma",
         },
-        "kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_instruct_7b_en/
+        "kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_instruct_7b_en/3",
     },
     "code_gemma_1.1_instruct_7b_en": {
         "metadata": {
@@ -118,7 +118,7 @@ backbone_presets = {
             "params": 8537680896,
             "path": "gemma",
         },
-        "kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_1.1_instruct_7b_en/
+        "kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_1.1_instruct_7b_en/3",
     },
     "gemma2_2b_en": {
         "metadata": {
@@ -144,7 +144,7 @@ backbone_presets = {
             "params": 9241705984,
             "path": "gemma",
         },
-        "kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_9b_en/
+        "kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_9b_en/4",
     },
     "gemma2_instruct_9b_en": {
         "metadata": {
@@ -154,7 +154,7 @@ backbone_presets = {
             "params": 9241705984,
             "path": "gemma",
         },
-        "kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_instruct_9b_en/
+        "kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_instruct_9b_en/4",
     },
     "gemma2_27b_en": {
         "metadata": {
@@ -162,7 +162,7 @@ backbone_presets = {
             "params": 27227128320,
             "path": "gemma",
         },
-        "kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_27b_en/
+        "kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_27b_en/3",
     },
     "gemma2_instruct_27b_en": {
         "metadata": {
@@ -172,7 +172,7 @@ backbone_presets = {
             "params": 27227128320,
             "path": "gemma",
         },
-        "kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_instruct_27b_en/
+        "kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_instruct_27b_en/3",
     },
     "shieldgemma_2b_en": {
         "metadata": {
keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py
CHANGED
@@ -40,13 +40,13 @@ class Gemma3CausalLMPreprocessor(CausalLMPreprocessor):
 
     For use with generation, the layer also exposes two methods
     `generate_preprocess()` and `generate_postprocess()`. When this preprocessor
-    is attached to a `keras_hub.models.
+    is attached to a `keras_hub.models.Gemma3CausalLM` instance, these methods
     will be called implicitly in `generate()`. They can also be called
     standalone (e.g. to precompute preprocessing inputs for generation in a
     separate process).
 
     Args:
-        tokenizer: A `keras_hub.models.
+        tokenizer: A `keras_hub.models.Gemma3Tokenizer` instance.
         image_converter: A `keras_hub.layers.ImageConverter` instance. Defaults
             to `None`.
         sequence_length: The length of the packed inputs. Defaults to 1024.
@@ -512,6 +512,7 @@ class Gemma3CausalLMPreprocessor(CausalLMPreprocessor):
 
         # Extract text part of the input.
         prompts, responses = x["prompts"], x["responses"]
+        tf.debugging.assert_shapes([(prompts, ("N",)), (responses, ("N",))])
 
         # Find out if the input is batched/not batched. Uprank if not batched.
         # In other preprocessors, we don't have to do this, but here, all