keras-hub 0.21.1.dev0__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/layers/__init__.py +9 -0
- keras_hub/models/__init__.py +47 -0
- keras_hub/src/layers/modeling/transformer_encoder.py +6 -3
- keras_hub/src/layers/preprocessing/multi_segment_packer.py +17 -3
- keras_hub/src/layers/preprocessing/start_end_packer.py +24 -6
- keras_hub/src/models/backbone.py +13 -10
- keras_hub/src/models/clip/clip_backbone.py +3 -102
- keras_hub/src/models/clip/clip_layers.py +295 -0
- keras_hub/src/models/clip/clip_preprocessor.py +57 -48
- keras_hub/src/models/clip/clip_text_encoder.py +2 -2
- keras_hub/src/models/clip/clip_vision_encoder.py +3 -3
- keras_hub/src/models/deit/__init__.py +5 -0
- keras_hub/src/models/deit/deit_backbone.py +154 -0
- keras_hub/src/models/deit/deit_image_classifier.py +171 -0
- keras_hub/src/models/deit/deit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/deit/deit_image_converter.py +8 -0
- keras_hub/src/models/deit/deit_layers.py +519 -0
- keras_hub/src/models/deit/deit_presets.py +49 -0
- keras_hub/src/models/dinov2/__init__.py +5 -0
- keras_hub/src/models/dinov2/dinov2_backbone.py +228 -0
- keras_hub/src/models/dinov2/dinov2_image_converter.py +8 -0
- keras_hub/src/models/dinov2/dinov2_layers.py +886 -0
- keras_hub/src/models/dinov2/dinov2_presets.py +89 -0
- keras_hub/src/models/esm/__init__.py +5 -0
- keras_hub/src/models/esm/esm_attention.py +95 -0
- keras_hub/src/models/esm/esm_backbone.py +229 -0
- keras_hub/src/models/esm/esm_classifier.py +184 -0
- keras_hub/src/models/esm/esm_classifier_preprocessor.py +135 -0
- keras_hub/src/models/esm/esm_encoder.py +134 -0
- keras_hub/src/models/esm/esm_masked_plm.py +117 -0
- keras_hub/src/models/esm/esm_masked_plm_preprocessor.py +143 -0
- keras_hub/src/models/esm/esm_presets.py +53 -0
- keras_hub/src/models/esm/esm_tokenizer.py +82 -0
- keras_hub/src/models/flux/flux_text_to_image_preprocessor.py +6 -2
- keras_hub/src/models/gemma/gemma_attention.py +1 -1
- keras_hub/src/models/gemma3/gemma3_backbone.py +2 -2
- keras_hub/src/models/gemma3/gemma3_interleave_embeddings.py +1 -1
- keras_hub/src/models/gemma3/gemma3_presets.py +25 -0
- keras_hub/src/models/hgnetv2/__init__.py +5 -0
- keras_hub/src/models/hgnetv2/hgnetv2_backbone.py +193 -0
- keras_hub/src/models/hgnetv2/hgnetv2_encoder.py +148 -0
- keras_hub/src/models/hgnetv2/hgnetv2_image_classifier.py +216 -0
- keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_preprocessor.py +14 -0
- keras_hub/src/models/hgnetv2/hgnetv2_image_converter.py +8 -0
- keras_hub/src/models/hgnetv2/hgnetv2_layers.py +918 -0
- keras_hub/src/models/hgnetv2/hgnetv2_presets.py +58 -0
- keras_hub/src/models/llama3/llama3_presets.py +3 -3
- keras_hub/src/models/mistral/mistral_presets.py +17 -1
- keras_hub/src/models/mixtral/mixtral_presets.py +2 -2
- keras_hub/src/models/mobilenet/mobilenet_presets.py +4 -4
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +2 -2
- keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py +2 -2
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +17 -17
- keras_hub/src/models/qwen3/__init__.py +5 -0
- keras_hub/src/models/qwen3/qwen3_attention.py +369 -0
- keras_hub/src/models/qwen3/qwen3_backbone.py +191 -0
- keras_hub/src/models/qwen3/qwen3_causal_lm.py +390 -0
- keras_hub/src/models/qwen3/qwen3_causal_lm_preprocessor.py +10 -0
- keras_hub/src/models/qwen3/qwen3_decoder.py +309 -0
- keras_hub/src/models/qwen3/qwen3_layernorm.py +38 -0
- keras_hub/src/models/qwen3/qwen3_presets.py +73 -0
- keras_hub/src/models/qwen3/qwen3_tokenizer.py +48 -0
- keras_hub/src/models/qwen_moe/qwen_moe_attention.py +1 -0
- keras_hub/src/models/qwen_moe/qwen_moe_presets.py +2 -2
- keras_hub/src/models/roformer_v2/roformer_v2_attention.py +0 -2
- keras_hub/src/models/stable_diffusion_3/flow_match_euler_discrete_scheduler.py +16 -7
- keras_hub/src/models/stable_diffusion_3/mmdit.py +61 -4
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +31 -32
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +1 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +1 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +1 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +6 -2
- keras_hub/src/models/vit/vit_backbone.py +31 -11
- keras_hub/src/models/vit/vit_image_converter.py +0 -70
- keras_hub/src/models/vit/vit_layers.py +33 -18
- keras_hub/src/models/vit/vit_presets.py +11 -11
- keras_hub/src/utils/keras_utils.py +17 -0
- keras_hub/src/utils/preset_utils.py +19 -4
- keras_hub/src/utils/tensor_utils.py +14 -0
- keras_hub/src/utils/transformers/convert_deit.py +155 -0
- keras_hub/src/utils/transformers/convert_dinov2.py +180 -0
- keras_hub/src/utils/transformers/convert_esm.py +159 -0
- keras_hub/src/utils/transformers/convert_llama3.py +6 -0
- keras_hub/src/utils/transformers/convert_qwen3.py +145 -0
- keras_hub/src/utils/transformers/export/gemma.py +89 -0
- keras_hub/src/utils/transformers/export/hf_exporter.py +98 -0
- keras_hub/src/utils/transformers/preset_loader.py +14 -2
- keras_hub/src/version.py +1 -1
- keras_hub/tokenizers/__init__.py +1 -0
- {keras_hub-0.21.1.dev0.dist-info → keras_hub-0.22.0.dist-info}/METADATA +4 -4
- {keras_hub-0.21.1.dev0.dist-info → keras_hub-0.22.0.dist-info}/RECORD +93 -49
- keras_hub/src/models/clip/clip_encoder_block.py +0 -111
- keras_hub/src/models/clip/clip_vision_embedding.py +0 -101
- {keras_hub-0.21.1.dev0.dist-info → keras_hub-0.22.0.dist-info}/WHEEL +0 -0
- {keras_hub-0.21.1.dev0.dist-info → keras_hub-0.22.0.dist-info}/top_level.txt +0 -0
keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py

@@ -1,4 +1,6 @@
 import keras
+from keras import backend
+from keras import distribution
 from keras import layers
 from keras import ops
 
@@ -96,26 +98,10 @@ class LatentRescaling(layers.Rescaling):
         return (self.backend.cast(inputs, dtype) / scale) + offset
 
 
-class
-    def call(
-        self,
-        latents,
-        positive_contexts,
-        negative_contexts,
-        positive_pooled_projections,
-        negative_pooled_projections,
-        timestep,
-    ):
+class TimestepBroadcastTo(layers.Layer):
+    def call(self, latents, timestep):
         timestep = ops.broadcast_to(timestep, ops.shape(latents)[:1])
-
-        contexts = ops.concatenate(
-            [positive_contexts, negative_contexts], axis=0
-        )
-        pooled_projections = ops.concatenate(
-            [positive_pooled_projections, negative_pooled_projections], axis=0
-        )
-        timesteps = ops.concatenate([timestep, timestep], axis=0)
-        return latents, contexts, pooled_projections, timesteps
+        return timestep
 
 
 class ClassifierFreeGuidance(layers.Layer):
@@ -330,8 +316,8 @@ class StableDiffusion3Backbone(Backbone):
             name="diffuser",
         )
         self.vae = vae
-        self.
-            dtype=dtype, name="
+        self.timestep_broadcast_to = TimestepBroadcastTo(
+            dtype=dtype, name="timestep_broadcast_to"
         )
         self.cfg = ClassifierFreeGuidance(
             dtype=dtype, name="classifier_free_guidance"
@@ -538,6 +524,9 @@ class StableDiffusion3Backbone(Backbone):
         latents = self.vae.encode(images)
         return self.image_rescaling(latents)
 
+    def configure_scheduler(self, num_steps):
+        self.scheduler.set_sigmas(num_steps)
+
     def add_noise_step(self, latents, noises, step, num_steps):
         return self.scheduler.add_noise(latents, noises, step, num_steps)
 
@@ -562,11 +551,15 @@ class StableDiffusion3Backbone(Backbone):
 
         # Concatenation for classifier-free guidance.
         if guidance_scale is not None:
-
-
+            timestep = self.timestep_broadcast_to(latents, timestep)
+            timesteps = ops.concatenate([timestep, timestep], axis=0)
+            concated_latents = ops.concatenate([latents, latents], axis=0)
+            contexts = ops.concatenate([embeddings[0], embeddings[1]], axis=0)
+            pooled_projs = ops.concatenate(
+                [embeddings[2], embeddings[3]], axis=0
             )
         else:
-            timesteps =
+            timesteps = self.timestep_broadcast_to(latents, timestep)
             concated_latents = latents
             contexts = embeddings[0]
             pooled_projs = embeddings[2]
@@ -623,20 +616,26 @@ class StableDiffusion3Backbone(Backbone):
     def from_config(cls, config, custom_objects=None):
         config = config.copy()
 
-        # Propagate `dtype` to
+        # Propagate `dtype` to the VAE if needed.
         if "dtype" in config and config["dtype"] is not None:
             dtype_config = config["dtype"]
             if "dtype" not in config["vae"]["config"]:
                 config["vae"]["config"]["dtype"] = dtype_config
-
-
-
-
+
+        # Text encoders default to float16 dtype if not specified.
+        # TODO: JAX CPU doesn't support float16 in `nn.dot_product_attention`.
+        is_jax_cpu = (
+            backend.backend() == "jax"
+            and "cpu" in distribution.list_devices()[0].lower()
+        )
+        for text_encoder in ("clip_l", "clip_g", "t5"):
             if (
-
-                and
+                text_encoder in config
+                and config[text_encoder] is not None
+                and "dtype" not in config[text_encoder]["config"]
+                and not is_jax_cpu
             ):
-                config[
+                config[text_encoder]["config"]["dtype"] = "float16"
 
         # We expect `vae`, `clip_l`, `clip_g` and/or `t5` to be instantiated.
         config["vae"] = layers.deserialize(
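The refactor above replaces the old batching layer with a `TimestepBroadcastTo` layer plus explicit `keras.ops` concatenations in the denoising path. A minimal standalone sketch of that classifier-free-guidance batching pattern, with made-up shapes and variable names (not taken from the package):

```python
import numpy as np
from keras import ops

# Toy stand-ins for the real latents and text embeddings.
latents = np.zeros((2, 64, 64, 16), dtype="float32")    # (batch, h, w, c)
positive_ctx = np.zeros((2, 77, 4096), dtype="float32")
negative_ctx = np.zeros((2, 77, 4096), dtype="float32")
timestep = np.array(999, dtype="float32")               # scalar timestep

# Broadcast the scalar timestep to the batch dimension (what
# TimestepBroadcastTo does in the diff above).
timestep = ops.broadcast_to(timestep, ops.shape(latents)[:1])

# Duplicate everything so one diffuser call covers both the positive and
# negative branches of classifier-free guidance.
timesteps = ops.concatenate([timestep, timestep], axis=0)
cfg_latents = ops.concatenate([latents, latents], axis=0)
contexts = ops.concatenate([positive_ctx, negative_ctx], axis=0)

print(ops.shape(cfg_latents))  # (4, 64, 64, 16)
```

Batching the positive and negative branches this way lets a single diffuser forward pass serve both sides of the guidance update.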
keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py

@@ -50,8 +50,12 @@ class StableDiffusion3TextToImagePreprocessor(TextToImagePreprocessor):
 
     def generate_preprocess(self, x):
         token_ids = {}
-        token_ids["clip_l"] = self.clip_l_preprocessor(
-
+        token_ids["clip_l"] = self.clip_l_preprocessor(
+            {"prompts": x, "images": None}
+        )["token_ids"]
+        token_ids["clip_g"] = self.clip_g_preprocessor(
+            {"prompts": x, "images": None}
+        )["token_ids"]
         if self.t5_preprocessor is not None:
             token_ids["t5"] = self.t5_preprocessor(x)["token_ids"]
         return token_ids
keras_hub/src/models/vit/vit_backbone.py

@@ -18,10 +18,10 @@ class ViTBackbone(Backbone):
 
     Args:
         image_shape: A tuple or list of 3 integers representing the shape of the
-            input image `(height, width, channels)
-
-
-
+            input image `(height, width, channels)`.
+        patch_size: int or (int, int). The size of each image patch, the input
+            image will be divided into patches of shape
+            `(patch_size_h, patch_size_w)`.
         num_layers: int. The number of transformer encoder layers.
         num_heads: int. specifying the number of attention heads in each
             Transformer encoder layer.
@@ -37,6 +37,10 @@ class ViTBackbone(Backbone):
         use_mha_bias: bool. Whether to use bias in the multi-head
             attention layers.
         use_mlp_bias: bool. Whether to use bias in the MLP layers.
+        use_class_token: bool. Whether to use class token to be part of
+            patch embedding. Defaults to `True`.
+        use_patch_bias: bool. Whether to use bias in Conv2d of patch embedding
+            layer. Defaults to `True`.
         data_format: str. `"channels_last"` or `"channels_first"`, specifying
             the data format for the input image. If `None`, defaults to
             `"channels_last"`.
@@ -58,6 +62,8 @@ class ViTBackbone(Backbone):
         layer_norm_epsilon=1e-6,
         use_mha_bias=True,
         use_mlp_bias=True,
+        use_class_token=True,
+        use_patch_bias=True,
         data_format=None,
         dtype=None,
         **kwargs,
@@ -74,24 +80,34 @@ class ViTBackbone(Backbone):
                 f"at index {h_axis} (height) or {w_axis} (width). "
                 f"Image shape: {image_shape}"
             )
-
+
+        if isinstance(patch_size, int):
+            patch_size = (patch_size, patch_size)
+
+        if image_shape[h_axis] % patch_size[0] != 0:
+            raise ValueError(
+                f"Input height {image_shape[h_axis]} should be divisible by "
+                f"patch size {patch_size[0]}."
+            )
+
+        if image_shape[w_axis] % patch_size[1] != 0:
             raise ValueError(
-                f"
-                f"
-                f"indices {h_axis} and {w_axis} respectively. Image shape: "
-                f"{image_shape}"
+                f"Input width {image_shape[h_axis]} should be divisible by "
+                f"patch size {patch_size[1]}."
             )
 
         num_channels = image_shape[channels_axis]
 
         # === Functional Model ===
-        inputs = keras.layers.Input(shape=image_shape)
+        inputs = keras.layers.Input(shape=image_shape, name="images")
 
         x = ViTPatchingAndEmbedding(
-            image_size=image_shape[h_axis],
+            image_size=(image_shape[h_axis], image_shape[w_axis]),
             patch_size=patch_size,
             hidden_dim=hidden_dim,
             num_channels=num_channels,
+            use_class_token=use_class_token,
+            use_patch_bias=use_patch_bias,
             data_format=data_format,
             dtype=dtype,
             name="vit_patching_and_embedding",
@@ -130,6 +146,8 @@ class ViTBackbone(Backbone):
         self.layer_norm_epsilon = layer_norm_epsilon
         self.use_mha_bias = use_mha_bias
         self.use_mlp_bias = use_mlp_bias
+        self.use_class_token = use_class_token
+        self.use_patch_bias = use_patch_bias
         self.data_format = data_format
 
     def get_config(self):
@@ -147,6 +165,8 @@ class ViTBackbone(Backbone):
                 "layer_norm_epsilon": self.layer_norm_epsilon,
                 "use_mha_bias": self.use_mha_bias,
                 "use_mlp_bias": self.use_mlp_bias,
+                "use_class_token": self.use_class_token,
+                "use_patch_bias": self.use_patch_bias,
             }
         )
         return config
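For reference, a hedged construction sketch using the new arguments. The layer sizes are arbitrary, and the constructor signature is assumed to match the parameters visible in the diff above:

```python
import numpy as np
import keras_hub

# Randomly initialized ViT exercising the flags added in this release.
backbone = keras_hub.models.ViTBackbone(
    image_shape=(224, 224, 3),
    patch_size=16,            # an int is now expanded to (16, 16)
    num_layers=2,
    num_heads=2,
    hidden_dim=64,
    mlp_dim=128,
    use_class_token=False,    # skip the class token in the patch embedding
    use_patch_bias=False,     # no bias in the patch-embedding Conv2D
)
features = backbone(np.random.uniform(size=(1, 224, 224, 3)))
print(features.shape)
```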
keras_hub/src/models/vit/vit_image_converter.py

@@ -1,78 +1,8 @@
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
 from keras_hub.src.models.vit.vit_backbone import ViTBackbone
-from keras_hub.src.utils.tensor_utils import preprocessing_function
 
 
 @keras_hub_export("keras_hub.layers.ViTImageConverter")
 class ViTImageConverter(ImageConverter):
-    """Converts images to the format expected by a ViT model.
-
-    This layer performs image normalization using mean and standard deviation
-    values. By default, it uses the same normalization as the
-    "google/vit-large-patch16-224" model on Hugging Face:
-    `norm_mean=[0.5, 0.5, 0.5]` and `norm_std=[0.5, 0.5, 0.5]`
-    ([reference](https://huggingface.co/google/vit-large-patch16-224/blob/main/preprocessor_config.json)).
-    These defaults are suitable for models pretrained using this normalization.
-
-    Args:
-        norm_mean: list or tuple of floats. Mean values for image normalization.
-            Defaults to `[0.5, 0.5, 0.5]`.
-        norm_std: list or tuple of floats. Standard deviation values for
-            image normalization. Defaults to `[0.5, 0.5, 0.5]`.
-        **kwargs: Additional keyword arguments passed to
-            `keras_hub.layers.preprocessing.ImageConverter`.
-
-    Examples:
-    ```python
-    import keras
-    import numpy as np
-    from keras_hub.src.layers import ViTImageConverter
-
-    # Example image (replace with your actual image data)
-    image = np.random.rand(1, 224, 224, 3)  # Example: (B, H, W, C)
-
-    # Create a ViTImageConverter instance
-    converter = ViTImageConverter(
-        image_size=(28,28),
-        scale=1/255.
-    )
-    # Preprocess the image
-    preprocessed_image = converter(image)
-    ```
-    """
-
     backbone_cls = ViTBackbone
-
-    def __init__(
-        self, norm_mean=[0.5, 0.5, 0.5], norm_std=[0.5, 0.5, 0.5], **kwargs
-    ):
-        super().__init__(**kwargs)
-        self.norm_mean = norm_mean
-        self.norm_std = norm_std
-
-    @preprocessing_function
-    def call(self, inputs):
-        # TODO: Remove this whole function. Why can just use scale and offset
-        # in the base class.
-        x = super().call(inputs)
-        if self.norm_mean:
-            norm_mean = self._expand_non_channel_dims(self.norm_mean, x)
-            x, norm_mean = self._convert_types(x, norm_mean, self.compute_dtype)
-            x = x - norm_mean
-        if self.norm_std:
-            norm_std = self._expand_non_channel_dims(self.norm_std, x)
-            x, norm_std = self._convert_types(x, norm_std, x.dtype)
-            x = x / norm_std
-
-        return x
-
-    def get_config(self):
-        config = super().get_config()
-        config.update(
-            {
-                "norm_mean": self.norm_mean,
-                "norm_std": self.norm_std,
-            }
-        )
-        return config
keras_hub/src/models/vit/vit_layers.py

@@ -75,12 +75,13 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
     """Patches the image and embeds the patches.
 
     Args:
-        image_size: int. Size of the input image
-
-        patch_size: int. Size of each image patch.
+        image_size: (int, int). Size of the input image.
+        patch_size: (int, int). Size of each image patch.
         hidden_dim: int. Dimensionality of the patch embeddings.
         num_channels: int. Number of channels in the input image. Defaults to
             `3`.
+        use_class_token: bool. Whether to use class token to be part of
+            patch embedding. Defaults to `True`.
         data_format: str. `"channels_last"` or `"channels_first"`. Defaults to
             `None` (which uses `"channels_last"`).
         **kwargs: Additional keyword arguments passed to `keras.layers.Layer`
@@ -92,12 +93,15 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
         patch_size,
         hidden_dim,
         num_channels=3,
+        use_class_token=True,
+        use_patch_bias=True,
         data_format=None,
         **kwargs,
     ):
         super().__init__(**kwargs)
-
-
+        grid_size = tuple([s // p for s, p in zip(image_size, patch_size)])
+        num_patches = grid_size[0] * grid_size[1]
+        num_positions = num_patches + 1 if use_class_token else num_patches
 
         # === Config ===
         self.image_size = image_size
@@ -106,19 +110,22 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
         self.num_channels = num_channels
         self.num_patches = num_patches
         self.num_positions = num_positions
+        self.use_class_token = use_class_token
+        self.use_patch_bias = use_patch_bias
         self.data_format = standardize_data_format(data_format)
 
     def build(self, input_shape):
-
-
-
-
-
-
-
-
-
-
+        if self.use_class_token:
+            self.class_token = self.add_weight(
+                shape=(
+                    1,
+                    1,
+                    self.hidden_dim,
+                ),
+                initializer="random_normal",
+                dtype=self.variable_dtype,
+                name="class_token",
+            )
         self.patch_embedding = keras.layers.Conv2D(
             filters=self.hidden_dim,
             kernel_size=self.patch_size,
@@ -127,6 +134,7 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
             activation=None,
             dtype=self.dtype_policy,
             data_format=self.data_format,
+            use_bias=self.use_patch_bias,
             name="patch_embedding",
         )
         self.patch_embedding.build(input_shape)
@@ -153,10 +161,16 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
         patch_embeddings = ops.reshape(
             patch_embeddings, [embeddings_shape[0], -1, embeddings_shape[-1]]
         )
-        class_token = ops.tile(self.class_token, (embeddings_shape[0], 1, 1))
         position_embeddings = self.position_embedding(self.position_ids)
-
-
+
+        if self.use_class_token:
+            class_token = ops.tile(
+                self.class_token, (embeddings_shape[0], 1, 1)
+            )
+            patch_embeddings = ops.concatenate(
+                [class_token, patch_embeddings], axis=1
+            )
+        return ops.add(patch_embeddings, position_embeddings)
 
     def compute_output_shape(self, input_shape):
         return (
@@ -175,6 +189,7 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
             "num_channels": self.num_channels,
             "num_patches": self.num_patches,
             "num_positions": self.num_positions,
+            "use_class_token": self.use_class_token,
             }
         )
         return config
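The `__init__` change above derives the patch grid from per-axis image and patch sizes instead of assuming a square image. A worked example of that arithmetic with made-up sizes:

```python
# Illustrative values only, not taken from a preset.
image_size = (224, 160)   # (height, width)
patch_size = (16, 16)
use_class_token = True

grid_size = tuple(s // p for s, p in zip(image_size, patch_size))    # (14, 10)
num_patches = grid_size[0] * grid_size[1]                            # 140
num_positions = num_patches + 1 if use_class_token else num_patches  # 141

print(grid_size, num_patches, num_positions)
```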
keras_hub/src/models/vit/vit_presets.py

@@ -11,7 +11,7 @@ backbone_presets = {
             "params": 85798656,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_224_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_224_imagenet/3",
     },
     "vit_base_patch16_384_imagenet": {
         "metadata": {
@@ -22,7 +22,7 @@ backbone_presets = {
             "params": 86090496,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_384_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_384_imagenet/3",
     },
     "vit_large_patch16_224_imagenet": {
         "metadata": {
@@ -33,7 +33,7 @@ backbone_presets = {
             "params": 303301632,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_224_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_224_imagenet/3",
     },
     "vit_large_patch16_384_imagenet": {
         "metadata": {
@@ -44,7 +44,7 @@ backbone_presets = {
             "params": 303690752,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_384_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_384_imagenet/3",
     },
     "vit_base_patch32_384_imagenet": {
         "metadata": {
@@ -55,7 +55,7 @@ backbone_presets = {
             "params": 87528192,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch32_384_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch32_384_imagenet/2",
     },
     "vit_large_patch32_384_imagenet": {
         "metadata": {
@@ -66,7 +66,7 @@ backbone_presets = {
             "params": 305607680,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch32_384_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch32_384_imagenet/2",
     },
     "vit_base_patch16_224_imagenet21k": {
         "metadata": {
@@ -77,7 +77,7 @@ backbone_presets = {
             "params": 85798656,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_224_imagenet21k/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_224_imagenet21k/2",
     },
     "vit_base_patch32_224_imagenet21k": {
         "metadata": {
@@ -88,7 +88,7 @@ backbone_presets = {
             "params": 87455232,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch32_224_imagenet21k/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch32_224_imagenet21k/2",
     },
     "vit_huge_patch14_224_imagenet21k": {
         "metadata": {
@@ -99,7 +99,7 @@ backbone_presets = {
             "params": 630764800,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_huge_patch14_224_imagenet21k/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_huge_patch14_224_imagenet21k/2",
     },
     "vit_large_patch16_224_imagenet21k": {
         "metadata": {
@@ -110,7 +110,7 @@ backbone_presets = {
             "params": 303301632,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_224_imagenet21k/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_224_imagenet21k/2",
     },
     "vit_large_patch32_224_imagenet21k": {
         "metadata": {
@@ -121,6 +121,6 @@ backbone_presets = {
             "params": 305510400,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch32_224_imagenet21k/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch32_224_imagenet21k/2",
     },
 }
keras_hub/src/utils/keras_utils.py

@@ -71,6 +71,23 @@ def fused_attention_op_available():
             )
             return False
         return True
+    elif (
+        hasattr(keras.config, "is_flash_attention_enabled")
+        and keras.config.backend() == "torch"
+    ):
+        try:
+            from torch.backends.cuda import SDPAParams as SDPAParams
+            from torch.backends.cuda import (
+                can_use_flash_attention as can_use_flash_attention,
+            )
+        except ImportError:
+            logging.warning(
+                "Flash attention is not supported in your current PyTorch "
+                "version. Please update it by following the official guide: "
+                "https://pytorch.org/get-started/locally/"
+            )
+            return False
+        return True
     else:
         return False
 
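The new branch above probes PyTorch's scaled-dot-product-attention entry points before enabling the fused op. A standalone sketch of the same availability check (the function name here is illustrative, not part of keras-hub):

```python
# Returns True when the flash-attention symbols used by the diff above are
# importable from the installed PyTorch; it does not inspect a concrete call.
def torch_flash_attention_importable():
    try:
        from torch.backends.cuda import SDPAParams  # noqa: F401
        from torch.backends.cuda import can_use_flash_attention  # noqa: F401
    except ImportError:
        return False
    return True

print(torch_flash_attention_importable())
```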
keras_hub/src/utils/preset_utils.py

@@ -1,5 +1,6 @@
 import collections
 import datetime
+import glob
 import inspect
 import json
 import os
@@ -317,7 +318,8 @@ def _validate_backbone(preset):
         )
 
     weights_path = os.path.join(preset, MODEL_WEIGHTS_FILE)
-
+    sharded_weights_path = os.path.join(preset, "model_*.weights.h5")
+    if not os.path.exists(weights_path) and not glob.glob(sharded_weights_path):
         raise FileNotFoundError(
             f"The weights file is missing from the preset directory `{preset}`."
         )
@@ -647,7 +649,10 @@ class KerasPresetLoader(PresetLoader):
         return check_config_class(self.config)
 
     def load_backbone(self, cls, load_weights, **kwargs):
-
+        config = self.config.copy()
+        backbone_kwargs, kwargs = self.get_backbone_kwargs(**kwargs)
+        config["config"] = {**config["config"], **backbone_kwargs}
+        backbone = self._load_serialized_object(config, **kwargs)
         if load_weights:
             jax_memory_cleanup(backbone)
             self._load_backbone_weights(backbone)
@@ -732,7 +737,13 @@ class KerasPresetLoader(PresetLoader):
         with open(config_path, encoding="utf-8") as config_file:
             config = json.load(config_file)
         weight_map = config["weight_map"]
-
+        filenames = set()
+        for v in weight_map.values():
+            if isinstance(v, list):
+                filenames.update(v)
+            else:
+                filenames.add(v)
+        return sorted(filenames)
 
     def _load_backbone_weights(self, backbone):
         # Detect if the backbone is sharded or not.
@@ -772,7 +783,11 @@ class KerasPresetSaver:
         backbone_size_in_gb = backbone_size_in_bytes / (1024**3)
         # If the size of the backbone is larger than `max_shard_size`, save
         # sharded weights.
-        if
+        if (
+            sharded_weights_available()
+            and max_shard_size is not None
+            and backbone_size_in_gb > max_shard_size
+        ):
             backbone_sharded_weights_config_path = os.path.join(
                 self.preset_dir, SHARDED_MODEL_WEIGHTS_CONFIG_FILE
             )
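A small sketch of the weights-presence check that the `_validate_backbone` change above performs, assuming the single-file name is `model.weights.h5` (the value of `MODEL_WEIGHTS_FILE`) and using a placeholder preset directory:

```python
import glob
import os

# A preset directory is treated as having weights if either the single-file
# or the sharded naming scheme is present.
preset_dir = "./my_preset"  # placeholder path
single_file = os.path.join(preset_dir, "model.weights.h5")
sharded_files = glob.glob(os.path.join(preset_dir, "model_*.weights.h5"))
has_weights = os.path.exists(single_file) or bool(sharded_files)
print(has_weights)
```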
keras_hub/src/utils/tensor_utils.py

@@ -21,6 +21,20 @@ except ImportError:
 NO_CONVERT_COUNTER = threading.local()
 
 
+def pad(x, shape, padding_side, pad_value):
+    if padding_side == "left":
+        x = x[..., ::-1]
+
+    outputs = x.to_tensor(
+        default_value=pad_value,
+        shape=shape,
+    )
+
+    if padding_side == "left":
+        outputs = outputs[..., ::-1]
+    return outputs
+
+
 @contextlib.contextmanager
 def no_convert_scope():
     try:
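The new `pad` helper converts a ragged batch to a dense tensor and supports left padding by reversing before and after the conversion. A sketch that reproduces the helper from the diff and exercises it on a toy `tf.RaggedTensor` (the example values are made up, and the helper is inlined here rather than imported from keras_hub's private utils):

```python
import tensorflow as tf

# Same body as the `pad` helper added in the diff above.
def pad(x, shape, padding_side, pad_value):
    if padding_side == "left":
        x = x[..., ::-1]           # reverse each row before densifying
    outputs = x.to_tensor(default_value=pad_value, shape=shape)
    if padding_side == "left":
        outputs = outputs[..., ::-1]  # reverse back, leaving pads on the left
    return outputs

batch = tf.ragged.constant([[1, 2, 3], [4, 5]])
print(pad(batch, shape=[2, 5], padding_side="right", pad_value=0).numpy())
# [[1 2 3 0 0]
#  [4 5 0 0 0]]
print(pad(batch, shape=[2, 5], padding_side="left", pad_value=0).numpy())
# [[0 0 1 2 3]
#  [0 0 0 4 5]]
```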